|
import json |
|
from datetime import datetime |
|
|
|
import gradio as gr |
|
|
|
import hf_data_loader |
|
|
|
|
|
def group_changes(changes):
    """Bucket change records by timestamp and order them for replay.

    Every record must be a mapping with a ``'ts'`` entry (a string accepted
    by ``datetime.fromisoformat``) and a ``'p'`` entry that is used as the
    sort key inside a bucket.

    Args:
        changes: Iterable of change records.

    Returns:
        A list of lists. There is one inner list per distinct timestamp; the
        outer list is ordered by ascending timestamp and every inner list by
        ascending ``'p'``. Records sharing the same ``'p'`` keep the relative
        order they had in ``changes``.
    """
    groups = {}
    for change in changes:
        timestamp = datetime.fromisoformat(change['ts'])
        groups.setdefault(timestamp, []).append(change)

    # sorted() is stable, so ties on 'p' keep their arrival order.
    return [
        sorted(groups[timestamp], key=lambda change: change['p'])
        for timestamp in sorted(groups)
    ]
|
|
|
|
|
def get_annotated_diff(initial_text, changes):
    """Replay an edit history over ``initial_text`` and annotate each character.

    The changes are grouped and ordered by ``group_changes`` and the groups
    are applied one after another. Within a group the current annotated text
    and the group's changes are merged in a single left-to-right pass.

    Args:
        initial_text: Iterable of characters (normally a ``str``).
        changes: Iterable of change records. Besides the keys required by
            ``group_changes`` each record needs ``'t'`` (``'+'`` for an
            insertion, ``'-'`` for a deletion) and ``'c'`` (the character).

    Returns:
        A list of ``(character, annotation)`` tuples where the annotation is
        ``None`` (untouched), ``'+'`` (inserted) or ``'-'`` (deleted).
        Deleted characters stay in the list so they can be displayed.

    Raises:
        RuntimeError: If a change that is reached during the merge has a
            ``'t'`` value other than ``'+'`` or ``'-'``.
    """
    grouped_changes = group_changes(changes)
    text = [(char, None) for char in initial_text]
    for change_group in grouped_changes:
        next_text = []
        text_pointer = 0    # index into the annotated ``text`` list
        real_text_ind = 0   # running coordinate compared against change['p']
        change_pointer = 0  # index into ``change_group``
        while text_pointer < len(text):
            current = text[text_pointer]
            change = (change_group[change_pointer]
                      if change_pointer < len(change_group) else None)
            if change is None or real_text_ind < change['p']:
                # No change applies here yet: carry the entry over unchanged.
                # NOTE(review): entries already annotated '-' also advance
                # real_text_ind; confirm 'p' is meant to count them.
                next_text.append(current)
                real_text_ind += 1
                text_pointer += 1
            elif change['t'] == '+':
                if not (current[1] == '-' and current[0] == change['c']):
                    next_text.append((change['c'], '+'))
                else:
                    # Re-inserting a character sitting on an identical deleted
                    # entry consumes that entry and emits nothing.
                    # NOTE(review): the character then disappears from the
                    # output altogether; confirm this is intended.
                    text_pointer += 1
                real_text_ind += 1
                change_pointer += 1
            elif change['t'] == '-':
                # Deleting a character that an earlier group inserted cancels
                # out: the entry is dropped instead of being marked '-'.
                if not (current[1] == '+' and current[0] == change['c']):
                    next_text.append((current[0], '-'))
                text_pointer += 1
                real_text_ind += 1
                change_pointer += 1
            else:
                raise RuntimeError("Unexpected branch")

        # The loop above stops as soon as the existing text is exhausted, so
        # insertions located at or after its end (including every insertion
        # into an empty text) were never emitted. Flush them here. Leftover
        # non-insert changes have nothing to act on and are still ignored.
        for change in change_group[change_pointer:]:
            if change['t'] == '+':
                next_text.append((change['c'], '+'))

        text = next_text
    return text
|
|
|
|
|
def annotated_diff_for_row(row):
    """Compute the annotated diff for a single dataset row.

    Args:
        row: Mapping-like object providing ``'commit_msg_start'`` (the text
            the history starts from) and ``'commit_msg_history'`` (a
            JSON-encoded list of change records).

    Returns:
        Whatever ``get_annotated_diff`` returns for the decoded history.
    """
    return get_annotated_diff(
        row['commit_msg_start'],
        json.loads(row['commit_msg_history']),
    )
|
|
|
|
|
if __name__ == '__main__':
    # Demo entry point: load the dataset, compute the annotated diff for
    # every row and display the first row next to its start and end text.
    df = hf_data_loader.load_raw_dataset_as_pandas()
    annotated = df.apply(annotated_diff_for_row, axis=1)
    first_row = df.iloc[0]

    with gr.Blocks(theme=gr.themes.Soft()) as application:
        # ``HighlightedText`` is the supported component name; the lowercase
        # ``Highlightedtext`` spelling is a deprecated alias. ``iloc`` keeps
        # the lookup positional, matching ``df.iloc[0]`` above.
        gr.HighlightedText(
            value=annotated.iloc[0],
            combine_adjacent=True,
            color_map={'+': "green", '-': "red"},
        )
        gr.Markdown(value=first_row['commit_msg_start'])
        gr.Markdown(value=first_row['commit_msg_end'])

    application.launch()
|
|