commit-message-editing-visualization / generate_annotated_diffs.py
Petr Tsvetkov
WIP on annotated diffs generation
6503e4e
raw
history blame
2.52 kB
import json
from datetime import datetime
import gradio as gr
import hf_data_loader
def group_changes(changes):
    """Bucket edit records by timestamp and order them for replay.

    Args:
        changes: Iterable of dicts. Each one must provide an ISO 8601
            timestamp string under ``'ts'`` and a sortable position under
            ``'p'``; any other keys are carried along untouched.

    Returns:
        A list of lists. There is one inner list per distinct timestamp, the
        outer list is in ascending timestamp order, and every inner list is
        sorted by ``'p'`` (stable, so records with equal positions keep their
        input order). The inner lists hold the very same dict objects that
        were passed in.

    Raises:
        ValueError: If a timestamp cannot be parsed by
            ``datetime.fromisoformat``.
    """
    groups = {}
    for change in changes:
        timestamp = change['ts']
        # A trailing 'Z' (the UTC designator, e.g. what JavaScript's
        # Date.toISOString() emits) is only understood by
        # datetime.fromisoformat from Python 3.11 on, so spell the offset out.
        if isinstance(timestamp, str) and timestamp.endswith('Z'):
            timestamp = timestamp[:-1] + '+00:00'
        groups.setdefault(datetime.fromisoformat(timestamp), []).append(change)

    # sorted() is stable, exactly like the list.sort() it replaces.
    return [
        sorted(groups[moment], key=lambda change: change['p'])
        for moment in sorted(groups)
    ]
def get_annotated_diff(initial_text, changes):
    """Replay an edit history over ``initial_text`` and tag every character.

    The changes are bucketed and ordered by ``group_changes`` and then applied
    one bucket at a time. Each pass walks the annotated text produced by the
    previous pass and builds a new annotated text.

    Args:
        initial_text: The starting text; it is iterated character by
            character.
        changes: Iterable of dicts with the keys ``'ts'`` (consumed by
            ``group_changes``), ``'p'`` (position), ``'t'`` (``'+'`` for an
            insertion, ``'-'`` for a deletion) and ``'c'`` (the character).

    Returns:
        A list of ``(character, tag)`` tuples where ``tag`` is ``None`` for
        untouched characters, ``'+'`` for inserted ones and ``'-'`` for
        deleted ones.

    Raises:
        RuntimeError: If a change that is due to be applied has a ``'t'``
            value other than ``'+'`` or ``'-'``.
    """
    grouped_changes = group_changes(changes)
    text = [(char, None) for char in initial_text]

    for change_group in grouped_changes:
        next_text = []
        text_pointer = 0    # index into the annotated text of the previous pass
        real_text_ind = 0   # running position that is compared with change['p']
        change_pointer = 0  # index of the next change to apply in this bucket

        # NOTE(review): the copy of this file under review had lost its
        # indentation. The counter updates below follow the reading in which
        # real_text_ind advances for every applied insertion (otherwise it
        # would always equal text_pointer) -- confirm against the original.
        while text_pointer < len(text):
            current = text[text_pointer]
            change = (
                change_group[change_pointer]
                if change_pointer < len(change_group)
                else None
            )

            if change is None or real_text_ind < change['p']:
                # Nothing to apply at this position: copy the entry through.
                next_text.append(current)
                real_text_ind += 1
                text_pointer += 1
            elif change['t'] == '+':
                if current[1] == '-' and current[0] == change['c']:
                    # Re-inserting a character that an earlier pass marked as
                    # deleted: the deleted entry is consumed and nothing is
                    # emitted.
                    # NOTE(review): this removes the character from the result
                    # altogether; emitting (char, None) may be what is wanted.
                    text_pointer += 1
                else:
                    next_text.append((change['c'], '+'))
                real_text_ind += 1
                change_pointer += 1
            elif change['t'] == '-':
                # Deleting a character that an earlier pass inserted cancels
                # out; any other character is kept but tagged as deleted.
                if not (current[1] == '+' and current[0] == change['c']):
                    next_text.append((current[0], '-'))
                text_pointer += 1
                real_text_ind += 1
                change_pointer += 1
            else:
                raise RuntimeError("Unexpected branch")

        # The loop above stops as soon as the previous text is consumed, which
        # used to drop insertions positioned at (or past) the end of the text
        # -- including every insertion into an empty text. Append them here.
        # Any other leftover change has no character left to act on and is
        # skipped, as before.
        for change in change_group[change_pointer:]:
            if change['t'] == '+':
                next_text.append((change['c'], '+'))

        text = next_text

    return text
def annotated_diff_for_row(row):
    """Build the annotated diff for a single dataset row.

    Args:
        row: Mapping-like object. ``row['commit_msg_start']`` is used as the
            initial text and ``row['commit_msg_history']`` must be a JSON
            document holding the change records.

    Returns:
        Whatever ``get_annotated_diff`` returns for that text and history.
    """
    # Arguments are evaluated left to right: the start text is read first,
    # then the history is decoded.
    return get_annotated_diff(
        row['commit_msg_start'],
        json.loads(row['commit_msg_history']),
    )
if __name__ == '__main__':
    # Load the dataset and compute one annotated diff per row.
    df = hf_data_loader.load_raw_dataset_as_pandas()
    annotated = df.apply(annotated_diff_for_row, axis=1)

    # Preview the first row: the annotated diff followed by the start and end
    # versions of the commit message.
    with gr.Blocks(theme=gr.themes.Soft()) as application:
        # Use positional access (.iloc) here as well, so that the diff shown
        # always belongs to the same row as the two messages below it, even
        # when the frame's index does not contain the label 0.
        gr.Highlightedtext(
            value=annotated.iloc[0],
            combine_adjacent=True,
            color_map={'+': "green", '-': "red"},
        )
        gr.Markdown(value=df.iloc[0]['commit_msg_start'])
        gr.Markdown(value=df.iloc[0]['commit_msg_end'])

    application.launch()