"""Render the edit history of a commit message as a character-level diff.

Each row of the dataset supplies the starting message (``commit_msg_start``)
and a JSON-encoded list of single-character changes (``commit_msg_history``).
The helpers below replay those changes and tag every character as unchanged
(``None``), inserted (``'+'``) or deleted (``'-'``).
"""
import json
from collections import defaultdict
from datetime import datetime
from operator import itemgetter


def group_changes(changes):
    """Bucket changes by timestamp and order them for replay.

    Args:
        changes: iterable of dicts; each needs an ISO-8601 ``'ts'`` string and
            a sortable ``'p'`` position.

    Returns:
        A list of lists: one inner list per distinct timestamp, oldest first,
        each sorted by ``'p'`` (stable, so ties keep their input order).
    """
    groups = defaultdict(list)
    for change in changes:
        groups[datetime.fromisoformat(change['ts'])].append(change)

    return [sorted(groups[ts], key=itemgetter('p')) for ts in sorted(groups)]


def _apply_change_group(text, change_group):
    """Merge one position-sorted group of changes into the annotated text.

    Args:
        text: list of ``(char, mark)`` tuples, ``mark`` in ``{None, '+', '-'}``.
        change_group: changes sharing one timestamp, sorted by ``'p'``.

    Returns:
        A new list of ``(char, mark)`` tuples.

    Raises:
        RuntimeError: if a change reached inside the text has a ``'t'`` other
            than ``'+'`` or ``'-'``.
    """
    next_text = []
    text_pointer = 0
    # NOTE(review): this counter also advances over characters that earlier
    # groups already marked '-'. Whether that matches the coordinate system of
    # 'p' cannot be determined from this file -- confirm against the producer
    # of the history data.
    real_text_ind = 0
    change_pointer = 0

    while text_pointer < len(text):
        change = change_group[change_pointer] if change_pointer < len(change_group) else None

        # No change is due at this position yet: copy the character through.
        if change is None or real_text_ind < change['p']:
            next_text.append(text[text_pointer])
            text_pointer += 1
            real_text_ind += 1
            continue

        char, mark = text[text_pointer]
        if change['t'] == '+':
            if mark == '-' and char == change['c']:
                # The character was deleted earlier and is now typed again, so
                # it is present in the message: restore it as unchanged rather
                # than dropping it.
                next_text.append((char, None))
                text_pointer += 1
            else:
                next_text.append((change['c'], '+'))
        elif change['t'] == '-':
            # Deleting a character that was itself inserted earlier leaves no
            # trace; anything else is kept and marked as deleted.
            if not (mark == '+' and char == change['c']):
                next_text.append((char, '-'))
            text_pointer += 1
        else:
            raise RuntimeError("Unexpected branch")
        real_text_ind += 1
        change_pointer += 1

    # Insertions positioned at or beyond the end of the existing text (e.g.
    # typing at the end of the message, or into an empty message) are still
    # pending here; append them instead of silently losing them. Pending
    # deletions have nothing left to act on and are ignored.
    for change in change_group[change_pointer:]:
        if change['t'] == '+':
            next_text.append((change['c'], '+'))

    return next_text


def get_annotated_diff(initial_text, changes):
    """Replay ``changes`` over ``initial_text`` and annotate every character.

    Args:
        initial_text: the starting string.
        changes: iterable of dicts with keys ``'ts'`` (ISO-8601 timestamp),
            ``'p'`` (position), ``'t'`` (``'+'`` insert / ``'-'`` delete) and
            ``'c'`` (the character).

    Returns:
        A list of ``(char, mark)`` tuples where ``mark`` is ``None`` for an
        untouched character, ``'+'`` for an inserted one and ``'-'`` for a
        deleted one.

    Raises:
        RuntimeError: on a change whose ``'t'`` is neither ``'+'`` nor ``'-'``.
    """
    text = [(c, None) for c in initial_text]
    for change_group in group_changes(changes):
        text = _apply_change_group(text, change_group)
    return text


def annotated_diff_for_row(row):
    """Build the annotated diff for one dataset row.

    ``row`` must support ``row['commit_msg_start']`` (the starting text) and
    ``row['commit_msg_history']`` (a JSON string holding the change list).
    """
    start = row['commit_msg_start']
    changes = json.loads(row['commit_msg_history'])
    return get_annotated_diff(start, changes)


def main():
    """Load the dataset and launch a Gradio page for its first row."""
    # Imported here rather than at module level so the pure diff helpers above
    # can be imported (and tested) without the UI stack or the dataset loader.
    import gradio as gr

    import hf_data_loader

    df = hf_data_loader.load_raw_dataset_as_pandas()
    annotated = df.apply(annotated_diff_for_row, axis=1)
    first_row = df.iloc[0]

    with gr.Blocks(theme=gr.themes.Soft()) as application:
        # Positional .iloc keeps this in step with first_row even if the frame
        # does not carry a default 0..n-1 index.
        gr.HighlightedText(value=annotated.iloc[0], combine_adjacent=True,
                           color_map={'+': "green", '-': "red"})
        gr.Markdown(value=first_row['commit_msg_start'])
        gr.Markdown(value=first_row['commit_msg_end'])

    application.launch()


if __name__ == '__main__':
    main()