Petr Tsvetkov
committed on
Commit
•
c151bb0
1
Parent(s):
0b259d2
Fix the statistics in visualization
Browse files
- change_visualizer.py +2 -2
- statistics.py +3 -11
change_visualizer.py
CHANGED
@@ -9,8 +9,8 @@ n_diffs_manual = len(df_manual)
|
|
9 |
df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
|
10 |
n_diffs_synthetic = len(df_synthetic)
|
11 |
|
12 |
-
STATISTICS = {"manual": statistics.
|
13 |
-
"synthetic": statistics.
|
14 |
|
15 |
|
16 |
def update_dataset_view(diff_idx):
|
|
|
9 |
df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
|
10 |
n_diffs_synthetic = len(df_synthetic)
|
11 |
|
12 |
+
STATISTICS = {"manual": statistics.get_statistics_for_df(df_manual),
|
13 |
+
"synthetic": statistics.get_statistics_for_df(df_synthetic)}
|
14 |
|
15 |
|
16 |
def update_dataset_view(diff_idx):
|
statistics.py
CHANGED
@@ -22,18 +22,10 @@ def get_statistics(start_msg, end_msg, annotated_msg):
|
|
22 |
}
|
23 |
|
24 |
|
25 |
-
def get_statistics_for_df(df: pd.DataFrame
|
26 |
-
stats = [get_statistics(row[
|
|
|
27 |
|
28 |
assert len(stats) > 0
|
29 |
|
30 |
return {stat_name: np.asarray([e[stat_name] for e in stats]) for stat_name in stats[0]}
|
31 |
-
|
32 |
-
|
33 |
-
def get_statistics_for_manual_df(df):
|
34 |
-
return get_statistics_for_df(df, start_col="commit_msg_start", end_col='commit_msg_end',
|
35 |
-
annotated_col='annotated_diff')
|
36 |
-
|
37 |
-
|
38 |
-
def get_statistics_for_synthetic_df(df):
|
39 |
-
return get_statistics_for_df(df, start_col="initial_msg_pred", end_col='reference', annotated_col='annotated_diff')
|
|
|
22 |
}
|
23 |
|
24 |
|
25 |
+
def get_statistics_for_df(df: pd.DataFrame):
|
26 |
+
stats = [get_statistics(row["commit_msg_start"], row["commit_msg_end"], row["annotated_diff"]) for _, row in
|
27 |
+
df.iterrows()]
|
28 |
|
29 |
assert len(stats) > 0
|
30 |
|
31 |
return {stat_name: np.asarray([e[stat_name] for e in stats]) for stat_name in stats[0]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|