File size: 6,983 Bytes
b66f230
 
 
 
 
 
 
 
 
23931c3
 
 
b66f230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23931c3
b66f230
23931c3
b66f230
 
23931c3
 
b66f230
 
 
 
 
 
 
 
 
 
 
 
23931c3
 
 
 
 
 
 
 
 
 
 
b66f230
23931c3
b66f230
23931c3
b66f230
23931c3
 
 
 
 
b66f230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23931c3
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import copy
import glob
import json
import os

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, snapshot_download

from compare_significance import check_significance, SUPPORTED_METRICS

# Metrics for which LeaderboardServer.get_leaderboard emits a "<task>_<metric>" column:
# the metrics exported by compare_significance plus "macro_f1".
VISIBLE_METRICS = SUPPORTED_METRICS + ["macro_f1"]

# Module-wide Hugging Face Hub client; LeaderboardServer.save_pre_submit uploads through it.
api = HfApi()

# Hub organisation and the dataset repository that holds submissions and tournament.json.
ORG = "CZLC"
REPO = f"{ORG}/LLM_benchmark_data"
# Access token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
# JSON file whose top-level "tasks" mapping is loaded as LeaderboardServer.tasks_metadata.
TASKS_METADATA_PATH = "./tasks_metadata.json"


class LeaderboardServer:
    """Local mirror of the leaderboard dataset repo plus the pairwise "tournament" table.

    Attributes:
        server_address: Hub repo id the leaderboard is stored in (``REPO``).
        repo_type: Hub repo type, always ``"dataset"``.
        local_leaderboard: Local directory returned by ``snapshot_download``.
        submisssion_id_to_file: Maps ``"<team_name>_<submission_id>"`` to the
            path of the submission JSON (attribute name kept as-is, typo
            included, because external callers may read it).
        tasks_metadata: The ``"tasks"`` mapping from ``TASKS_METADATA_PATH``;
            each value is expected to carry at least a ``"name"`` key.
        submission_ids: Set of all known submission ids.
        tournament_results: Nested mapping
            ``{submission: {opponent: {task: bool}}}`` loaded from
            ``tournament.json``.
        pre_submit: ``None`` or a ``(tournament_results, model_id, file)``
            tuple describing a prepared but not yet uploaded submission.
    """

    def __init__(self):
        """Download the leaderboard snapshot and index the submissions it contains."""
        self.server_address = REPO
        self.repo_type = "dataset"
        self.local_leaderboard = snapshot_download(self.server_address, repo_type=self.repo_type, token=HF_TOKEN,
                                                   local_dir="./")
        self.submisssion_id_to_file = {}  # Map submission ids to file paths
        self.tasks_metadata = self._read_json(TASKS_METADATA_PATH)['tasks']
        self.submission_ids = set()
        self.fetch_existing_models()
        self.tournament_results = self.load_tournament_results()
        self.pre_submit = None

    @staticmethod
    def _read_json(path):
        """Parse the JSON file at *path*, closing the file handle afterwards."""
        with open(path, encoding="utf-8") as json_file:
            return json.load(json_file)

    def update_leaderboard(self):
        """Re-download the snapshot and refresh the submission index and tournament table."""
        self.local_leaderboard = snapshot_download(self.server_address, repo_type=self.repo_type, token=HF_TOKEN,
                                                   local_dir="./")
        self.fetch_existing_models()
        self.tournament_results = self.load_tournament_results()

    def load_tournament_results(self):
        """Return the contents of ``tournament.json``, or ``{}`` when the file does not exist."""
        tournament_path = os.path.join(self.local_leaderboard, "tournament.json")
        if not os.path.exists(tournament_path):
            return {}
        return self._read_json(tournament_path)

    def fetch_existing_models(self):
        """Index every ``data/*.json`` submission that carries a ``"metadata"`` entry.

        Files without metadata are skipped. Entries are only added or
        overwritten; ids indexed by an earlier call are not removed.
        """
        # Models data
        for submission in glob.glob(os.path.join(self.local_leaderboard, "data") + "/*.json"):
            metadata = self._read_json(submission).get('metadata')
            if metadata is None:
                continue
            submission_id = metadata["team_name"] + "_" + metadata["submission_id"]
            self.submission_ids.add(submission_id)
            self.submisssion_id_to_file[submission_id] = submission

    def get_leaderboard(self, tournament_results=None):
        """Build the leaderboard table.

        Args:
            tournament_results: Tournament table to render. When falsy
                (``None`` or empty), ``self.tournament_results`` is used.

        Returns:
            A ``pandas.DataFrame`` with a ``submission_id`` column, one column
            per task (renamed to the task's ``"name"``) holding the number of
            opponents against which the submission is flagged for that task,
            and one ``"<task>_<metric>"`` column for every metric in
            ``VISIBLE_METRICS`` present in the submission's results. The
            pending pre-submit entry, if any, is placed first. With no
            results, a frame with the single column ``'No submissions yet'``
            is returned.

        Raises:
            gr.Error: If a submission listed in the table has no known file.
        """
        results = tournament_results if tournament_results else self.tournament_results

        if not results:
            return pd.DataFrame(columns=['No submissions yet'])

        processed_results = []
        for submission, opponents in results.items():
            is_pre_submit = bool(self.pre_submit) and submission == self.pre_submit[1]

            path = self.submisssion_id_to_file.get(submission)
            if path is None:
                # Not uploaded yet: only the pending pre-submit entry may lack an indexed file.
                if not is_pre_submit:
                    raise gr.Error(f"Internal error: Submission [{submission}] not found")
                path = self.pre_submit[2]
            elif not path:
                raise gr.Error(f"Submission [{submission}] not found")

            data = self._read_json(path)
            submission_id = data["metadata"]["team_name"] + "_" + data["metadata"]["submission_id"]

            local_results = {}
            for task in self.tasks_metadata:
                # Insertion order (task count first, then its metrics) defines the column order below.
                local_results[task] = sum(1 for outcome in opponents.values() if outcome[task])
                task_scores = data['results'][task]
                for metric in VISIBLE_METRICS:
                    metric_value = task_scores.get(metric)
                    if metric_value is not None:
                        local_results[task + "_" + metric] = metric_value

            local_results["submission_id"] = submission_id

            if is_pre_submit:
                processed_results.insert(0, local_results)
            else:
                processed_results.append(local_results)

        dataframe = pd.DataFrame.from_records(processed_results)
        task_columns = list(self.tasks_metadata)
        metric_columns = [col for col in dataframe.columns
                          if col != "submission_id" and col not in self.tasks_metadata]
        dataframe = dataframe[["submission_id"] + task_columns + metric_columns]
        return dataframe.rename(columns={key: value["name"] for key, value in self.tasks_metadata.items()})

    def start_tournament(self, new_model_id, new_model_file):
        """Compare a new submission against every known one, in both directions.

        Args:
            new_model_id: Id under which the new submission is recorded.
            new_model_file: Path to the new submission's JSON file.

        Returns:
            A deep copy of ``self.tournament_results`` extended with the new
            submission; ``self.tournament_results`` itself is not modified.
        """
        new_tournament = copy.deepcopy(self.tournament_results)
        new_tournament[new_model_id] = {
            new_model_id: {task: False for task in self.tasks_metadata}
        }

        for model in self.submission_ids:
            model_file = self.submisssion_id_to_file[model]
            res = check_significance(new_model_file, model_file)
            res_inverse = check_significance(model_file, new_model_file)
            new_tournament[new_model_id][model] = {
                task: data["significant"] for task, data in res.items()
            }
            # A submission file may exist without a row in tournament.json (missing or
            # stale table); create the row instead of failing with KeyError.
            new_tournament.setdefault(model, {})[new_model_id] = {
                task: data["significant"] for task, data in res_inverse.items()
            }
        return new_tournament

    def prepare_model_for_submission(self, file, metadata) -> None:
        """Stamp *metadata* into the submission file and stage it as ``self.pre_submit``.

        The file at *file* is rewritten in place with its ``"metadata"`` key
        replaced, then compared against all known submissions.
        """
        data = self._read_json(file)
        data["metadata"] = metadata
        with open(file, "w") as f:
            json.dump(data, f)

        model_id = metadata["team_name"] + "_" + metadata["submission_id"]
        tournament_results = self.start_tournament(model_id, file)
        self.pre_submit = tournament_results, model_id, file

    def save_pre_submit(self):
        """Upload the staged submission and its tournament table; no-op when nothing is staged."""
        if not self.pre_submit:
            return

        tournament_results, model_id, file = self.pre_submit
        filename = os.path.basename(file)
        api.upload_file(
            path_or_fileobj=file,
            path_in_repo=f"data/{model_id}_{filename}",
            repo_id=self.server_address,
            repo_type=self.repo_type,
            token=HF_TOKEN,
        )

        # Temporary save tournament results
        tournament_results_path = os.path.join(self.local_leaderboard, "tournament.json")
        with open(tournament_results_path, "w") as f:
            json.dump(tournament_results, f)

        api.upload_file(
            path_or_fileobj=tournament_results_path,
            path_in_repo="tournament.json",
            repo_id=self.server_address,
            repo_type=self.repo_type,
            token=HF_TOKEN,
        )

    def get_model_detail(self, submission_id):
        """Return the ``"metadata"`` entry of the submission with the given id.

        Raises:
            gr.Error: If *submission_id* is not in the submission index.
        """
        path = self.submisssion_id_to_file.get(submission_id)
        if path is None:
            raise gr.Error(f"Submission [{submission_id}] not found")
        return self._read_json(path)["metadata"]