BenCzechMark / server.py
Lakoc
v0.0.2 added model details, proper ranking and modal for confirmation
23931c3
raw
history blame
6.98 kB
import copy
import glob
import json
import os
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, snapshot_download
from compare_significance import check_significance, SUPPORTED_METRICS
# Metrics rendered as leaderboard columns: everything the significance
# comparison supports, plus macro F1.
VISIBLE_METRICS = [*SUPPORTED_METRICS, "macro_f1"]

# Location of the benchmark data on the Hugging Face Hub.
ORG = "CZLC"
REPO = f"{ORG}/LLM_benchmark_data"

# Access token taken from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Shared Hub client (used below for file uploads).
api = HfApi()

# JSON file whose 'tasks' entry describes the benchmark tasks.
TASKS_METADATA_PATH = "./tasks_metadata.json"
class LeaderboardServer:
    """Local view of the leaderboard dataset stored on the Hugging Face Hub.

    The server keeps a snapshot of the dataset repository on disk, indexes
    the submission files found in its ``data`` directory and holds the
    pairwise tournament results loaded from ``tournament.json``.

    Attributes are documented where they are assigned in ``__init__``.
    """

    def __init__(self):
        self.server_address = REPO
        self.repo_type = "dataset"
        # Map submission ids to file paths. The attribute name (including its
        # spelling) is kept as-is because code outside this class may use it.
        self.submisssion_id_to_file = {}
        self.submission_ids = set()
        # Pending, not yet uploaded submission:
        # (tournament results including it, its id, path of its file).
        self.pre_submit = None
        # Downloads the snapshot (sets ``self.local_leaderboard``), indexes
        # the submissions and loads ``self.tournament_results``.
        self.update_leaderboard()
        # Read after the download, as the original code did.
        self.tasks_metadata = self._read_json(TASKS_METADATA_PATH)['tasks']

    @staticmethod
    def _read_json(path):
        """Parse the JSON file at *path*, closing the file afterwards."""
        with open(path, encoding="utf-8") as handle:
            return json.load(handle)

    def update_leaderboard(self):
        """Re-download the dataset snapshot and refresh the derived state."""
        self.local_leaderboard = snapshot_download(
            self.server_address,
            repo_type=self.repo_type,
            token=HF_TOKEN,
            local_dir="./",
        )
        self.fetch_existing_models()
        self.tournament_results = self.load_tournament_results()

    def load_tournament_results(self):
        """Return the content of ``tournament.json`` from the local snapshot.

        Returns:
            The parsed JSON, or an empty dict when the file does not exist.
        """
        ranks_path = os.path.join(self.local_leaderboard, "tournament.json")
        if not os.path.exists(ranks_path):
            return {}
        return self._read_json(ranks_path)

    def fetch_existing_models(self):
        """Index every ``data/*.json`` submission that carries metadata.

        Files without a ``metadata`` entry are skipped. Known ids are added
        to ``self.submission_ids`` and mapped to their file path; entries
        indexed by earlier calls are kept.
        """
        pattern = os.path.join(self.local_leaderboard, "data", "*.json")
        for submission in glob.glob(pattern):
            metadata = self._read_json(submission).get('metadata')
            if metadata is None:
                continue
            submission_id = metadata["team_name"] + "_" + metadata["submission_id"]
            self.submission_ids.add(submission_id)
            self.submisssion_id_to_file[submission_id] = submission

    def get_leaderboard(self, tournament_results=None):
        """Build the leaderboard table.

        Args:
            tournament_results: Results to render; when empty or omitted the
                results stored on the instance are used.

        Returns:
            A DataFrame with one row per submission: ``submission_id``, then
            one column per task holding the number of opponents for which the
            stored flag of that task is truthy, then the available
            ``<task>_<metric>`` values. The row of a pending submission is
            placed first. With no results, an empty frame with the single
            column 'No submissions yet' is returned.

        Raises:
            gr.Error: A submission in the results has no known file.
        """
        results = tournament_results if tournament_results else self.tournament_results
        if not results:
            return pd.DataFrame(columns=['No submissions yet'])

        pending_id = self.pre_submit[1] if self.pre_submit else None
        rows = []
        for submission, duels in results.items():
            path = self.submisssion_id_to_file.get(submission)
            if path is None and submission == pending_id:
                # Not uploaded yet: read the locally prepared file instead.
                path = self.pre_submit[2]
            if not path:
                raise gr.Error(f"Internal error: Submission [{submission}] not found")
            data = self._read_json(path)
            metadata = data["metadata"]

            row = {}
            for task in self.tasks_metadata:
                # A task missing from a duel record counts as "not won".
                row[task] = sum(1 for outcome in duels.values() if outcome.get(task))
                # A submission without results for a task contributes no
                # metric columns for it.
                task_scores = data['results'].get(task, {})
                for metric in VISIBLE_METRICS:
                    metric_value = task_scores.get(metric)
                    if metric_value is not None:
                        row[task + "_" + metric] = metric_value
            row["submission_id"] = metadata["team_name"] + "_" + metadata["submission_id"]

            if submission == pending_id:
                rows.insert(0, row)
            else:
                rows.append(row)

        dataframe = pd.DataFrame.from_records(rows)
        task_columns = list(self.tasks_metadata)
        other_columns = [
            col for col in dataframe.columns
            if col != "submission_id" and col not in self.tasks_metadata
        ]
        dataframe = dataframe[["submission_id"] + task_columns + other_columns]
        # Show the human readable task names in the header.
        return dataframe.rename(
            columns={key: value["name"] for key, value in self.tasks_metadata.items()}
        )

    def start_tournament(self, new_model_id, new_model_file):
        """Compare a new submission with every indexed submission.

        The stored tournament results are not modified; a deep copy extended
        with the new submission is returned.

        Args:
            new_model_id: Id of the new submission.
            new_model_file: Path of the new submission's result file.

        Returns:
            Nested dict ``{submission: {opponent: {task: flag}}}`` where the
            flag is the ``"significant"`` value reported by
            ``check_significance(submission_file, opponent_file)``.
        """
        new_tournament = copy.deepcopy(self.tournament_results)
        new_tournament[new_model_id] = {
            new_model_id: {task: False for task in self.tasks_metadata}
        }
        for model in self.submission_ids:
            model_file = self.submisssion_id_to_file[model]
            res = check_significance(new_model_file, model_file)
            res_inverse = check_significance(model_file, new_model_file)
            new_tournament[new_model_id][model] = {
                task: data["significant"] for task, data in res.items()
            }
            # An indexed submission may have no row yet (e.g. tournament.json
            # is missing or stale); create it instead of raising KeyError.
            new_tournament.setdefault(model, {})[new_model_id] = {
                task: data["significant"] for task, data in res_inverse.items()
            }
        return new_tournament

    def prepare_model_for_submission(self, file, metadata) -> None:
        """Attach *metadata* to the result file and stage it for submission.

        The file is rewritten in place with a ``"metadata"`` entry, the
        tournament including the new submission is computed and everything is
        stored in ``self.pre_submit`` until ``save_pre_submit`` is called.

        Args:
            file: Path of the JSON result file.
            metadata: Dict with at least ``team_name`` and ``submission_id``.
        """
        data = self._read_json(file)
        data["metadata"] = metadata
        with open(file, "w", encoding="utf-8") as f:
            json.dump(data, f)

        model_id = metadata["team_name"] + "_" + metadata["submission_id"]
        tournament_results = self.start_tournament(model_id, file)
        self.pre_submit = (tournament_results, model_id, file)

    def save_pre_submit(self):
        """Upload the staged submission and its tournament results.

        Does nothing when no submission is staged. The tournament results are
        first written to ``tournament.json`` in the local snapshot and that
        file is then uploaded.
        """
        if not self.pre_submit:
            return

        tournament_results, model_id, file = self.pre_submit
        filename = os.path.basename(file)
        api.upload_file(
            path_or_fileobj=file,
            path_in_repo=f"data/{model_id}_{filename}",
            repo_id=self.server_address,
            repo_type=self.repo_type,
            token=HF_TOKEN,
        )

        # Temporary save tournament results
        tournament_results_path = os.path.join(self.local_leaderboard, "tournament.json")
        with open(tournament_results_path, "w", encoding="utf-8") as f:
            json.dump(tournament_results, f)

        api.upload_file(
            path_or_fileobj=tournament_results_path,
            path_in_repo="tournament.json",
            repo_id=self.server_address,
            repo_type=self.repo_type,
            token=HF_TOKEN,
        )

    def get_model_detail(self, submission_id):
        """Return the ``metadata`` entry of an indexed submission.

        Raises:
            gr.Error: *submission_id* is not indexed.
        """
        path = self.submisssion_id_to_file.get(submission_id)
        if path is None:
            raise gr.Error(f"Submission [{submission_id}] not found")
        return self._read_json(path)["metadata"]