# BenCzechMark / app.py
# Commit 8c3991f by idolezal: "Added table with tournament results for selected model"
# File size: 17.6 kB
import os
import regex as re
import gradio as gr
import pandas as pd
from gradio.themes.utils.sizes import text_md
from gradio_modal import Modal
from content import (
HEADER_MARKDOWN,
LEADERBOARD_TAB_TITLE_MARKDOWN,
SUBMISSION_TAB_TITLE_MARKDOWN,
MODAL_SUBMIT_MARKDOWN,
SUBMISSION_DETAILS_MARKDOWN,
RANKING_AFTER_SUBMISSION_MARKDOWN,
MORE_DETAILS_MARKDOWN,
)
from server import LeaderboardServer
# Single module-level server object; every callback and the UI construction
# below read and update the leaderboard through this instance.
leaderboard_server = LeaderboardServer()
# Ordered, set-like view of the submission form field names.
# The order is what pairs positional callback arguments with field names,
# and a dict key view (rather than a tuple) keeps set operators such as `-`
# available to the code that uses it.
SUBMISSION_INPUTS = {
    field_name: None
    for field_name in (
        "team_name",
        "model_name",
        "model_type",
        "parameters",
        "input_length",
        "precision",
        "description",
        "link_to_model",
        "submission_file",
    )
}.keys()
def on_submit_pressed():
    """Return the update that relabels and disables the pre-submit button while a submission is processed."""
    busy_state = {"value": 'Processing submission...', "interactive": False}
    return gr.update(**busy_state)
def validate_submission_inputs(**inputs):
    """Check the submission form values and raise on the first problem found.

    The form fields are passed as keyword arguments. Every field except
    ``description`` must be neither ``None`` nor an empty string; the
    remaining checks read ``submission_file``, ``link_to_model``,
    ``parameters`` and ``input_length``.

    Returns:
        None when every check passes.

    Raises:
        ValueError: with a user-facing message when a required field is
            empty, the uploaded file path does not exist, the link has no
            ``http://``/``https://`` prefix, ``parameters`` is not greater
            than zero, or ``input_length`` is not a positive whole number.
    """
    has_empty_required_field = any(
        value in (None, "")
        for key, value in inputs.items()
        if key != "description"
    )
    if has_empty_required_field:
        raise ValueError('Please fill in all fields (only the description field is optional)')

    if not os.path.exists(inputs["submission_file"]):
        raise ValueError('File does not exist')

    # str.startswith accepts a tuple of prefixes, so one call covers both schemes.
    if not inputs["link_to_model"].startswith(("http://", "https://")):
        raise ValueError('Link does not starts with "http://" or "https://"')

    if not inputs["parameters"] > 0:
        raise ValueError('Attribute `Parameters (B)` should be greater than zero')

    input_length = inputs["input_length"]
    try:
        is_positive_whole_number = input_length > 0 and input_length == int(input_length)
    except OverflowError:
        # int(float("inf")) raises OverflowError; report it as an invalid
        # input length instead of letting a non-validation error escape.
        is_positive_whole_number = False
    if not is_positive_whole_number:
        raise ValueError('Attribute `Input length (# tokens)` should be greater than zero and integer type')
def _pre_submit_failed_updates():
    """Build the updates that put the submission form back into its initial state after a failed pre-submit."""
    return (
        gr.update(value='Pre-submit model', visible=True, interactive=True),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )


def process_submission(*inputs):
    """Normalize and validate the submission form, then run the pre-submit step.

    Args:
        *inputs: form values in the order of ``SUBMISSION_INPUTS``.

    Returns:
        A 6-tuple of component updates, in the order of the outputs wired to
        the pre-submit button: pre-submit button, submit prompt, "Submit
        model" button, "Reverse process" button, pre-submit info text and
        pre-submit table. On success the button is hidden and the other
        components are shown, with the table filled from the leaderboard
        computed for the pre-submit. On any failure a warning is raised in
        the UI and the form is reset so the user can try again.
    """
    try:
        inputs = dict(zip(SUBMISSION_INPUTS, inputs))
        for key, value in inputs.items():
            if not isinstance(value, str):
                # Non-text values (None, numbers, a missing upload) are left
                # for validate_submission_inputs to judge, so the user gets
                # the validation message rather than a TypeError text.
                if key == "description" and value is None:
                    # The description is optional: treat "absent" as empty.
                    inputs[key] = ""
                continue
            if key in ("team_name", "model_name"):
                # Collapse runs of whitespace so names compare consistently.
                inputs[key] = re.sub(r"""\s+""", " ", value).strip()
            elif key in ("description", "link_to_model"):
                inputs[key] = value.strip()

        validate_submission_inputs(**inputs)

        # Everything but the uploaded file is metadata; iterating the ordered
        # key view keeps the metadata keys in form order.
        metadata = {key: inputs[key] for key in SUBMISSION_INPUTS if key != "submission_file"}

        gr.Info('Submission valid, going to queue for the tournament...')

        pre_submit = leaderboard_server.prepare_model_for_submission(inputs["submission_file"], metadata)
        # Computed inside the try block: if this fails, the handlers below
        # still re-enable the pre-submit button instead of leaving it stuck
        # on "Processing submission...".
        pre_submit_leaderboard = leaderboard_server.get_leaderboard(pre_submit)
    except ValueError as err:
        # Validation problems: ordinary, auto-dismissed warning.
        gr.Warning(str(err))
        return _pre_submit_failed_updates()
    except Exception as err:
        # Unexpected failures: keep the warning on screen until dismissed.
        gr.Warning(str(err), duration=None)
        return _pre_submit_failed_updates()

    return (
        gr.update(visible=False),
        gr.update(visible=True),
        gr.update(interactive=True, visible=True),
        gr.update(interactive=True, visible=True),
        gr.update(visible=True),
        gr.update(
            value=pre_submit_leaderboard,
            visible=True,
            datatype="markdown",
            elem_classes="leaderboard-table",
        ),
    )
def submit_results():
    """Save the pending pre-submit, refresh the leaderboard and reset the submission UI.

    Returns ten updates in the order of the outputs wired to the modal's
    "Yes" button: pre-submit button, "Submit model" button, "Reverse process"
    button, submit prompt, pre-submit info, pre-submit table, main results
    table, confirmation modal, model-detail dropdown and category dropdown.
    """
    server = leaderboard_server
    server.save_pre_submit()
    server.update_leaderboard()
    gr.Info('Submission successful!')

    # Read the table and the id list under the read lock.
    with server.var_lock.ro:
        refreshed_table = server.get_leaderboard(category=server.TASKS_CATEGORY_OVERALL)
        known_submission_ids = server.submission_ids

    reset_button = gr.update(value='Pre-submit model', visible=True, interactive=True)
    hidden_parts = [gr.update(visible=False) for _ in range(5)]
    return (
        reset_button,
        *hidden_parts,
        gr.DataFrame(value=refreshed_table, visible=True),
        gr.update(visible=False),
        gr.update(choices=known_submission_ids),
        gr.update(value=server.TASKS_CATEGORY_OVERALL),
    )
def erase_pre_submit():
    """Discard the pending pre-submit and reset the submission form.

    Returns:
        A 6-tuple of component updates matching the six outputs wired to the
        "Reverse process" button: the pre-submit button is restored and the
        "Submit model" button, "Reverse process" button, submit prompt,
        pre-submit info and pre-submit table are hidden. (The function used
        to return seven updates, one more than the declared outputs.)
    """
    with leaderboard_server.pre_submit_lock:
        if leaderboard_server.pre_submit:
            leaderboard_server.pre_submit = None  # NOTE: Is it safe? How to confirm that `submission_id` is equal?

    return (
        gr.update(value='Pre-submit model', visible=True, interactive=True),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )
def fetch_model_detail(submission_id):
    """Look up a submission's metadata and return updates showing its description and model link."""
    detail = leaderboard_server.get_model_detail(submission_id)
    description_update = gr.update(value=detail['description'], visible=True)
    link_update = gr.update(value=detail['link_to_model'], visible=True)
    return description_update, link_update
def fetch_model_tournament_results_table(submission_id, category):
    """Return an update that shows the tournament table of one submission for the given category."""
    tournament_table = leaderboard_server.get_model_tournament_table(submission_id, category)
    return gr.update(value=tournament_table, visible=True)
def change_leaderboard_category(category):
    """Build the updates needed when a different benchmark category is selected.

    Returns four updates, in order: the leaderboard table for ``category``,
    the task-abbreviation legend, the tournament model dropdown and the
    tournament results table. For the overall category the last three are
    hidden; for any other category the legend is filled and shown, the
    dropdown is shown and the tournament table is left as it is.
    """
    showing_overall = category == leaderboard_server.TASKS_CATEGORY_OVERALL
    if showing_overall:
        legend_update = gr.update(visible=False)
        dropdown_update = gr.update(visible=False)
        tournament_table_update = gr.update(visible=False)
    else:
        abbreviation_to_name = leaderboard_server.CATEGORY_TO_TASK_ABBREVIATION_TO_NAME[category]
        # One [abbreviation, full name] row per task of the category.
        legend_rows = [[abbr, name] for abbr, name in abbreviation_to_name.items()]
        legend_update = gr.update(value=legend_rows, visible=True)
        dropdown_update = gr.update(visible=True)
        # Empty update: the tournament table keeps its current state.
        tournament_table_update = gr.update()

    leaderboard_update = gr.update(
        value=leaderboard_server.get_leaderboard(category=category),
        visible=True,
        datatype="markdown",
    )
    return (
        leaderboard_update,
        legend_update,
        dropdown_update,
        tournament_table_update,
    )
def show_modal():
    """Return an update that makes the target component visible (wired to the submit-confirmation modal)."""
    reveal = gr.update(visible=True)
    return reveal
def hide_modal():
    """Return an update that hides the target component (wired to the submit-confirmation modal)."""
    conceal = gr.update(visible=False)
    return conceal
def on_application_load():
    """Refresh the leaderboard on page load and return the initial state of the main widgets.

    Returns three values, in order: the overall leaderboard table, an update
    with the current submission ids for the model-detail dropdown, and an
    update that selects the overall category.
    """
    leaderboard_server.update_leaderboard()

    # Table and id list are read under the read lock.
    with leaderboard_server.var_lock.ro:
        overall_table = gr.DataFrame(
            value=leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        model_choices = gr.update(choices=leaderboard_server.submission_ids)
        selected_category = gr.update(value=leaderboard_server.TASKS_CATEGORY_OVERALL)
    return overall_table, model_choices, selected_category
# Stylesheet passed to `gr.Blocks(css=...)`.
# - Hides the page footer.
# - For elements with the `leaderboard-table` class: gives the first header
#   row a background, pins the first five columns with `position: sticky`
#   (each offset by the summed `--cell-width-N` variables of the columns to
#   its left -- presumably set by Gradio's dataframe component; verify),
#   renders the fifth column's cell text bolder and draws an inset shadow
#   just right of the fifth column via an `::after` pseudo-element.
custom_css = """
footer {visibility: hidden}
.leaderboard-table tr:first-child th {
background-color: var(--table-even-background-fill);
}
.leaderboard-table th:first-child, .leaderboard-table td:first-child {
position: sticky;
left: 0;
z-index: 1;
background-color: inherit;
}
.leaderboard-table td:first-child p {
margin: 0px;
}
.leaderboard-table th:nth-child(2), .leaderboard-table td:nth-child(2) {
position: sticky;
left: var(--cell-width-0);
z-index: 1;
background-color: inherit;
}
.leaderboard-table th:nth-child(3), .leaderboard-table td:nth-child(3) {
position: sticky;
left: calc(var(--cell-width-0) + var(--cell-width-1));
z-index: 1;
background-color: inherit;
}
.leaderboard-table th:nth-child(4), .leaderboard-table td:nth-child(4) {
position: sticky;
left: calc(var(--cell-width-0) + var(--cell-width-1) + var(--cell-width-2));
z-index: 1;
background-color: inherit;
}
.leaderboard-table th:nth-child(5), .leaderboard-table td:nth-child(5) {
position: sticky;
left: calc(var(--cell-width-0) + var(--cell-width-1) + var(--cell-width-2) + var(--cell-width-3));
z-index: 1;
background-color: inherit;
}
.leaderboard-table td:nth-child(5) p {
font-weight: bolder;
}
.leaderboard-table th:nth-child(5)::after, .leaderboard-table td:nth-child(5)::after {
box-shadow: inset 5px 0px 4px -4px var(--border-color-primary);
position: absolute;
top: 0;
right: 0;
bottom: -1px;
content: "";
width: 30px;
transform: translateX(100%);
}
"""
# Build the Gradio UI (leaderboard tab, model-details tab, submission tab),
# wire the callbacks defined above to its components, then start the app.
# NOTE(review): this file had lost its indentation, so the nesting of the
# layout containers (which components sit inside which Row/Column) was
# reconstructed -- confirm it against the rendered page.
with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css) as main:
    gr.Markdown(HEADER_MARKDOWN)

    with gr.Tabs():
        # ---- Tab: leaderboard table, task legend and per-model tournament results ----
        with gr.TabItem('Leaderboard'):
            with gr.Column():
                gr.Markdown(LEADERBOARD_TAB_TITLE_MARKDOWN)

                with gr.Row():
                    # Category selector; the overall category is listed first and preselected.
                    category_of_tasks = gr.Dropdown(
                        choices=[leaderboard_server.TASKS_CATEGORY_OVERALL] + sorted(leaderboard_server.TASKS_CATEGORIES),
                        value=leaderboard_server.TASKS_CATEGORY_OVERALL,
                        label="Category of benchmarks",
                        interactive=True,
                    )

                with gr.Row():
                    # Main leaderboard; `leaderboard-table` is the class targeted by `custom_css`.
                    results_table = gr.DataFrame(
                        leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
                        interactive=False,
                        label=None,
                        visible=True,
                        datatype="markdown",
                        elem_classes="leaderboard-table",
                    )

                with gr.Row():
                    # Legend of task abbreviations; hidden until a non-overall
                    # category is selected (see `change_leaderboard_category`).
                    results_table_legend = gr.DataFrame(
                        value=None,
                        headers=[
                            "task abbr.",  # "task abbreviation"
                            "task name",
                        ],
                        column_widths=["150px"],
                        datatype="str",
                        label=None,
                        visible=False,
                        interactive=False,
                        elem_classes="leaderboard-table-legend",
                    )

                with gr.Row():
                    gr.Markdown("## Tournament results for selected model")

                    # The submission ids are read under the read lock.
                    with leaderboard_server.var_lock.ro:
                        tournament_results_dropdown = gr.Dropdown(
                            value=None,
                            choices=leaderboard_server.submission_ids,  # TODO: team_name/model_name
                            label="Select model",
                            visible=False,
                            interactive=True,
                        )

                    model_tournament_results_table = gr.DataFrame(
                        value=None,
                        datatype="markdown",
                        label="The model won against…",
                        visible=False,
                        interactive=False,
                        elem_classes="leaderboard-table-model-details",
                    )

                # Changing the category redraws the leaderboard and toggles the
                # legend and the tournament widgets.
                category_of_tasks.change(
                    fn=change_leaderboard_category,
                    inputs=category_of_tasks,
                    outputs=[
                        results_table,
                        results_table_legend,
                        tournament_results_dropdown,
                        model_tournament_results_table,
                    ],
                )

                # Selecting a model loads its tournament table for the current category.
                tournament_results_dropdown.change(
                    fn=fetch_model_tournament_results_table,
                    inputs=[
                        tournament_results_dropdown,
                        category_of_tasks,
                    ],
                    outputs=model_tournament_results_table,
                )

        # ---- Tab: description and link of a selected submission ----
        with gr.TabItem('Model details'):
            gr.Markdown(MORE_DETAILS_MARKDOWN)

            with leaderboard_server.var_lock.ro:
                detail_dropdown = gr.Dropdown(
                    choices=leaderboard_server.submission_ids,  # TODO: team_name/model_name
                    label="Select model",
                    interactive=True,
                )

            with gr.Row():
                # Both fields stay hidden until `fetch_model_detail` fills them.
                model_description = gr.Text(value='', label='Model description', visible=False, interactive=False)
                model_url = gr.Text(value='', label='Model url', visible=False, interactive=False)

            detail_dropdown.change(
                fn=fetch_model_detail,
                inputs=[detail_dropdown],
                outputs=[model_description, model_url],
            )

        # ---- Tab: submission form with the pre-submit / confirm / reverse flow ----
        with gr.TabItem('Submission'):
            with gr.Column():
                gr.Markdown(SUBMISSION_TAB_TITLE_MARKDOWN)
                # Field name -> component, created in SUBMISSION_INPUTS order so that
                # `list(submission_inputs.values())` lines up with `process_submission(*inputs)`.
                submission_inputs = dict.fromkeys(SUBMISSION_INPUTS)

                with gr.Row():
                    submission_inputs["team_name"] = gr.Textbox(label='Team name', type='text')
                    submission_inputs["model_name"] = gr.Textbox(label='Model name', type='text')
                    submission_inputs["model_type"] = gr.Dropdown(
                        label="Model type",
                        choices=("chat", "pretrained", "ensemble"),
                    )
                    submission_inputs["parameters"] = gr.Number(
                        label='Parameters (B)',
                        value=0.01,
                        step=0.01,
                    )

                with gr.Row():
                    # The default of 0 does not pass `validate_submission_inputs`,
                    # so the user has to enter a real value.
                    submission_inputs["input_length"] = gr.Number(
                        label='Input length (# tokens)',
                        value=0,
                        step=1,
                    )
                    submission_inputs["precision"] = gr.Dropdown(
                        label="Precision",
                        choices=("float32", "bfloat32", "float16", "bfloat16", "8bit", "4bit"),
                    )
                    submission_inputs["description"] = gr.Textbox(label='Description', type='text')
                    submission_inputs["link_to_model"] = gr.Textbox(label='Link to model', type='text')

                submission_inputs["submission_file"] = gr.File(label='Upload your results', type='filepath')

                pre_submission_btn = gr.Button(value='Pre-submit model', interactive=True)

                # Everything below stays hidden until a pre-submit succeeds.
                submit_prompt = gr.Markdown(
                    SUBMISSION_DETAILS_MARKDOWN,
                    visible=False
                )

                pre_submit_info = gr.Markdown(
                    RANKING_AFTER_SUBMISSION_MARKDOWN,
                    visible=False
                )

                pre_submit_table = gr.DataFrame(pd.DataFrame(), interactive=False, label=None, visible=False)

                submission_btn_yes = gr.Button(value='Submit model', interactive=False, visible=False)
                submission_btn_no = gr.Button(value='Reverse process', interactive=False, visible=False)

                # Confirmation dialog opened by the "Submit model" button.
                with Modal(visible=False) as modal_submit:
                    gr.Markdown(MODAL_SUBMIT_MARKDOWN)
                    modal_submit_yes = gr.Button("Yes", interactive=True)
                    modal_submit_no = gr.Button("No", interactive=True)

                # Step 1: lock the button, then validate the form and run the pre-submit.
                pre_submission_btn.click(
                    fn=on_submit_pressed,
                    outputs=[pre_submission_btn],
                ).then(  # TODO: Find out why this does not run concurrently.
                    fn=process_submission,
                    inputs=list(submission_inputs.values()),
                    outputs=[
                        pre_submission_btn,
                        submit_prompt,
                        submission_btn_yes,
                        submission_btn_no,
                        pre_submit_info,
                        pre_submit_table,
                    ],
                )

                # Step 2: ask for confirmation before the real submission.
                submission_btn_yes.click(
                    fn=show_modal,
                    outputs=[modal_submit]
                )

                # Confirmed: save the submission and refresh the widgets listed in `outputs`.
                modal_submit_yes.click(
                    fn=submit_results,
                    outputs=[
                        pre_submission_btn,
                        submission_btn_yes,
                        submission_btn_no,
                        submit_prompt,
                        pre_submit_info,
                        pre_submit_table,
                        results_table,
                        modal_submit,
                        detail_dropdown,
                        category_of_tasks,
                    ],
                )

                # Declined in the dialog: just close it.
                modal_submit_no.click(
                    fn=hide_modal,
                    outputs=[modal_submit]
                )

                # "Reverse process": drop the pre-submit and reset the form.
                submission_btn_no.click(
                    fn=erase_pre_submit,
                    outputs=[
                        pre_submission_btn,
                        submission_btn_yes,
                        submission_btn_no,
                        submit_prompt,
                        pre_submit_info,
                        pre_submit_table,
                    ],
                )

    # Runs on every page load: refresh the leaderboard table, the
    # model-detail choices and the selected category.
    main.load(
        on_application_load,
        inputs=None,
        outputs=[
            results_table,
            detail_dropdown,
            category_of_tasks,
        ]
    )

main.launch()