AppleSwing committed
Commit 2a18e0a • Parent(s): a549d9d

Add app debug mode and dynamic refresh tables

Browse files
- app.py                     +289 -229
- src/envs.py                  +2   -2
- src/submission/submit.py     +5   -1
app.py CHANGED

@@ -3,10 +3,11 @@
import os
import datetime
import socket
+from threading import Thread

import gradio as gr
import pandas as pd
+import time
from apscheduler.schedulers.background import BackgroundScheduler

from huggingface_hub import snapshot_download

@@ -38,11 +39,24 @@ from src.display.utils import (
    Precision,
)

-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC,
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, \
+    QUEUE_REPO, REPO_ID, RESULTS_REPO, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval
from src.utils import get_dataset_summary_table

+def get_args():
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Run the LLM Leaderboard")
+    parser.add_argument("--debug", action="store_true", help="Run in debug mode")
+    return parser.parse_args()
+
+args = get_args()
+if args.debug:
+    print("Running in debug mode")
+    QUEUE_REPO = DEBUG_QUEUE_REPO
+    RESULTS_REPO = DEBUG_RESULTS_REPO

def ui_snapshot_download(repo_id, local_dir, repo_type, tqdm_class, etag_timeout):
    try:

@@ -76,11 +90,6 @@ def init_space():
    )
    return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df

-dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
-leaderboard_df = original_df.copy()
# Searching and filtering
def update_table(
    hidden_df: pd.DataFrame, columns: list, type_query: list, precision_query: list, size_query: list, query: str

@@ -143,123 +152,158 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio

    return filtered_df

+shown_columns = None
+dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+leaderboard_df = original_df.copy()

+def update_leaderboard_table():
+    global leaderboard_df, shown_columns
+    print("Updating leaderboard table")
+    return leaderboard_df[
+        [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+        + shown_columns.value
+        + [AutoEvalColumn.dummy.name]
+    ] if not leaderboard_df.empty else leaderboard_df
+

+def update_hidden_leaderboard_table():
+    global original_df
+    return original_df[COLS] if original_df.empty is False else original_df

-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+def update_dataset_table():
+    global dataset_df
+    return dataset_df

-    with gr.Column():
-        with gr.Row():
-            search_bar = gr.Textbox(
-                placeholder=" 🔍 Model search (separate multiple queries with `;`)",
-                show_label=False,
-                elem_id="search-bar",
-            )
-        with gr.Row():
-            shown_columns = gr.CheckboxGroup(
-                choices=[
-                    c.name
-                    for c in fields(AutoEvalColumn)
-                    if not c.hidden and not c.never_hidden and not c.dummy
-                ],
-                value=[
-                    c.name
-                    for c in fields(AutoEvalColumn)
-                    if c.displayed_by_default and not c.hidden and not c.never_hidden
-                ],
-                label="Select columns to show",
-                elem_id="column-select",
-                interactive=True,
-            )
+def update_finish_table():
+    global finished_eval_queue_df
+    return finished_eval_queue_df

-                choices=[t.to_str() for t in InferenceFramework],
-                value=[t.to_str() for t in InferenceFramework],
-                interactive=True,
-                elem_id="filter-columns-size",
-            )
+def update_running_table():
+    global running_eval_queue_df
+    return running_eval_queue_df

-                value=[t.to_str() for t in ModelType],
-                interactive=True,
-                elem_id="filter-columns-type",
-            )
+def update_pending_table():
+    global pending_eval_queue_df
+    return pending_eval_queue_df

+def update_finish_num():
+    global finished_eval_queue_df
+    return len(finished_eval_queue_df)
+
+def update_running_num():
+    global running_eval_queue_df
+    return len(running_eval_queue_df)
+
+def update_pending_num():
+    global pending_eval_queue_df
+    return len(pending_eval_queue_df)

+# triggered only once at startup => read query parameter if it exists
+def load_query(request: gr.Request):
+    query = request.query_params.get("query") or ""
+    return query
+
+def refresh_leaderboard():
+    return gr.update(value=update_leaderboard_table()), gr.update(value=update_hidden_leaderboard_table()), \
+        gr.update(value=update_dataset_table()), gr.update(value=update_finish_table()), \
+        gr.update(value=update_running_table()), gr.update(value=update_pending_table()), \
+        gr.update(value=update_finish_num()), gr.update(value=update_running_num()), gr.update(value=update_pending_num())
+
+def periodic_init():
+    global dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df, leaderboard_df
+    while True:
+        time.sleep(60)
+        dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+        leaderboard_df = original_df.copy()
+
+def block_launch():
+    global dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df, leaderboard_df, shown_columns
+    demo = gr.Blocks(css=custom_css)
+    with demo:
+        gr.HTML(TITLE)
+        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+        with gr.Tabs(elem_classes="tab-buttons") as tabs:
+            with gr.TabItem("MOE-LLM-GPU-Poor-Leaderboard Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Row():
+                            search_bar = gr.Textbox(
+                                placeholder=" 🔍 Model search (separate multiple queries with `;`)",
+                                show_label=False,
+                                elem_id="search-bar",
+                            )
+                        with gr.Row():
+                            shown_columns = gr.CheckboxGroup(
+                                choices=[
+                                    c.name
+                                    for c in fields(AutoEvalColumn)
+                                    if not c.hidden and not c.never_hidden and not c.dummy
+                                ],
+                                value=[
+                                    c.name
+                                    for c in fields(AutoEvalColumn)
+                                    if c.displayed_by_default and not c.hidden and not c.never_hidden
+                                ],
+                                label="Select columns to show",
+                                elem_id="column-select",
+                                interactive=True,
+                            )
+                    with gr.Column(min_width=320):
+                        filter_columns_size = gr.CheckboxGroup(
+                            label="Inference frameworks",
+                            choices=[t.to_str() for t in InferenceFramework],
+                            value=[t.to_str() for t in InferenceFramework],
+                            interactive=True,
+                            elem_id="filter-columns-size",
+                        )
+                        filter_columns_type = gr.CheckboxGroup(
+                            label="Model types",
+                            choices=[t.to_str() for t in ModelType],
+                            value=[t.to_str() for t in ModelType],
+                            interactive=True,
+                            elem_id="filter-columns-type",
+                        )
+                        filter_columns_precision = gr.CheckboxGroup(
+                            label="Precision",
+                            choices=[i.value.name for i in Precision],
+                            value=[i.value.name for i in Precision],
+                            interactive=True,
+                            elem_id="filter-columns-precision",
+                        )
+                        # filter_columns_size = gr.CheckboxGroup(
+                        #     label="Model sizes (in billions of parameters)",
+                        #     choices=list(NUMERIC_INTERVALS.keys()),
+                        #     value=list(NUMERIC_INTERVALS.keys()),
+                        #     interactive=True,
+                        #     elem_id="filter-columns-size",
+                        # )
+                # breakpoint()
+                refresh_button = gr.Button("Refresh", visible=True)
+                leaderboard_table = gr.components.Dataframe(
+                    value=(
+                        leaderboard_df[
+                            [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+                            + shown_columns.value
+                            + [AutoEvalColumn.dummy.name]
+                        ]
+                        if leaderboard_df.empty is False
+                        else leaderboard_df
+                    ),
+                    headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                    datatype=TYPES,
+                    elem_id="leaderboard-table",
+                    interactive=False,
+                    visible=True,
+                ) # column_widths=["2%", "20%"]
+                # Dummy leaderboard for handling the case when the user uses backspace key
+                hidden_leaderboard_table_for_search = gr.components.Dataframe(
+                    value=original_df[COLS] if original_df.empty is False else original_df,
+                    headers=COLS,
+                    datatype=TYPES,
+                    visible=False,
+                )
+                # refresh_button.click(fn=update_leaderboard_tables, outputs=[leaderboard_table, hidden_leaderboard_table_for_search])
+                search_bar.submit(
                    update_table,
                    [
                        hidden_leaderboard_table_for_search,

@@ -270,124 +314,139 @@ with demo:
                        search_bar,
                    ],
                    leaderboard_table,
-                    queue=True,
                )
-                                value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
-                            )
-                    with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
-                            )
-                    with gr.Accordion(f"⏳ Scheduled Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
-                            )
-                with gr.Row():
-                    gr.Markdown("# Submit your model here", elem_classes="markdown-text")
-                with gr.Row():
-                    inference_framework = gr.Dropdown(
-                        choices=[t.to_str() for t in InferenceFramework],
-                        label="Inference framework",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
+                # Check query parameter once at startup and update search bar
+                demo.load(load_query, inputs=[], outputs=[search_bar])
+                for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
+                    selector.change(
+                        update_table,
+                        [
+                            hidden_leaderboard_table_for_search,
+                            shown_columns,
+                            filter_columns_type,
+                            filter_columns_precision,
+                            filter_columns_size,
+                            search_bar,
+                        ],
+                        leaderboard_table,
+                        queue=True,
+                    )
+            with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
+                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+                dataset_table = gr.components.Dataframe(
+                    value=dataset_df,
+                    headers=list(dataset_df.columns),
+                    datatype=["str", "markdown", "str", "str", "str"],
+                    elem_id="dataset-table",
+                    interactive=False,
+                    visible=True,
+                    column_widths=["15%", "20%"],
                )
+                gr.Markdown(LLM_BENCHMARKS_DETAILS, elem_classes="markdown-text")
+                gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
+                # refresh_button.click(fn=update_dataset_table, outputs=[dataset_table])
+            with gr.TabItem("Submit a model ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Column():
+                    with gr.Row():
+                        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+                    with gr.Column():
+                        with gr.Accordion(f"✅ Finished Evaluations", open=False):
+                            with gr.Column():
+                                num_fin = gr.Number(len(finished_eval_queue_df), label="Number of finished evaluations", visible=True, interactive=False)
+                                with gr.Row():
+                                    finished_eval_table = gr.components.Dataframe(
+                                        value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
+                                    )
+                        with gr.Accordion(f"🔄 Running Evaluation Queue", open=False):
+                            with gr.Column():
+                                num_run = gr.Number(len(running_eval_queue_df), label="Number of running evaluations", visible=True, interactive=False)
+                                with gr.Row():
+                                    running_eval_table = gr.components.Dataframe(
+                                        value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
+                                    )
+                        with gr.Accordion(f"⏳ Scheduled Evaluation Queue", open=False):
+                            with gr.Column():
+                                num_sche = gr.Number(len(pending_eval_queue_df), label="Number of scheduled evaluations", visible=True, interactive=False)
+                                with gr.Row():
+                                    pending_eval_table = gr.components.Dataframe(
+                                        value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
+                                    )
+                    # refresh_button.click(fn=update_submit_tables,
+                    #                      outputs=[finished_eval_table, running_eval_table, pending_eval_table])
+                    with gr.Row():
+                        gr.Markdown("# Submit your model here", elem_classes="markdown-text")
+                    with gr.Row():
+                        inference_framework = gr.Dropdown(
+                            choices=[t.to_str() for t in InferenceFramework],
+                            label="Inference framework",
                            multiselect=False,
                            value=None,
                            interactive=True,
                        )
+                    with gr.Row():
+                        with gr.Column():
+                            model_name_textbox = gr.Textbox(label="Model name")
+                            revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                            private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
+                            model_type = gr.Dropdown(
+                                choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                                label="Model type",
+                                multiselect=False,
+                                value=None,
+                                interactive=True,
+                            )
+                        with gr.Column():
+                            precision = gr.Dropdown(
+                                choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                                label="Precision",
+                                multiselect=False,
+                                value="float32",
+                                interactive=True,
+                            )
+                            weight_type = gr.Dropdown(
+                                choices=[i.value.name for i in WeightType],
+                                label="Weights type",
+                                multiselect=False,
+                                value="Original",
+                                interactive=True,
+                            )
+                            base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+                    submit_button = gr.Button("Submit Eval")
+                    submission_result = gr.Markdown()
+                    debug = gr.Checkbox(args.debug, label="Debug", visible=False)
+                    submit_button.click(
+                        add_new_eval,
+                        [
+                            model_name_textbox,
+                            base_model_name_textbox,
+                            revision_name_textbox,
+                            precision,
+                            private,
+                            weight_type,
+                            model_type,
+                            inference_framework,
+                            debug
+                        ],
+                        submission_result,
+                    )
+        refresh_button.click(refresh_leaderboard,
+                             outputs=[leaderboard_table, hidden_leaderboard_table_for_search, dataset_table,
+                                      finished_eval_table, running_eval_table, pending_eval_table, num_fin, num_run, num_sche])
+
+        with gr.Row():
+            with gr.Accordion("Citing this leaderboard", open=False):
+                citation_button = gr.Textbox(
+                    value=CITATION_BUTTON_TEXT,
+                    label=CITATION_BUTTON_LABEL,
+                    lines=20,
+                    elem_id="citation-button",
+                    show_copy_button=True,
+                )
+    demo.queue(default_concurrency_limit=40).launch()
+
scheduler = BackgroundScheduler()

-scheduler.add_job(restart_space, "interval",
+scheduler.add_job(restart_space, "interval", hours=6)

def launch_backend():
    import subprocess

@@ -396,8 +455,9 @@ def launch_backend():
    if DEVICE not in {"cpu"}:
        _ = subprocess.run(["python", "backend-cli.py"])

+Thread(target=periodic_init, daemon=True).start()
# scheduler.add_job(launch_backend, "interval", seconds=120)
-scheduler.start()
+if __name__ == "__main__":
+    scheduler.start()
+    block_launch()
+
src/envs.py CHANGED

@@ -12,8 +12,8 @@ QUEUE_REPO = "sparse-generative-ai/requests"
QUEUE_REPO_OPEN_LLM = "open-llm-leaderboard/requests"
RESULTS_REPO = "sparse-generative-ai/results"

+DEBUG_QUEUE_REPO = "sparse-generative-ai/debug_requests"
+DEBUG_RESULTS_REPO = "sparse-generative-ai/debug_results"

IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))

src/submission/submit.py CHANGED

@@ -3,7 +3,7 @@ import os
from datetime import datetime, timezone

from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
+from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA, DEBUG_QUEUE_REPO
from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
from src.submission.check_validity import (
    already_submitted_models,

@@ -26,12 +26,16 @@ def add_new_eval(
    weight_type: str,
    model_type: str,
    inference_framework: str,
+    debug: bool = False
):
    global REQUESTED_MODELS
    global USERS_TO_SUBMISSION_DATES
    if not REQUESTED_MODELS:
        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)

+    if debug:
+        QUEUE_REPO = DEBUG_QUEUE_REPO
+
    user_name = ""
    model_path = model
    if "/" in model:
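Taken together, the debug path works roughly like this: launching the Space with the new --debug flag prints "Running in debug mode" and redirects QUEUE_REPO / RESULTS_REPO to the debug repos from src/envs.py, and the hidden Debug checkbox carries the same flag into add_new_eval(..., debug=True) so new requests go to the debug queue. The following is a hedged, standalone sketch of that switch only, not the app itself; the repo name constants are copied from src/envs.py above and the argparse wiring mirrors get_args() in app.py.

```python
# Sketch of the debug switch introduced in this commit (standalone, illustrative).
import argparse

QUEUE_REPO = "sparse-generative-ai/requests"
RESULTS_REPO = "sparse-generative-ai/results"
DEBUG_QUEUE_REPO = "sparse-generative-ai/debug_requests"
DEBUG_RESULTS_REPO = "sparse-generative-ai/debug_results"

def get_args():
    parser = argparse.ArgumentParser(description="Run the LLM Leaderboard")
    parser.add_argument("--debug", action="store_true", help="Run in debug mode")
    return parser.parse_args()

if __name__ == "__main__":
    args = get_args()
    if args.debug:
        # Same switch app.py performs at startup; submissions additionally pass
        # debug=True into add_new_eval() via the hidden checkbox.
        QUEUE_REPO = DEBUG_QUEUE_REPO
        RESULTS_REPO = DEBUG_RESULTS_REPO
    print(f"queue repo: {QUEUE_REPO}, results repo: {RESULTS_REPO}")
```

Run as `python app.py --debug` (or this sketch with --debug) to exercise the debug repositories instead of the production ones.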