anson-huang committed on
Commit a0327b4 • 1 Parent(s): 79f9e39
Files changed (4)
  1. app.py +119 -111
  2. src/about.py +8 -5
  3. src/display/utils.py +10 -10
  4. src/leaderboard/read_evals.py +34 -34
app.py CHANGED
@@ -9,7 +9,7 @@ from src.about import (
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
-   LLM_BENCHMARKS_TEXT,
+   DETECTOR_BENCHMARKS_TEXT,
    TITLE,
 )
 from src.display.css_html_js import custom_css
@@ -63,28 +63,28 @@ def init_leaderboard(dataframe):
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
-        ],
-        bool_checkboxgroup_label="Hide models",
+        # select_columns=SelectColumns(
+        #     default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+        #     cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+        #     label="Select Columns to Display:",
+        # ),
+        search_columns=[AutoEvalColumn.model.name],
+        # hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        # filter_columns=[
+        #     ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
+        #     ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+        #     ColumnFilter(
+        #         AutoEvalColumn.params.name,
+        #         type="slider",
+        #         min=0.01,
+        #         max=150,
+        #         label="Select the number of parameters (B)",
+        #     ),
+        #     ColumnFilter(
+        #         AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+        #     ),
+        # ],
+        # bool_checkboxgroup_label="Hide models",
         interactive=False,
     )
 
@@ -95,98 +95,106 @@ with demo:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+        with gr.TabItem("🏅 Detector Leaderboard", elem_id="detector-benchmark-tab-table", id=0):
             leaderboard = init_leaderboard(LEADERBOARD_DF)
 
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
+        with gr.TabItem("🔍 Detector Playground ", elem_id="detector-playground-tab-table", id=1):
             with gr.Row():
                 with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
+                    # print(LEADERBOARD_DF.keys())
+                    gr.Dropdown(LEADERBOARD_DF['Model'].tolist())
+                    gr.Image()
+                    gr.Button("Submit")
+
+        with gr.TabItem("📝 About", elem_id="detector-benchmark-tab-table", id=2):
+            gr.Markdown(DETECTOR_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+        # with gr.TabItem("🚀 Submit here! ", elem_id="detector-benchmark-tab-table", id=3):
+        #     with gr.Column():
+        #         with gr.Row():
+        #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+        #         with gr.Column():
+        #             with gr.Accordion(
+        #                 f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+        #                 open=False,
+        #             ):
+        #                 with gr.Row():
+        #                     finished_eval_table = gr.components.Dataframe(
+        #                         value=finished_eval_queue_df,
+        #                         headers=EVAL_COLS,
+        #                         datatype=EVAL_TYPES,
+        #                         row_count=5,
+        #                     )
+        #             with gr.Accordion(
+        #                 f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+        #                 open=False,
+        #             ):
+        #                 with gr.Row():
+        #                     running_eval_table = gr.components.Dataframe(
+        #                         value=running_eval_queue_df,
+        #                         headers=EVAL_COLS,
+        #                         datatype=EVAL_TYPES,
+        #                         row_count=5,
+        #                     )
+
+        #             with gr.Accordion(
+        #                 f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+        #                 open=False,
+        #             ):
+        #                 with gr.Row():
+        #                     pending_eval_table = gr.components.Dataframe(
+        #                         value=pending_eval_queue_df,
+        #                         headers=EVAL_COLS,
+        #                         datatype=EVAL_TYPES,
+        #                         row_count=5,
+        #                     )
+        #     with gr.Row():
+        #         gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+
+        #     with gr.Row():
+        #         with gr.Column():
+        #             model_name_textbox = gr.Textbox(label="Model name")
+        #             revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+        #             model_type = gr.Dropdown(
+        #                 choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+        #                 label="Model type",
+        #                 multiselect=False,
+        #                 value=None,
+        #                 interactive=True,
+        #             )
+
+        #         with gr.Column():
+        #             precision = gr.Dropdown(
+        #                 choices=[i.value.name for i in Precision if i != Precision.Unknown],
+        #                 label="Precision",
+        #                 multiselect=False,
+        #                 value="float16",
+        #                 interactive=True,
+        #             )
+        #             weight_type = gr.Dropdown(
+        #                 choices=[i.value.name for i in WeightType],
+        #                 label="Weights type",
+        #                 multiselect=False,
+        #                 value="Original",
+        #                 interactive=True,
+        #             )
+        #             base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+        #     submit_button = gr.Button("Submit Eval")
+        #     submission_result = gr.Markdown()
+        #     submit_button.click(
+        #         add_new_eval,
+        #         [
+        #             model_name_textbox,
+        #             base_model_name_textbox,
+        #             revision_name_textbox,
+        #             precision,
+        #             weight_type,
+        #             model_type,
+        #         ],
+        #         submission_result,
+        #     )
 
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
src/about.py CHANGED
@@ -12,8 +12,11 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("anli_r1", "acc", "ANLI")
-    task1 = Task("logiqa", "acc_norm", "LogiQA")
+    task0 = Task("miragenews", "acc", "MiRAGeNews")
+    task1 = Task("genimage", "acc", "GenImage")
+    # task2 = Task("cnn_det", "acc_norm", "CNN Detection")
+    # task3 = Task("forgery_net", "acc_norm", "Forgery Net")
+    # task4 = Task("deepfake_det", "acc_norm", "Deepfake Detection")
 
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
@@ -21,15 +24,15 @@ NUM_FEWSHOT = 0 # Change with your few shot
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
+TITLE = """<h1 align="center" id="space-title"> 🕵️ AI-Generated Image Detector Benchmark</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-Intro text
+AI-Generated Image Detector Benchmark is a platform for evaluating the performance of existing detectors on various data sources and tasks. We collected X images from Y generators with 2 different tasks: Full Image Generation Detection and Partial Image Manipulation Detection.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = f"""
+DETECTOR_BENCHMARKS_TEXT = f"""
 ## How it works
 
 ## Reproducibility
src/display/utils.py CHANGED
@@ -23,22 +23,22 @@ class ColumnContent:
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+# auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+# auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+# auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+# auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+# auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+# auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+# auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+# auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
src/leaderboard/read_evals.py CHANGED
@@ -20,17 +20,17 @@ class EvalResult:
     full_model: str # org/model (path on hub)
     org: str
     model: str
-    revision: str # commit hash, "" if main
+    # revision: str # commit hash, "" if main
     results: dict
-    precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original # Original or Adapter
-    architecture: str = "Unknown"
-    license: str = "?"
-    likes: int = 0
-    num_params: int = 0
-    date: str = "" # submission date of request file
-    still_on_hub: bool = False
+    # precision: Precision = Precision.Unknown
+    # model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
+    # weight_type: WeightType = WeightType.Original # Original or Adapter
+    # architecture: str = "Unknown"
+    # license: str = "?"
+    # likes: int = 0
+    # num_params: int = 0
+    # date: str = "" # submission date of request file
+    # still_on_hub: bool = False
 
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -41,7 +41,7 @@ class EvalResult:
         config = data.get("config")
 
         # Precision
-        precision = Precision.from_str(config.get("model_dtype"))
+        # precision = Precision.from_str(config.get("model_dtype"))
 
         # Get model and org
         org_and_model = config.get("model_name", config.get("model_args", None))
@@ -50,11 +50,11 @@ class EvalResult:
         if len(org_and_model) == 1:
             org = None
             model = org_and_model[0]
-            result_key = f"{model}_{precision.value.name}"
+            result_key = f"{model}"
         else:
             org = org_and_model[0]
             model = org_and_model[1]
-            result_key = f"{org}_{model}_{precision.value.name}"
+            result_key = f"{org}_{model}"
         full_model = "/".join(org_and_model)
 
         still_on_hub, _, model_config = is_model_on_hub(
@@ -85,15 +85,15 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision= config.get("model_sha", ""),
-            still_on_hub=still_on_hub,
-            architecture=architecture
+            # precision=precision,
+            # revision= config.get("model_sha", ""),
+            # still_on_hub=still_on_hub,
+            # architecture=architecture
         )
 
     def update_with_request_file(self, requests_path):
         """Finds the relevant request file for the current model and updates info with it"""
-        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
+        request_file = get_request_file_for_model(requests_path, self.full_model)
 
         try:
             with open(request_file, "r") as f:
@@ -112,27 +112,25 @@ class EvalResult:
         average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
-            AutoEvalColumn.precision.name: self.precision.value.name,
-            AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
-            AutoEvalColumn.architecture.name: self.architecture,
+            # AutoEvalColumn.precision.name: self.precision.value.name,
+            # AutoEvalColumn.model_type.name: self.model_type.value.name,
+            # AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
+            # AutoEvalColumn.weight_type.name: self.weight_type.value.name,
+            # AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
-            AutoEvalColumn.revision.name: self.revision,
+            # AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
-            AutoEvalColumn.license.name: self.license,
-            AutoEvalColumn.likes.name: self.likes,
-            AutoEvalColumn.params.name: self.num_params,
-            AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            # AutoEvalColumn.license.name: self.license,
+            # AutoEvalColumn.likes.name: self.likes,
+            # AutoEvalColumn.params.name: self.num_params,
+            # AutoEvalColumn.still_on_hub.name: self.still_on_hub,
         }
-
         for task in Tasks:
             data_dict[task.value.col_name] = self.results[task.value.benchmark]
-
         return data_dict
 
 
-def get_request_file_for_model(requests_path, model_name, precision):
+def get_request_file_for_model(requests_path, model_name):
     """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
     request_files = os.path.join(
         requests_path,
@@ -148,7 +146,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
             req_content = json.load(f)
             if (
                 req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
+                # and req_content["precision"] == precision.split(".")[-1]
             ):
                 request_file = tmp_request_file
     return request_file
@@ -176,7 +174,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        eval_result.update_with_request_file(requests_path)
+        # eval_result.update_with_request_file(requests_path)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name
@@ -187,10 +185,12 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
 
     results = []
     for v in eval_results.values():
+        # print(v)
         try:
             v.to_dict() # we test if the dict version is complete
             results.append(v)
         except KeyError: # not all eval values present
+            # print(v)
            continue
-
+    # print("RES", results)
     return results