Sasha committed
Commit d8eab79
1 Parent(s): 4474a2c

adding some fixes (paw still isn't working though)

Files changed (1)
  1. app.py +23 -12
app.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
 import statistics
 
 st.set_page_config(
-    page_title="Evaluation Buddy",
+    page_title="HuggingFace Evaluation Buddy",
     page_icon="./robot.png",
     layout="wide",
 )
@@ -25,7 +25,7 @@ top_datasets= ['glue', 'super_glue', 'wikitext', 'imdb', 'squad', 'squad_es', \
 
 tasks= ['text classification', 'question answering', 'automatic speech recognition', 'natural language inference', \
         'machine translation', 'sentiment analysis', 'text simplification', 'named entity recognition', \
-        'reading comprehension']
+        'reading comprehension', 'paraphrase identification', 'natural language understanding']
 metrics= ['matthews_correlation', 'perplexity', 'meteor', 'code_eval', 'super_glue', 'rouge', 'mauve', 'cer', 'accuracy', 'recall', 'bleurt', 'sari', 'precision', 'mean_iou', 'squad', 'mahalanobis', 'chrf', 'mae', 'squad_v2', 'seqeval', 'cuad', 'wiki_split', 'google_bleu', 'competition_math', 'pearsonr', 'xtreme_s', 'comet', 'gleu', 'spearmanr', 'f1', 'frugalscore', 'bertscore', 'indic_glue', 'mse', 'xnli', 'ter', 'coval', 'wer', 'bleu', 'glue', 'sacrebleu']
 
 with st.sidebar.expander("Datasets", expanded=True):
@@ -50,22 +50,26 @@ st.markdown("## Here is some information about your dataset:")
 st.markdown("### Description")
 
 st.markdown(dataset_builder.info.description)
+
+if len(dataset_builder.info.description) == 1:
+    st.markdown("This dataset does not have a description. :no_mouth:")
 st.markdown("For more information about this dataset, check out [its website](https://huggingface.co/datasets/"+dataset_name+")")
 
 
 st.markdown("### Dataset-Specific Metrics")
 if dataset_name in metrics:
-    st.markdown("Great news! Your dataset has a dedicated metric for it! You can use it like this:")
+    st.markdown("Great news! Your dataset has a dedicated metric for it! You can use it like this: :point_down:")
     code = ''' from datasets import load_metric
 metric = load_metric('''+dataset_name+''', '''+dataset_config+''')'''
     st.code(code, language='python')
     dedicated_metric = True
 else:
-    st.markdown("Your dataset doesn't have a dedicated metric, but that's ok!")
+    st.markdown("Your dataset doesn't have a dedicated metric, but that's ok! :wink:")
    dedicated_metric = False
 
 st.markdown("### Task-Specific Metrics")
 
+task = None
 try:
     task = dataset_builder.info.task_templates[0].task
 except:
@@ -73,14 +77,20 @@ except:
         if t in str(dataset_builder.info.description).lower():
             task = t
         else:
-            task = None
+            continue
 
 if task is not None:
     st.markdown("The task associated to it your dataset is: " + task.replace('-',' '))
     if task == 'automatic-speech-recognition':
         st.markdown('Automatic Speech Recognition has some dedicated metrics such as:')
         st.markdown('[Word Error Rate](https://huggingface.co/metrics/wer)')
+        wer_code = '''from datasets import load_metric
+metric = load_metric("wer")'''
+        st.code(wer_code, language='python')
         st.markdown('[Character Error Rate](https://huggingface.co/metrics/cer)')
+        cer_code = '''from datasets import load_metric
+metric = load_metric("cer")'''
+        st.code(cer_code, language='python')
     else:
         st.markdown("The task for your dataset doesn't have any dedicated metrics, but you can still use general ones!")
 
@@ -129,10 +139,11 @@ try:
     st.code(accuracy_code, language='python')
     st.markdown('Since it takes into account both precision and recall, which works well to evaluate model performance on minority classes.')
 except:
-    st.markdown("### Unsupervised Metrics")
-    st.markdown("Since dataset doesn't have any labels, so the metrics that you can use for evaluation are:")
-    st.markdown('[Perplexity](https://huggingface.co/metrics/perplexity)')
-    perplexity_code = '''from datasets import load_metric
-metric = load_metric("perplexity")'''
-    st.code(perplexity_code, language='python')
-    st.markdown('If you choose a model that was trained on **' + dataset_name + '** and use it to compute perplexity on text generated by your model, this can help determine how similar the two are.')
+    if task != 'automatic-speech-recognition':
+        st.markdown("### Unsupervised Metrics")
+        st.markdown("Since dataset doesn't have any labels, so the metrics that you can use for evaluation are:")
+        st.markdown('[Perplexity](https://huggingface.co/metrics/perplexity)')
+        perplexity_code = '''from datasets import load_metric
+metric = load_metric("perplexity")'''
+        st.code(perplexity_code, language='python')
+        st.markdown('If you choose a model that was trained on **' + dataset_name + '** and use it to compute perplexity on text generated by your model, this can help determine how similar the two are.')
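
The wer_code and cer_code snippets added in this commit load metrics through datasets.load_metric. As a rough illustration of what a user would do after copying one of those snippets out of the app, here is a minimal sketch; the predictions and references below are made up for the example and are not part of the app:

# Minimal sketch: running the WER snippet that the app displays (illustrative data only).
from datasets import load_metric

metric = load_metric("wer")

# Hypothetical ASR outputs and ground-truth transcripts, invented for this example.
predictions = ["hello world", "good night moon"]
references = ["hello duck", "good night moon"]

# One substitution ("world" vs "duck") over five reference words -> WER of 0.2.
score = metric.compute(predictions=predictions, references=references)
print(score)

The cer_code snippet works the same way with load_metric("cer"), except that the error rate is computed at the character level rather than the word level.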