Spaces:

sasha
/

evaluation-buddy

Sleeping

App Files Community

Sasha committed on Mar 17, 2022

Commit

d8eab79

•

1 Parent(s): 4474a2c

adding some fixes (paw still isn't working though)

Browse files

Files changed (1) hide show

app.py +23 -12

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import numpy as np
 import statistics
 st.set_page_config(
-    page_title="Evaluation Buddy",
     page_icon="./robot.png",
     layout="wide",
 )
@@ -25,7 +25,7 @@ top_datasets= ['glue', 'super_glue', 'wikitext', 'imdb', 'squad', 'squad_es', \
 tasks= ['text classification', 'question answering', 'automatic speech recognition', 'natural language inference', \
         'machine translation', 'sentiment analysis', 'text simplification', 'named entity recognition', \
-        'reading comprehension']
 metrics= ['matthews_correlation', 'perplexity', 'meteor', 'code_eval', 'super_glue', 'rouge', 'mauve', 'cer', 'accuracy', 'recall', 'bleurt', 'sari', 'precision', 'mean_iou', 'squad', 'mahalanobis', 'chrf', 'mae', 'squad_v2', 'seqeval', 'cuad', 'wiki_split', 'google_bleu', 'competition_math', 'pearsonr', 'xtreme_s', 'comet', 'gleu', 'spearmanr', 'f1', 'frugalscore', 'bertscore', 'indic_glue', 'mse', 'xnli', 'ter', 'coval', 'wer', 'bleu', 'glue', 'sacrebleu']
 with st.sidebar.expander("Datasets", expanded=True):
@@ -50,22 +50,26 @@ st.markdown("## Here is some information about your dataset:")
 st.markdown("### Description")
 st.markdown(dataset_builder.info.description)
 st.markdown("For more information about this dataset, check out [its website](https://huggingface.co/datasets/"+dataset_name+")")
 st.markdown("### Dataset-Specific Metrics")
 if dataset_name in metrics:
-    st.markdown("Great news! Your dataset has a dedicated metric for it! You can use it like this:")
     code = ''' from datasets import load_metric
  metric = load_metric('''+dataset_name+''', '''+dataset_config+''')'''
     st.code(code, language='python')
     dedicated_metric = True
 else:
-    st.markdown("Your dataset doesn't have a dedicated metric, but that's ok!")
     dedicated_metric = False
 st.markdown("### Task-Specific Metrics")
 try:
     task = dataset_builder.info.task_templates[0].task
 except:
@@ -73,14 +77,20 @@ except:
         if t in str(dataset_builder.info.description).lower():
             task = t
         else:
-            task = None
 if task is not None:
     st.markdown("The task associated to it your dataset is: " + task.replace('-',' '))
     if task == 'automatic-speech-recognition':
         st.markdown('Automatic Speech Recognition has some dedicated metrics such as:')
         st.markdown('[Word Error Rate](https://huggingface.co/metrics/wer)')
         st.markdown('[Character Error Rate](https://huggingface.co/metrics/cer)')
 else:
     st.markdown("The task for your dataset doesn't have any dedicated metrics, but you can still use general ones!")
@@ -129,10 +139,11 @@ try:
             st.code(accuracy_code, language='python')
             st.markdown('Since it takes into account both precision and recall, which works well to evaluate model performance on minority classes.')
 except:
-    st.markdown("### Unsupervised  Metrics")
-    st.markdown("Since dataset doesn't have any labels, so the metrics that you can use for evaluation are:")
-    st.markdown('[Perplexity](https://huggingface.co/metrics/perplexity)')
-    perplexity_code = '''from datasets import load_metric
-metric = load_metric("perplexity")'''
-    st.code(perplexity_code, language='python')
-    st.markdown('If you choose a model that was trained on **' + dataset_name + '** and use it to compute perplexity on text generated by your model, this can help determine how similar the two are.')

 import statistics
 st.set_page_config(
+    page_title="HuggingFace Evaluation Buddy",
     page_icon="./robot.png",
     layout="wide",
 )
 tasks= ['text classification', 'question answering', 'automatic speech recognition', 'natural language inference', \
         'machine translation', 'sentiment analysis', 'text simplification', 'named entity recognition', \
+        'reading comprehension', 'paraphrase identification', 'natural language understanding']
 metrics= ['matthews_correlation', 'perplexity', 'meteor', 'code_eval', 'super_glue', 'rouge', 'mauve', 'cer', 'accuracy', 'recall', 'bleurt', 'sari', 'precision', 'mean_iou', 'squad', 'mahalanobis', 'chrf', 'mae', 'squad_v2', 'seqeval', 'cuad', 'wiki_split', 'google_bleu', 'competition_math', 'pearsonr', 'xtreme_s', 'comet', 'gleu', 'spearmanr', 'f1', 'frugalscore', 'bertscore', 'indic_glue', 'mse', 'xnli', 'ter', 'coval', 'wer', 'bleu', 'glue', 'sacrebleu']
 with st.sidebar.expander("Datasets", expanded=True):
 st.markdown("### Description")
 st.markdown(dataset_builder.info.description)
+if len(dataset_builder.info.description) == 1:
+    st.markdown("This dataset does not have a description. :no_mouth:")
 st.markdown("For more information about this dataset, check out [its website](https://huggingface.co/datasets/"+dataset_name+")")
 st.markdown("### Dataset-Specific Metrics")
 if dataset_name in metrics:
+    st.markdown("Great news! Your dataset has a dedicated metric for it! You can use it like this: :point_down:")
     code = ''' from datasets import load_metric
  metric = load_metric('''+dataset_name+''', '''+dataset_config+''')'''
     st.code(code, language='python')
     dedicated_metric = True
 else:
+    st.markdown("Your dataset doesn't have a dedicated metric, but that's ok! :wink:")
     dedicated_metric = False
 st.markdown("### Task-Specific Metrics")
+task = None
 try:
     task = dataset_builder.info.task_templates[0].task
 except:
         if t in str(dataset_builder.info.description).lower():
             task = t
         else:
+            continue
 if task is not None:
     st.markdown("The task associated to it your dataset is: " + task.replace('-',' '))
     if task == 'automatic-speech-recognition':
         st.markdown('Automatic Speech Recognition has some dedicated metrics such as:')
         st.markdown('[Word Error Rate](https://huggingface.co/metrics/wer)')
+        wer_code = '''from datasets import load_metric
+    metric = load_metric("wer")'''
+        st.code(wer_code, language='python')
         st.markdown('[Character Error Rate](https://huggingface.co/metrics/cer)')
+        cer_code = '''from datasets import load_metric
+    metric = load_metric("cer")'''
+        st.code(cer_code, language='python')
 else:
     st.markdown("The task for your dataset doesn't have any dedicated metrics, but you can still use general ones!")
             st.code(accuracy_code, language='python')
             st.markdown('Since it takes into account both precision and recall, which works well to evaluate model performance on minority classes.')
 except:
+    if task != 'automatic-speech-recognition':
+        st.markdown("### Unsupervised  Metrics")
+        st.markdown("Since dataset doesn't have any labels, so the metrics that you can use for evaluation are:")
+        st.markdown('[Perplexity](https://huggingface.co/metrics/perplexity)')
+        perplexity_code = '''from datasets import load_metric
+    metric = load_metric("perplexity")'''
+        st.code(perplexity_code, language='python')
+        st.markdown('If you choose a model that was trained on **' + dataset_name + '** and use it to compute perplexity on text generated by your model, this can help determine how similar the two are.')