Sasha committed
Commit 74e9f8c (1 parent: 779cfb9)

making small text changes

Files changed (1)
  1. app.py +10 -10
app.py CHANGED
@@ -50,7 +50,7 @@ with st.sidebar.expander("Datasets", expanded=True):
         sorted(top_datasets))
     configs = get_dataset_config_names(dataset_name)
     dataset_config = st.selectbox(
-        f"Choose a configuration of your dataset:",
+        f"Choose a configuration of the dataset:",
         configs)
     dataset_builder = load_dataset_builder(dataset_name, dataset_config)
     splits = [s for s in dataset_builder.info.splits]
@@ -61,7 +61,7 @@ with st.sidebar.expander("Datasets", expanded=True):
 
 
 
-st.markdown("## Here is some information about your dataset:")
+st.markdown("## Here is some information about this dataset:")
 
 
 st.markdown(dataset_builder.info.description)
@@ -74,7 +74,7 @@ st.markdown("## Now let's see what metrics we can use to evaluate models on this
 
 st.markdown("### Dataset-Specific Metrics")
 if dataset_name in metrics:
-    st.markdown("Great news! Your dataset has a dedicated metric for it!:partying_face: You can use it like this: :point_down:")
+    st.markdown("Great news! This dataset has a dedicated metric for it!:partying_face: You can use it like this: :point_down:")
     if "glue" in dataset_name:
         code = ''' from datasets import load_metric
     metric = load_metric(\"'''+dataset_name+'''\", \"'''+dataset_config+'''\")'''
@@ -84,7 +84,7 @@ if dataset_name in metrics:
     metric = load_metric(\"'''+dataset_name+'''\")'''
     st.code(code, language='python')
 else:
-    st.markdown("Your dataset doesn't have a dedicated metric, but that's ok! :wink:")
+    st.markdown("This dataset doesn't have a dedicated metric, but that's ok! :wink:")
     dedicated_metric = False
 
 st.markdown("### Task-Specific Metrics")
@@ -92,7 +92,7 @@ st.markdown("### Task-Specific Metrics")
 task = find_task(dataset_name)
 
 if task is not None:
-    st.markdown("The task associated to it your dataset is: " + task.replace('-',' '))
+    st.markdown("The task associated to it this dataset is: " + task.replace('-',' '))
     if task == 'automatic-speech-recognition':
         st.markdown('Automatic Speech Recognition has some dedicated metrics such as:')
         st.markdown('[Word Error Rate](https://huggingface.co/metrics/wer)')
@@ -104,7 +104,7 @@ if task is not None:
         metric = load_metric("cer")'''
         st.code(cer_code, language='python')
     else:
-        st.markdown("The task for your dataset doesn't have any dedicated metrics, but you can still use general ones! :cowboy_hat_face:")
+        st.markdown("The task for this dataset doesn't have any dedicated metrics, but you can still use general ones! :cowboy_hat_face:")
 
 
 #print(dataset_builder.info.task_templates)
@@ -130,7 +130,7 @@ try:
     labels = labels.rename(columns={"count_star()": "count"})
     labels.index = dataset_builder.info.features['label'].names
     st.markdown("### Labelled Metrics")
-    st.markdown("Your dataset has "+ str(dataset_builder.info.features['label'].num_classes) + " labels : " + ', '.join(dataset_builder.info.features['label'].names))
+    st.markdown("This dataset has "+ str(dataset_builder.info.features['label'].num_classes) + " labels : " + ', '.join(dataset_builder.info.features['label'].names))
     #TODO : figure out how to make a label plot
     st.plotly_chart(px.pie(labels, values = "count", names = labels.index, width=800, height=400))
     total = sum(c for c in labels['count'])
@@ -138,14 +138,14 @@ try:
     #proportion = [0.85, 0.15]
     stdev_dataset= statistics.stdev(proportion)
     if stdev_dataset <= balanced_stdev:
-        st.markdown("Since your dataset is well-balanced (with a standard deviation of " + str(round(stdev_dataset,2)) +"), you can look at using:")
+        st.markdown("Since this dataset is well-balanced (with a standard deviation of " + str(round(stdev_dataset,2)) +"), you can look at using:")
         st.markdown('[Accuracy](https://huggingface.co/metrics/accuracy)')
         accuracy_code = '''from datasets import load_metric
         metric = load_metric("accuracy")'''
         st.code(accuracy_code, language='python')
 
     else:
-        st.markdown("Since your dataset is not well-balanced (with a standard deviation of " + str(round(stdev_dataset,2)) +"), you can look at using:")
+        st.markdown("Since this dataset is not well-balanced (with a standard deviation of " + str(round(stdev_dataset,2)) +"), you can look at using:")
         st.markdown('[F1 Score](https://huggingface.co/metrics/f1)')
         accuracy_code = '''from datasets import load_metric
         metric = load_metric("accuracy")'''
@@ -154,7 +154,7 @@ try:
 except:
     if task != 'automatic-speech-recognition':
         st.markdown("### Unsupervised Metrics")
-        st.markdown("Since dataset doesn't have any labels, so the metrics that you can use for evaluation are:")
+        st.markdown("Since this dataset doesn't have any labels, the metrics that you can use for evaluation are:")
         st.markdown('[Perplexity](https://huggingface.co/metrics/perplexity)')
         perplexity_code = '''from datasets import load_metric
         metric = load_metric("perplexity")'''
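
For context, the snippets this app displays all follow the same `datasets` pattern: load a metric by name with load_metric, then call compute on predictions and references. A minimal sketch of that usage, with made-up label values purely for illustration (not part of this commit):

from datasets import load_metric

# Load one of the metrics the app suggests, e.g. accuracy for a well-balanced dataset.
metric = load_metric("accuracy")

# Hypothetical model outputs and gold labels, just to show the call shape.
predictions = [0, 1, 1, 0]
references = [0, 1, 0, 0]

# compute returns a dict keyed by the metric name, e.g. {'accuracy': 0.75} here.
print(metric.compute(predictions=predictions, references=references))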