Update README.md
Browse files
README.md
CHANGED
@@ -138,6 +138,83 @@ But Llama 3 is a new technology, and like any new technology, there are risks as
|
|
138 |
|
139 |
Please see the Responsible Use Guide available at [http://llama.meta.com/responsible-use-guide](http://llama.meta.com/responsible-use-guide)
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
|
142 |
## Citation instructions
|
143 |
|
|
|
138 |
|
139 |
Please see the Responsible Use Guide available at [http://llama.meta.com/responsible-use-guide](http://llama.meta.com/responsible-use-guide)
|
140 |
|
141 |
+
## Benchmark Scores
|
142 |
+
|
143 |
+
- Evaluation configuration: `vllm (pretrained=beomi/Llama-3-Open-Ko-8B,revision=081e85a,tensor_parallel_size=1,dtype=bfloat16,data_parallel_size=2,gpu_memory_utilization=0.8), gen_kwargs: (None), limit: None, num_fewshot: 5, batch_size: auto`
|
144 |
+
|
145 |
+
| Tasks |Version|Filter|n-shot| Metric |Value | |Stderr|
|
146 |
+
|----------------------------------------------------------|-------|------|-----:|-----------|-----:|---|------|
|
147 |
+
|haerae |N/A |none | 5|acc |0.6801|± |0.0138|
|
148 |
+
| | |none | 5|acc_norm |0.6801|± |0.0138|
|
149 |
+
| - haerae_general_knowledge | 1|none | 5|acc |0.4375|± |0.0375|
|
150 |
+
| | |none | 5|acc_norm |0.4375|± |0.0375|
|
151 |
+
| - haerae_history | 1|none | 5|acc |0.7340|± |0.0323|
|
152 |
+
| | |none | 5|acc_norm |0.7340|± |0.0323|
|
153 |
+
| - haerae_loan_word | 1|none | 5|acc |0.7870|± |0.0316|
|
154 |
+
| | |none | 5|acc_norm |0.7870|± |0.0316|
|
155 |
+
| - haerae_rare_word | 1|none | 5|acc |0.7012|± |0.0228|
|
156 |
+
| | |none | 5|acc_norm |0.7012|± |0.0228|
|
157 |
+
| - haerae_standard_nomenclature | 1|none | 5|acc |0.7190|± |0.0365|
|
158 |
+
| | |none | 5|acc_norm |0.7190|± |0.0365|
|
159 |
+
|kmmlu_direct |N/A |none | 5|exact_match|0.4054|± |0.0026|
|
160 |
+
| - kmmlu_direct_accounting | 2|none | 5|exact_match|0.3600|± |0.0482|
|
161 |
+
| - kmmlu_direct_agricultural_sciences | 2|none | 5|exact_match|0.3130|± |0.0147|
|
162 |
+
| - kmmlu_direct_aviation_engineering_and_maintenance | 2|none | 5|exact_match|0.3690|± |0.0153|
|
163 |
+
| - kmmlu_direct_biology | 2|none | 5|exact_match|0.3330|± |0.0149|
|
164 |
+
| - kmmlu_direct_chemical_engineering | 2|none | 5|exact_match|0.4190|± |0.0156|
|
165 |
+
| - kmmlu_direct_chemistry | 2|none | 5|exact_match|0.3833|± |0.0199|
|
166 |
+
| - kmmlu_direct_civil_engineering | 2|none | 5|exact_match|0.3870|± |0.0154|
|
167 |
+
| - kmmlu_direct_computer_science | 2|none | 5|exact_match|0.6340|± |0.0152|
|
168 |
+
| - kmmlu_direct_construction | 2|none | 5|exact_match|0.3340|± |0.0149|
|
169 |
+
| - kmmlu_direct_criminal_law | 2|none | 5|exact_match|0.2850|± |0.0320|
|
170 |
+
| - kmmlu_direct_ecology | 2|none | 5|exact_match|0.4210|± |0.0156|
|
171 |
+
| - kmmlu_direct_economics | 2|none | 5|exact_match|0.4077|± |0.0433|
|
172 |
+
| - kmmlu_direct_education | 2|none | 5|exact_match|0.5000|± |0.0503|
|
173 |
+
| - kmmlu_direct_electrical_engineering | 2|none | 5|exact_match|0.3620|± |0.0152|
|
174 |
+
| - kmmlu_direct_electronics_engineering | 2|none | 5|exact_match|0.4790|± |0.0158|
|
175 |
+
| - kmmlu_direct_energy_management | 2|none | 5|exact_match|0.3110|± |0.0146|
|
176 |
+
| - kmmlu_direct_environmental_science | 2|none | 5|exact_match|0.3210|± |0.0148|
|
177 |
+
| - kmmlu_direct_fashion | 2|none | 5|exact_match|0.4190|± |0.0156|
|
178 |
+
| - kmmlu_direct_food_processing | 2|none | 5|exact_match|0.3600|± |0.0152|
|
179 |
+
| - kmmlu_direct_gas_technology_and_engineering | 2|none | 5|exact_match|0.3320|± |0.0149|
|
180 |
+
| - kmmlu_direct_geomatics | 2|none | 5|exact_match|0.3640|± |0.0152|
|
181 |
+
| - kmmlu_direct_health | 2|none | 5|exact_match|0.5100|± |0.0502|
|
182 |
+
| - kmmlu_direct_industrial_engineer | 2|none | 5|exact_match|0.3970|± |0.0155|
|
183 |
+
| - kmmlu_direct_information_technology | 2|none | 5|exact_match|0.5720|± |0.0157|
|
184 |
+
| - kmmlu_direct_interior_architecture_and_design | 2|none | 5|exact_match|0.4740|± |0.0158|
|
185 |
+
| - kmmlu_direct_korean_history | 2|none | 5|exact_match|0.2700|± |0.0446|
|
186 |
+
| - kmmlu_direct_law | 2|none | 5|exact_match|0.3990|± |0.0155|
|
187 |
+
| - kmmlu_direct_machine_design_and_manufacturing | 2|none | 5|exact_match|0.4080|± |0.0155|
|
188 |
+
| - kmmlu_direct_management | 2|none | 5|exact_match|0.4660|± |0.0158|
|
189 |
+
| - kmmlu_direct_maritime_engineering | 2|none | 5|exact_match|0.4417|± |0.0203|
|
190 |
+
| - kmmlu_direct_marketing | 2|none | 5|exact_match|0.6720|± |0.0149|
|
191 |
+
| - kmmlu_direct_materials_engineering | 2|none | 5|exact_match|0.4130|± |0.0156|
|
192 |
+
| - kmmlu_direct_math | 2|none | 5|exact_match|0.2567|± |0.0253|
|
193 |
+
| - kmmlu_direct_mechanical_engineering | 2|none | 5|exact_match|0.3800|± |0.0154|
|
194 |
+
| - kmmlu_direct_nondestructive_testing | 2|none | 5|exact_match|0.3890|± |0.0154|
|
195 |
+
| - kmmlu_direct_patent | 2|none | 5|exact_match|0.2700|± |0.0446|
|
196 |
+
| - kmmlu_direct_political_science_and_sociology | 2|none | 5|exact_match|0.4433|± |0.0287|
|
197 |
+
| - kmmlu_direct_psychology | 2|none | 5|exact_match|0.3620|± |0.0152|
|
198 |
+
| - kmmlu_direct_public_safety | 2|none | 5|exact_match|0.3200|± |0.0148|
|
199 |
+
| - kmmlu_direct_railway_and_automotive_engineering | 2|none | 5|exact_match|0.3200|± |0.0148|
|
200 |
+
| - kmmlu_direct_real_estate | 2|none | 5|exact_match|0.3650|± |0.0341|
|
201 |
+
| - kmmlu_direct_refrigerating_machinery | 2|none | 5|exact_match|0.3210|± |0.0148|
|
202 |
+
| - kmmlu_direct_social_welfare | 2|none | 5|exact_match|0.4500|± |0.0157|
|
203 |
+
| - kmmlu_direct_taxation | 2|none | 5|exact_match|0.3550|± |0.0339|
|
204 |
+
| - kmmlu_direct_telecommunications_and_wireless_technology| 2|none | 5|exact_match|0.5490|± |0.0157|
|
205 |
+
|kobest_boolq | 1|none | 5|acc |0.7984|± |0.0107|
|
206 |
+
| | |none | 5|f1 |0.7961|± |N/A |
|
207 |
+
|kobest_copa | 1|none | 5|acc |0.8150|± |0.0123|
|
208 |
+
| | |none | 5|f1 |0.8148|± |N/A |
|
209 |
+
|kobest_hellaswag | 1|none | 5|acc |0.4800|± |0.0224|
|
210 |
+
| | |none | 5|f1 |0.4771|± |N/A |
|
211 |
+
| | |none | 5|acc_norm |0.6120|± |0.0218|
|
212 |
+
|kobest_sentineg | 1|none | 5|acc |0.9597|± |0.0099|
|
213 |
+
| | |none | 5|f1 |0.9597|± |N/A |
|
214 |
+
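
| Groups |Version|Filter|n-shot| Metric |Value | |Stderr|
|------------|-------|------|-----:|-----------|-----:|---|------|
|haerae |N/A |none | 5|acc |0.6801|± |0.0138|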
|
215 |
+
| | |none | 5|acc_norm |0.6801|± |0.0138|
|
216 |
+
|kmmlu_direct|N/A |none | 5|exact_match|0.4054|± |0.0026|
|
217 |
+
|
218 |
|
219 |
## Citation instructions
|
220 |
|