Update README.md
README.md CHANGED
@@ -115,20 +115,23 @@ cd auto-round/examples/language-modeling
python3 eval_042/evluation.py --model_name "Intel/Meta-Llama-3.1-70B-Instruct-int4-inc" --eval_bs 16 --tasks lambada_openai,hellaswag,piqa,winogrande,truthfulqa_mc1,openbookqa,boolq,arc_easy,arc_challenge,mmlu,gsm8k --trust_remote_code
```

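The `eval_042` wrapper builds on lm-evaluation-harness (the `042` suffix presumably refers to version 0.4.2). As a rough, unofficial sketch (not part of this card), the same task list can also be run through the harness's Python API; the names below follow lm-eval 0.4.x and should be checked against your installed version:

```python
# Illustrative only: run the same benchmark suite via the lm-eval 0.4.x Python API.
# Multi-GPU sharding and dtype options are backend-specific and omitted here.
from lm_eval import simple_evaluate

results = simple_evaluate(
    model="hf",
    model_args=(
        "pretrained=Intel/Meta-Llama-3.1-70B-Instruct-int4-inc,"
        "trust_remote_code=True"
    ),
    tasks=[
        "lambada_openai", "hellaswag", "piqa", "winogrande",
        "truthfulqa_mc1", "openbookqa", "boolq",
        "arc_easy", "arc_challenge", "mmlu", "gsm8k",
    ],
    batch_size=16,
)

# Print per-task metric dictionaries as reported by the harness.
for task, metrics in results["results"].items():
    print(task, metrics)
```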

| Metric                       | BF16   | INT4 (iters=200) | INT4 (iters=1000) |
|:-----------------------------|:-------|:-----------------|:------------------|
| avg                          | 0.7182 | 0.7119           | 0.7165            |
| mmlu                         | 0.8221 | 0.8136           | 0.8145            |
| lambada_openai               | 0.7566 | 0.7448           | 0.7565            |
| hellaswag                    | 0.6522 | 0.6474           | 0.6492            |
| winogrande                   | 0.7901 | 0.7845           | 0.8090            |
| piqa                         | 0.8308 | 0.8286           | 0.8270            |
| truthfulqa_mc1               | 0.4064 | 0.4002           | 0.4051            |
| openbookqa                   | 0.3720 | 0.3720           | 0.3760            |
| boolq                        | 0.8777 | 0.8780           | 0.8768            |
| arc_easy                     | 0.8674 | 0.8590           | 0.8565            |
| arc_challenge                | 0.6246 | 0.6109           | 0.6160            |
| gsm8k (5-shot, strict match) | 0.8999 | 0.8923           | 0.8954            |

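The `avg` row matches the plain arithmetic mean of the eleven per-task scores. The snippet below (illustrative, not part of the original card) recomputes those averages and shows that both INT4 variants stay within roughly 1% of BF16 overall:

```python
# Recompute the "avg" row of the table above and compare INT4 against BF16.
# Scores are copied from the table; order: mmlu, lambada_openai, hellaswag,
# winogrande, piqa, truthfulqa_mc1, openbookqa, boolq, arc_easy,
# arc_challenge, gsm8k.
scores = {
    "BF16":              [0.8221, 0.7566, 0.6522, 0.7901, 0.8308, 0.4064,
                          0.3720, 0.8777, 0.8674, 0.6246, 0.8999],
    "INT4 (iters=200)":  [0.8136, 0.7448, 0.6474, 0.7845, 0.8286, 0.4002,
                          0.3720, 0.8780, 0.8590, 0.6109, 0.8923],
    "INT4 (iters=1000)": [0.8145, 0.7565, 0.6492, 0.8090, 0.8270, 0.4051,
                          0.3760, 0.8768, 0.8565, 0.6160, 0.8954],
}

avgs = {name: sum(vals) / len(vals) for name, vals in scores.items()}
for name, avg in avgs.items():
    print(f"{name}: avg={avg:.4f} ({avg / avgs['BF16']:.2%} of BF16)")
# avg values reproduce the table: 0.7182, 0.7119, 0.7165
```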

## Ethical Considerations and Limitations