Model save

- README.md +22 -26
- adapter_model.safetensors +1 -1
- all_results.json +16 -16
- eval_results.json +12 -12
- train_results.json +4 -4
- trainer_state.json +0 -0
README.md
CHANGED
@@ -2,13 +2,9 @@
 license: apache-2.0
 library_name: peft
 tags:
-- alignment-handbook
-- generated_from_trainer
 - trl
 - dpo
 - generated_from_trainer
-datasets:
-- snorkelai/Snorkel-Mistral-PairRM-DPO-Dataset
 base_model: mistralai/Mistral-7B-Instruct-v0.2
 model-index:
 - name: zephyr-7b-dpo-lora-pairrm
@@ -20,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # zephyr-7b-dpo-lora-pairrm
 
-This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the
+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
-- Rewards/chosen: -
-- Rewards/rejected: -1.
-- Rewards/accuracies: 0.
-- Rewards/margins: 0.
-- Logps/rejected: -
-- Logps/chosen: -
-- Logits/rejected: -
-- Logits/chosen: -
+- Loss: 0.6764
+- Rewards/chosen: -0.9885
+- Rewards/rejected: -1.0650
+- Rewards/accuracies: 0.5657
+- Rewards/margins: 0.0765
+- Logps/rejected: -320.4450
+- Logps/chosen: -307.4615
+- Logits/rejected: -2.7535
+- Logits/chosen: -2.7599
 
 ## Model description
 
@@ -65,18 +61,18 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
+| 0.6916 | 0.08 | 100 | 0.6925 | -0.0162 | -0.0177 | 0.5280 | 0.0015 | -215.7187 | -210.2296 | -2.5058 | -2.5086 |
+| 0.6855 | 0.16 | 200 | 0.6880 | -0.0651 | -0.0772 | 0.5613 | 0.0121 | -221.6710 | -215.1240 | -2.5152 | -2.5178 |
+| 0.6825 | 0.24 | 300 | 0.6854 | -0.1874 | -0.2081 | 0.5473 | 0.0207 | -234.7546 | -227.3457 | -2.5175 | -2.5192 |
+| 0.6676 | 0.32 | 400 | 0.6827 | -0.2909 | -0.3222 | 0.5477 | 0.0313 | -246.1682 | -237.7042 | -2.5347 | -2.5368 |
+| 0.6458 | 0.4 | 500 | 0.6805 | -0.3693 | -0.4104 | 0.5567 | 0.0410 | -254.9852 | -245.5435 | -2.6328 | -2.6364 |
+| 0.6592 | 0.48 | 600 | 0.6789 | -0.6010 | -0.6528 | 0.5560 | 0.0518 | -279.2278 | -268.7087 | -2.6805 | -2.6845 |
+| 0.6107 | 0.56 | 700 | 0.6785 | -0.8159 | -0.8786 | 0.5550 | 0.0627 | -301.8047 | -290.1964 | -2.6914 | -2.6969 |
+| 0.6475 | 0.64 | 800 | 0.6770 | -0.8845 | -0.9544 | 0.5610 | 0.0699 | -309.3867 | -297.0627 | -2.7237 | -2.7295 |
+| 0.6639 | 0.72 | 900 | 0.6766 | -0.9705 | -1.0450 | 0.5667 | 0.0746 | -318.4507 | -305.6558 | -2.7464 | -2.7525 |
+| 0.6305 | 0.8 | 1000 | 0.6764 | -0.9844 | -1.0603 | 0.5680 | 0.0759 | -319.9799 | -307.0536 | -2.7543 | -2.7606 |
+| 0.6754 | 0.88 | 1100 | 0.6763 | -0.9882 | -1.0648 | 0.5687 | 0.0766 | -320.4283 | -307.4264 | -2.7538 | -2.7602 |
+| 0.6577 | 0.96 | 1200 | 0.6764 | -0.9885 | -1.0649 | 0.5663 | 0.0764 | -320.4412 | -307.4615 | -2.7538 | -2.7602 |
 
 
 ### Framework versions
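The card committed here carries no usage snippet yet. Below is a minimal sketch of loading this LoRA adapter with `peft`, assuming it is published on the Hub; the repo id is a hypothetical placeholder, and the prompt and generation settings are illustrative only:

```python
# Minimal sketch, not part of this commit. The adapter repo id below is a
# hypothetical placeholder; substitute the actual Hub location.
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

ADAPTER_ID = "your-username/zephyr-7b-dpo-lora-pairrm"  # hypothetical

# AutoPeftModelForCausalLM reads adapter_config.json, loads the recorded
# base model (mistralai/Mistral-7B-Instruct-v0.2), and attaches the LoRA
# weights stored in adapter_model.safetensors.
model = AutoPeftModelForCausalLM.from_pretrained(
    ADAPTER_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

prompt = "[INST] Summarize what DPO training changes about a model. [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```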
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2cab7cdc14994d01df2444438178c50ef99e56ba6a159f575b5ec4bfbd04d646
 size 671150064
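The weight file is tracked as a Git LFS pointer; only the `oid` changed in this commit while the byte size stayed the same. A small sketch, assuming the resolved binary sits on disk, to check it against the pointer above:

```python
# Sketch: verify a downloaded adapter_model.safetensors against the LFS
# pointer in this commit (expected oid and byte size copied from above).
import hashlib
import os

EXPECTED_OID = "2cab7cdc14994d01df2444438178c50ef99e56ba6a159f575b5ec4bfbd04d646"
EXPECTED_SIZE = 671150064

path = "adapter_model.safetensors"  # local path to the resolved LFS file

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("adapter_model.safetensors matches the LFS pointer")
```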
all_results.json
CHANGED
@@ -1,21 +1,21 @@
 {
     "epoch": 1.0,
-    "eval_logits/chosen": -
-    "eval_logits/rejected": -
-    "eval_logps/chosen": -
-    "eval_logps/rejected": -
-    "eval_loss": 0.
-    "eval_rewards/accuracies": 0.
-    "eval_rewards/chosen": -
-    "eval_rewards/margins": 0.
-    "eval_rewards/rejected": -1.
-    "eval_runtime":
+    "eval_logits/chosen": -2.75989031791687,
+    "eval_logits/rejected": -2.753535509109497,
+    "eval_logps/chosen": -307.4615173339844,
+    "eval_logps/rejected": -320.4450378417969,
+    "eval_loss": 0.6764041185379028,
+    "eval_rewards/accuracies": 0.565666675567627,
+    "eval_rewards/chosen": -0.988508403301239,
+    "eval_rewards/margins": 0.07645130157470703,
+    "eval_rewards/rejected": -1.0649596452713013,
+    "eval_runtime": 1679.0616,
     "eval_samples": 2994,
-    "eval_samples_per_second": 1.
-    "eval_steps_per_second": 0.
-    "train_loss": 0.
-    "train_runtime":
+    "eval_samples_per_second": 1.783,
+    "eval_steps_per_second": 0.223,
+    "train_loss": 0.6566078573155155,
+    "train_runtime": 38377.8314,
     "train_samples": 19996,
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_samples_per_second": 0.521,
+    "train_steps_per_second": 0.033
 }
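The derived fields in this file are internally consistent: throughput is samples divided by wall-clock runtime, and the DPO reward margin is the chosen reward minus the rejected reward. A quick sketch that re-derives them from the committed file:

```python
# Sketch: cross-check derived fields in all_results.json.
import json

with open("all_results.json") as f:
    results = json.load(f)

# Throughput = samples / runtime.
train_tps = results["train_samples"] / results["train_runtime"]
print(f"train_samples_per_second ~ {train_tps:.3f}")  # ~0.521

eval_tps = results["eval_samples"] / results["eval_runtime"]
print(f"eval_samples_per_second  ~ {eval_tps:.3f}")   # ~1.783

# Reward margin = chosen reward - rejected reward.
margin = results["eval_rewards/chosen"] - results["eval_rewards/rejected"]
print(f"eval_rewards/margins     ~ {margin:.4f}")     # ~0.0765
```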
eval_results.json
CHANGED
@@ -1,16 +1,16 @@
 {
     "epoch": 1.0,
-    "eval_logits/chosen": -
-    "eval_logits/rejected": -
-    "eval_logps/chosen": -
-    "eval_logps/rejected": -
-    "eval_loss": 0.
-    "eval_rewards/accuracies": 0.
-    "eval_rewards/chosen": -
-    "eval_rewards/margins": 0.
-    "eval_rewards/rejected": -1.
-    "eval_runtime":
+    "eval_logits/chosen": -2.75989031791687,
+    "eval_logits/rejected": -2.753535509109497,
+    "eval_logps/chosen": -307.4615173339844,
+    "eval_logps/rejected": -320.4450378417969,
+    "eval_loss": 0.6764041185379028,
+    "eval_rewards/accuracies": 0.565666675567627,
+    "eval_rewards/chosen": -0.988508403301239,
+    "eval_rewards/margins": 0.07645130157470703,
+    "eval_rewards/rejected": -1.0649596452713013,
+    "eval_runtime": 1679.0616,
     "eval_samples": 2994,
-    "eval_samples_per_second": 1.
-    "eval_steps_per_second": 0.
+    "eval_samples_per_second": 1.783,
+    "eval_steps_per_second": 0.223
 }
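`eval_rewards/accuracies` (~0.566 here) is the fraction of evaluation pairs for which the implicit DPO reward of the chosen response exceeds that of the rejected one. A toy sketch of that computation, using made-up reward values rather than model outputs:

```python
# Toy sketch: how rewards/accuracies is derived from per-pair rewards.
# The reward lists below are illustrative values, not model outputs.
chosen_rewards = [-0.91, -1.02, -0.85, -1.10]
rejected_rewards = [-1.05, -0.99, -1.20, -1.00]

# A pair counts as correct when the chosen reward beats the rejected one.
wins = sum(c > r for c, r in zip(chosen_rewards, rejected_rewards))
accuracy = wins / len(chosen_rewards)
print(f"rewards/accuracies = {accuracy:.2f}")  # 0.50 for this toy batch
```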
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.
-    "train_runtime":
+    "train_loss": 0.6566078573155155,
+    "train_runtime": 38377.8314,
     "train_samples": 19996,
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_samples_per_second": 0.521,
+    "train_steps_per_second": 0.033
 }
trainer_state.json
CHANGED
The diff for this file is too large to render; see the raw diff.
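Although the `trainer_state.json` diff is too large to render here, the file follows the standard `transformers` Trainer layout: a `log_history` list of per-step dicts. A sketch, assuming that standard format, that recovers the evaluation-loss curve shown in the README table:

```python
# Sketch: extract the eval-loss curve from trainer_state.json
# (standard transformers Trainer state format).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Evaluation entries in log_history carry eval_* keys alongside "step".
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(entry["step"], entry["eval_loss"])
```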