MathGenie committed on
Commit
c8da03c
1 Parent(s): 32f8c56

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -61
README.md CHANGED
@@ -65,67 +65,6 @@ The following hyperparameters were used during training:
65
  - lr_scheduler_warmup_ratio: 0.1
66
  - num_epochs: 2
67
 
68
- ### Training results
69
-
70
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
71
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
72
- | 0.6953 | 0.04 | 100 | 0.6942 | 0.0113 | 0.0155 | 0.5357 | -0.0042 | -125.2162 | -99.8434 | -2.2407 | -2.5020 |
73
- | 0.6799 | 0.07 | 200 | 0.6807 | 0.1508 | 0.0991 | 0.7857 | 0.0517 | -124.3804 | -98.4486 | -2.2421 | -2.5021 |
74
- | 0.6328 | 0.11 | 300 | 0.6358 | 0.6189 | 0.4340 | 0.8214 | 0.1850 | -121.0318 | -93.7668 | -2.2536 | -2.5140 |
75
- | 0.5964 | 0.15 | 400 | 0.5837 | 1.3765 | 0.9356 | 0.7679 | 0.4409 | -116.0156 | -86.1915 | -2.2736 | -2.5321 |
76
- | 0.5567 | 0.18 | 500 | 0.5355 | 1.8935 | 1.1726 | 0.8393 | 0.7208 | -113.6451 | -81.0216 | -2.3121 | -2.5696 |
77
- | 0.5234 | 0.22 | 600 | 0.4918 | 2.0220 | 1.0473 | 0.8571 | 0.9747 | -114.8989 | -79.7362 | -2.3776 | -2.6315 |
78
- | 0.4468 | 0.25 | 700 | 0.4557 | 2.0942 | 0.8329 | 0.8571 | 1.2613 | -117.0424 | -79.0143 | -2.4109 | -2.6510 |
79
- | 0.4445 | 0.29 | 800 | 0.4324 | 1.6535 | 0.1909 | 0.8929 | 1.4627 | -123.4627 | -83.4209 | -2.3914 | -2.6052 |
80
- | 0.4219 | 0.33 | 900 | 0.4065 | 1.6377 | -0.0568 | 0.9107 | 1.6945 | -125.9393 | -83.5790 | -2.3703 | -2.5723 |
81
- | 0.4453 | 0.36 | 1000 | 0.3895 | 1.5452 | -0.2159 | 0.9286 | 1.7611 | -127.5303 | -84.5038 | -2.4480 | -2.6886 |
82
- | 0.372 | 0.4 | 1100 | 0.3739 | 1.6809 | -0.3796 | 0.8929 | 2.0605 | -129.1675 | -83.1477 | -2.3805 | -2.6049 |
83
- | 0.3191 | 0.44 | 1200 | 0.3635 | 1.3180 | -0.7374 | 0.875 | 2.0554 | -132.7456 | -86.7765 | -2.3742 | -2.6045 |
84
- | 0.3319 | 0.47 | 1300 | 0.3556 | 1.9033 | -0.2537 | 0.875 | 2.1570 | -127.9082 | -80.9230 | -2.4013 | -2.6385 |
85
- | 0.3469 | 0.51 | 1400 | 0.3462 | 1.4997 | -0.7342 | 0.8929 | 2.2339 | -132.7133 | -84.9589 | -2.3969 | -2.6331 |
86
- | 0.2976 | 0.54 | 1500 | 0.3363 | 1.3404 | -0.9610 | 0.8929 | 2.3014 | -134.9813 | -86.5523 | -2.3936 | -2.6264 |
87
- | 0.2839 | 0.58 | 1600 | 0.3325 | 1.5509 | -0.9441 | 0.8571 | 2.4950 | -134.8124 | -84.4469 | -2.3639 | -2.5862 |
88
- | 0.3095 | 0.62 | 1700 | 0.3237 | 1.1956 | -1.3235 | 0.8929 | 2.5190 | -138.6060 | -88.0005 | -2.3813 | -2.6024 |
89
- | 0.2593 | 0.65 | 1800 | 0.3188 | 1.2642 | -1.2408 | 0.8393 | 2.5050 | -137.7795 | -87.3140 | -2.3843 | -2.6131 |
90
- | 0.2394 | 0.69 | 1900 | 0.3111 | 1.3483 | -1.1915 | 0.8393 | 2.5398 | -137.2868 | -86.4737 | -2.3883 | -2.6142 |
91
- | 0.3234 | 0.73 | 2000 | 0.3054 | 1.3375 | -1.2938 | 0.8393 | 2.6313 | -138.3099 | -86.5813 | -2.3702 | -2.5965 |
92
- | 0.2532 | 0.76 | 2100 | 0.3038 | 1.3038 | -1.4223 | 0.8571 | 2.7261 | -139.5943 | -86.9184 | -2.3411 | -2.5582 |
93
- | 0.2862 | 0.8 | 2200 | 0.2990 | 0.9190 | -1.6769 | 0.8929 | 2.5959 | -142.1404 | -90.7659 | -2.3869 | -2.6118 |
94
- | 0.2972 | 0.83 | 2300 | 0.2962 | 1.3084 | -1.4975 | 0.8571 | 2.8059 | -140.3468 | -86.8725 | -2.3515 | -2.5680 |
95
- | 0.2819 | 0.87 | 2400 | 0.2932 | 1.0128 | -1.7007 | 0.875 | 2.7134 | -142.3783 | -89.8287 | -2.3793 | -2.5986 |
96
- | 0.2523 | 0.91 | 2500 | 0.2887 | 1.2417 | -1.6991 | 0.8571 | 2.9408 | -142.3625 | -87.5393 | -2.3238 | -2.5295 |
97
- | 0.2534 | 0.94 | 2600 | 0.2876 | 1.0492 | -1.8303 | 0.8214 | 2.8795 | -143.6740 | -89.4638 | -2.3452 | -2.5586 |
98
- | 0.2065 | 0.98 | 2700 | 0.2806 | 0.8720 | -1.9390 | 0.8571 | 2.8109 | -144.7613 | -91.2366 | -2.3644 | -2.5848 |
99
- | 0.1669 | 1.02 | 2800 | 0.2792 | 1.1195 | -2.0235 | 0.875 | 3.1430 | -145.6067 | -88.7613 | -2.2594 | -2.4376 |
100
- | 0.2042 | 1.05 | 2900 | 0.2784 | 0.9707 | -2.2345 | 0.8929 | 3.2052 | -147.7169 | -90.2493 | -2.2788 | -2.4633 |
101
- | 0.1529 | 1.09 | 3000 | 0.2779 | 1.0919 | -2.2812 | 0.8929 | 3.3732 | -148.1836 | -89.0369 | -2.2437 | -2.4170 |
102
- | 0.1675 | 1.13 | 3100 | 0.2778 | 1.1553 | -2.3082 | 0.8929 | 3.4635 | -148.4539 | -88.4035 | -2.2311 | -2.3938 |
103
- | 0.1542 | 1.16 | 3200 | 0.2764 | 0.9350 | -2.5717 | 0.875 | 3.5067 | -151.0882 | -90.6065 | -2.2590 | -2.4346 |
104
- | 0.1694 | 1.2 | 3300 | 0.2728 | 0.8873 | -2.5803 | 0.8929 | 3.4676 | -151.1744 | -91.0834 | -2.2580 | -2.4308 |
105
- | 0.1763 | 1.23 | 3400 | 0.2699 | 0.8933 | -2.6673 | 0.8929 | 3.5606 | -152.0449 | -91.0235 | -2.2436 | -2.4141 |
106
- | 0.1526 | 1.27 | 3500 | 0.2666 | 0.8111 | -2.8087 | 0.875 | 3.6198 | -153.4587 | -91.8457 | -2.2513 | -2.4212 |
107
- | 0.1819 | 1.31 | 3600 | 0.2657 | 0.8766 | -2.7215 | 0.8929 | 3.5981 | -152.5868 | -91.1904 | -2.2677 | -2.4482 |
108
- | 0.1192 | 1.34 | 3700 | 0.2634 | 0.8643 | -2.7682 | 0.8929 | 3.6325 | -153.0537 | -91.3131 | -2.2649 | -2.4439 |
109
- | 0.144 | 1.38 | 3800 | 0.2639 | 0.7928 | -2.8695 | 0.8929 | 3.6623 | -154.0669 | -92.0286 | -2.2687 | -2.4451 |
110
- | 0.1603 | 1.42 | 3900 | 0.2631 | 0.9961 | -2.6996 | 0.875 | 3.6957 | -152.3678 | -89.9953 | -2.2720 | -2.4476 |
111
- | 0.2054 | 1.45 | 4000 | 0.2591 | 0.7753 | -2.8107 | 0.8929 | 3.5860 | -153.4788 | -92.2032 | -2.3164 | -2.5069 |
112
- | 0.1413 | 1.49 | 4100 | 0.2604 | 0.7969 | -2.8696 | 0.8929 | 3.6664 | -154.0672 | -91.9876 | -2.2993 | -2.4855 |
113
- | 0.1498 | 1.52 | 4200 | 0.2579 | 0.7998 | -2.8686 | 0.8929 | 3.6684 | -154.0579 | -91.9587 | -2.3021 | -2.4863 |
114
- | 0.174 | 1.56 | 4300 | 0.2609 | 0.7800 | -3.0276 | 0.8929 | 3.8076 | -155.6475 | -92.1565 | -2.2400 | -2.4000 |
115
- | 0.1129 | 1.6 | 4400 | 0.2576 | 0.7289 | -2.9876 | 0.8929 | 3.7165 | -155.2476 | -92.6674 | -2.2932 | -2.4758 |
116
- | 0.1424 | 1.63 | 4500 | 0.2585 | 0.7887 | -2.9462 | 0.8929 | 3.7349 | -154.8336 | -92.0694 | -2.3023 | -2.4859 |
117
- | 0.1531 | 1.67 | 4600 | 0.2570 | 0.7661 | -2.9310 | 0.8929 | 3.6971 | -154.6814 | -92.2956 | -2.3090 | -2.4970 |
118
- | 0.1295 | 1.71 | 4700 | 0.2564 | 0.6730 | -3.0214 | 0.8929 | 3.6944 | -155.5855 | -93.2258 | -2.3086 | -2.4952 |
119
- | 0.1277 | 1.74 | 4800 | 0.2575 | 0.6849 | -2.9809 | 0.8929 | 3.6658 | -155.1802 | -93.1070 | -2.3162 | -2.5054 |
120
- | 0.1166 | 1.78 | 4900 | 0.2568 | 0.7091 | -2.9715 | 0.8929 | 3.6807 | -155.0869 | -92.8652 | -2.3099 | -2.4983 |
121
- | 0.1273 | 1.81 | 5000 | 0.2564 | 0.7316 | -2.9732 | 0.8929 | 3.7049 | -155.1039 | -92.6402 | -2.3100 | -2.4976 |
122
- | 0.1221 | 1.85 | 5100 | 0.2575 | 0.7393 | -2.9714 | 0.9107 | 3.7107 | -155.0852 | -92.5628 | -2.2967 | -2.4797 |
123
- | 0.1752 | 1.89 | 5200 | 0.2568 | 0.7439 | -2.9732 | 0.9107 | 3.7171 | -155.1031 | -92.5173 | -2.2978 | -2.4828 |
124
- | 0.1235 | 1.92 | 5300 | 0.2566 | 0.7413 | -2.9777 | 0.8929 | 3.7190 | -155.1483 | -92.5434 | -2.3077 | -2.4947 |
125
- | 0.1288 | 1.96 | 5400 | 0.2569 | 0.7385 | -2.9689 | 0.9107 | 3.7074 | -155.0609 | -92.5715 | -2.3040 | -2.4889 |
126
- | 0.145 | 2.0 | 5500 | 0.2572 | 0.7366 | -2.9817 | 0.8929 | 3.7183 | -155.1884 | -92.5904 | -2.3032 | -2.4880 |
127
-
128
-
129
  ### Framework versions
130
 
131
  - Transformers 4.38.2
 
65
  - lr_scheduler_warmup_ratio: 0.1
66
  - num_epochs: 2
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  ### Framework versions
69
 
70
  - Transformers 4.38.2