tiagoblima commited on
Commit
378a488
1 Parent(s): ab5985e

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -1
  2. all_results.json +11 -11
  3. eval_results.json +6 -6
  4. train_results.json +6 -6
  5. trainer_state.json +650 -364
README.md CHANGED
@@ -3,6 +3,8 @@ license: mit
3
  base_model: unicamp-dl/ptt5-base-portuguese-vocab
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: t5_base-qg-ap-test
8
  results: []
@@ -13,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # t5_base-qg-ap-test
15
 
16
- This model is a fine-tuned version of [unicamp-dl/ptt5-base-portuguese-vocab](https://huggingface.co/unicamp-dl/ptt5-base-portuguese-vocab) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - Loss: 0.0163
19
 
 
3
  base_model: unicamp-dl/ptt5-base-portuguese-vocab
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - tiagoblima/du-qg-squadv1_pt
8
  model-index:
9
  - name: t5_base-qg-ap-test
10
  results: []
 
15
 
16
  # t5_base-qg-ap-test
17
 
18
+ This model is a fine-tuned version of [unicamp-dl/ptt5-base-portuguese-vocab](https://huggingface.co/unicamp-dl/ptt5-base-portuguese-vocab) on the tiagoblima/du-qg-squadv1_pt dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.0163
21
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_loss": 1.1012225151062012,
4
- "eval_runtime": 102.3632,
5
- "eval_samples": 3636,
6
- "eval_samples_per_second": 35.521,
7
- "eval_steps_per_second": 8.88,
8
- "train_loss": 0.9537819692403964,
9
- "train_runtime": 26215.2524,
10
- "train_samples": 51704,
11
- "train_samples_per_second": 9.861,
12
- "train_steps_per_second": 0.308
13
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.016336046159267426,
4
+ "eval_runtime": 0.2314,
5
+ "eval_samples": 8,
6
+ "eval_samples_per_second": 34.579,
7
+ "eval_steps_per_second": 4.322,
8
+ "train_loss": 1.40092041015625,
9
+ "train_runtime": 1064.353,
10
+ "train_samples": 8,
11
+ "train_samples_per_second": 0.752,
12
+ "train_steps_per_second": 0.094
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_loss": 1.1012225151062012,
4
- "eval_runtime": 102.3632,
5
- "eval_samples": 3636,
6
- "eval_samples_per_second": 35.521,
7
- "eval_steps_per_second": 8.88
8
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.016336046159267426,
4
+ "eval_runtime": 0.2314,
5
+ "eval_samples": 8,
6
+ "eval_samples_per_second": 34.579,
7
+ "eval_steps_per_second": 4.322
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.9537819692403964,
4
- "train_runtime": 26215.2524,
5
- "train_samples": 51704,
6
- "train_samples_per_second": 9.861,
7
- "train_steps_per_second": 0.308
8
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "train_loss": 1.40092041015625,
4
+ "train_runtime": 1064.353,
5
+ "train_samples": 8,
6
+ "train_samples_per_second": 0.752,
7
+ "train_steps_per_second": 0.094
8
  }
trainer_state.json CHANGED
@@ -1,548 +1,834 @@
1
  {
2
- "best_metric": 1.1012225151062012,
3
- "best_model_checkpoint": "/temp/t5_base-qg-ap-test/checkpoint-6464",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 8080,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06,
13
- "learning_rate": 9.876237623762377e-05,
14
- "loss": 1.9841,
15
- "step": 100
 
 
16
  },
17
  {
18
- "epoch": 0.12,
19
- "learning_rate": 9.752475247524753e-05,
20
- "loss": 1.2569,
21
- "step": 200
 
 
22
  },
23
  {
24
- "epoch": 0.19,
25
- "learning_rate": 9.628712871287129e-05,
26
- "loss": 1.2054,
27
- "step": 300
 
 
28
  },
29
  {
30
- "epoch": 0.25,
31
- "learning_rate": 9.504950495049505e-05,
32
- "loss": 1.1704,
33
- "step": 400
 
 
34
  },
35
  {
36
- "epoch": 0.31,
37
- "learning_rate": 9.381188118811881e-05,
38
- "loss": 1.1504,
39
- "step": 500
 
 
40
  },
41
  {
42
- "epoch": 0.37,
43
- "learning_rate": 9.257425742574259e-05,
44
- "loss": 1.14,
45
- "step": 600
 
 
46
  },
47
  {
48
- "epoch": 0.43,
49
- "learning_rate": 9.133663366336635e-05,
50
- "loss": 1.1303,
51
- "step": 700
 
 
52
  },
53
  {
54
- "epoch": 0.5,
55
- "learning_rate": 9.009900990099011e-05,
56
- "loss": 1.1022,
57
- "step": 800
 
 
58
  },
59
  {
60
- "epoch": 0.56,
61
- "learning_rate": 8.886138613861387e-05,
62
- "loss": 1.0985,
63
- "step": 900
 
 
64
  },
65
  {
66
- "epoch": 0.62,
67
- "learning_rate": 8.762376237623763e-05,
68
- "loss": 1.0944,
69
- "step": 1000
 
 
70
  },
71
  {
72
- "epoch": 0.68,
73
- "learning_rate": 8.638613861386139e-05,
74
- "loss": 1.0762,
75
- "step": 1100
 
 
76
  },
77
  {
78
- "epoch": 0.74,
79
- "learning_rate": 8.514851485148515e-05,
80
- "loss": 1.0784,
81
- "step": 1200
 
 
82
  },
83
  {
84
- "epoch": 0.8,
85
- "learning_rate": 8.391089108910891e-05,
86
- "loss": 1.076,
87
- "step": 1300
 
 
88
  },
89
  {
90
- "epoch": 0.87,
91
- "learning_rate": 8.267326732673268e-05,
92
- "loss": 1.0672,
93
- "step": 1400
 
 
94
  },
95
  {
96
- "epoch": 0.93,
97
- "learning_rate": 8.143564356435644e-05,
98
- "loss": 1.0673,
99
- "step": 1500
 
 
100
  },
101
  {
102
- "epoch": 0.99,
103
- "learning_rate": 8.019801980198021e-05,
104
- "loss": 1.0561,
105
- "step": 1600
 
 
106
  },
107
  {
108
- "epoch": 1.0,
109
- "eval_loss": 1.1370612382888794,
110
- "eval_runtime": 102.4778,
111
- "eval_samples_per_second": 35.481,
112
- "eval_steps_per_second": 8.87,
113
- "step": 1616
114
  },
115
  {
116
- "epoch": 1.05,
117
- "learning_rate": 7.896039603960397e-05,
118
- "loss": 0.989,
119
- "step": 1700
 
 
120
  },
121
  {
122
- "epoch": 1.11,
123
- "learning_rate": 7.772277227722773e-05,
124
- "loss": 0.9881,
125
- "step": 1800
 
 
126
  },
127
  {
128
- "epoch": 1.18,
129
- "learning_rate": 7.64851485148515e-05,
130
- "loss": 0.987,
131
- "step": 1900
 
 
132
  },
133
  {
134
- "epoch": 1.24,
135
- "learning_rate": 7.524752475247526e-05,
136
- "loss": 0.9849,
137
- "step": 2000
 
 
138
  },
139
  {
140
- "epoch": 1.3,
141
- "learning_rate": 7.400990099009902e-05,
142
- "loss": 0.9807,
143
- "step": 2100
 
 
144
  },
145
  {
146
- "epoch": 1.36,
147
- "learning_rate": 7.277227722772278e-05,
148
- "loss": 0.9784,
149
- "step": 2200
 
 
150
  },
151
  {
152
- "epoch": 1.42,
153
- "learning_rate": 7.153465346534654e-05,
154
- "loss": 0.9868,
155
- "step": 2300
 
 
156
  },
157
  {
158
- "epoch": 1.49,
159
- "learning_rate": 7.02970297029703e-05,
160
- "loss": 0.9792,
161
- "step": 2400
 
 
162
  },
163
  {
164
- "epoch": 1.55,
165
- "learning_rate": 6.905940594059406e-05,
166
- "loss": 0.988,
167
- "step": 2500
 
 
168
  },
169
  {
170
- "epoch": 1.61,
171
- "learning_rate": 6.782178217821783e-05,
172
- "loss": 0.9806,
173
- "step": 2600
 
 
174
  },
175
  {
176
- "epoch": 1.67,
177
- "learning_rate": 6.658415841584159e-05,
178
- "loss": 0.9612,
179
- "step": 2700
 
 
180
  },
181
  {
182
- "epoch": 1.73,
183
- "learning_rate": 6.534653465346535e-05,
184
- "loss": 0.9632,
185
- "step": 2800
 
 
186
  },
187
  {
188
- "epoch": 1.79,
189
- "learning_rate": 6.410891089108911e-05,
190
- "loss": 0.9861,
191
- "step": 2900
 
 
192
  },
193
  {
194
- "epoch": 1.86,
195
- "learning_rate": 6.287128712871287e-05,
196
- "loss": 0.968,
197
- "step": 3000
 
 
198
  },
199
  {
200
- "epoch": 1.92,
201
- "learning_rate": 6.163366336633663e-05,
202
- "loss": 0.9727,
203
- "step": 3100
 
 
204
  },
205
  {
206
- "epoch": 1.98,
207
- "learning_rate": 6.03960396039604e-05,
208
- "loss": 0.9695,
209
- "step": 3200
 
 
210
  },
211
  {
212
- "epoch": 2.0,
213
- "eval_loss": 1.1089671850204468,
214
- "eval_runtime": 102.4537,
215
- "eval_samples_per_second": 35.489,
216
- "eval_steps_per_second": 8.872,
217
- "step": 3232
218
  },
219
  {
220
- "epoch": 2.04,
221
- "learning_rate": 5.915841584158416e-05,
222
- "loss": 0.9333,
223
- "step": 3300
 
 
224
  },
225
  {
226
- "epoch": 2.1,
227
- "learning_rate": 5.792079207920792e-05,
228
- "loss": 0.9114,
229
- "step": 3400
 
 
230
  },
231
  {
232
- "epoch": 2.17,
233
- "learning_rate": 5.668316831683168e-05,
234
- "loss": 0.9173,
235
- "step": 3500
 
 
236
  },
237
  {
238
- "epoch": 2.23,
239
- "learning_rate": 5.544554455445545e-05,
240
- "loss": 0.9131,
241
- "step": 3600
 
 
242
  },
243
  {
244
- "epoch": 2.29,
245
- "learning_rate": 5.420792079207921e-05,
246
- "loss": 0.9064,
247
- "step": 3700
 
 
248
  },
249
  {
250
- "epoch": 2.35,
251
- "learning_rate": 5.2970297029702974e-05,
252
- "loss": 0.9113,
253
- "step": 3800
 
 
254
  },
255
  {
256
- "epoch": 2.41,
257
- "learning_rate": 5.1732673267326735e-05,
258
- "loss": 0.8984,
259
- "step": 3900
 
 
260
  },
261
  {
262
- "epoch": 2.48,
263
- "learning_rate": 5.0495049504950497e-05,
264
- "loss": 0.9149,
265
- "step": 4000
 
 
266
  },
267
  {
268
- "epoch": 2.54,
269
- "learning_rate": 4.925742574257426e-05,
270
- "loss": 0.9041,
271
- "step": 4100
 
 
272
  },
273
  {
274
- "epoch": 2.6,
275
- "learning_rate": 4.801980198019802e-05,
276
- "loss": 0.9137,
277
- "step": 4200
 
 
278
  },
279
  {
280
- "epoch": 2.66,
281
- "learning_rate": 4.678217821782179e-05,
282
- "loss": 0.9117,
283
- "step": 4300
 
 
284
  },
285
  {
286
- "epoch": 2.72,
287
- "learning_rate": 4.554455445544555e-05,
288
- "loss": 0.9024,
289
- "step": 4400
 
 
290
  },
291
  {
292
- "epoch": 2.78,
293
- "learning_rate": 4.430693069306931e-05,
294
- "loss": 0.914,
295
- "step": 4500
 
 
296
  },
297
  {
298
- "epoch": 2.85,
299
- "learning_rate": 4.306930693069307e-05,
300
- "loss": 0.9295,
301
- "step": 4600
 
 
302
  },
303
  {
304
- "epoch": 2.91,
305
- "learning_rate": 4.183168316831683e-05,
306
- "loss": 0.9088,
307
- "step": 4700
 
 
308
  },
309
  {
310
- "epoch": 2.97,
311
- "learning_rate": 4.05940594059406e-05,
312
- "loss": 0.9111,
313
- "step": 4800
 
 
314
  },
315
  {
316
- "epoch": 3.0,
317
- "eval_loss": 1.101236343383789,
318
- "eval_runtime": 102.4816,
319
- "eval_samples_per_second": 35.48,
320
- "eval_steps_per_second": 8.87,
321
- "step": 4848
322
  },
323
  {
324
- "epoch": 3.03,
325
- "learning_rate": 3.935643564356436e-05,
326
- "loss": 0.8905,
327
- "step": 4900
 
 
328
  },
329
  {
330
- "epoch": 3.09,
331
- "learning_rate": 3.811881188118812e-05,
332
- "loss": 0.8628,
333
- "step": 5000
 
 
334
  },
335
  {
336
- "epoch": 3.16,
337
- "learning_rate": 3.6881188118811884e-05,
338
- "loss": 0.8712,
339
- "step": 5100
 
 
340
  },
341
  {
342
- "epoch": 3.22,
343
- "learning_rate": 3.5643564356435645e-05,
344
- "loss": 0.8545,
345
- "step": 5200
 
 
346
  },
347
  {
348
- "epoch": 3.28,
349
- "learning_rate": 3.440594059405941e-05,
350
- "loss": 0.8793,
351
- "step": 5300
 
 
352
  },
353
  {
354
- "epoch": 3.34,
355
- "learning_rate": 3.3168316831683175e-05,
356
- "loss": 0.8677,
357
- "step": 5400
 
 
358
  },
359
  {
360
- "epoch": 3.4,
361
- "learning_rate": 3.1930693069306936e-05,
362
- "loss": 0.8716,
363
- "step": 5500
 
 
364
  },
365
  {
366
- "epoch": 3.47,
367
- "learning_rate": 3.06930693069307e-05,
368
- "loss": 0.8708,
369
- "step": 5600
 
 
370
  },
371
  {
372
- "epoch": 3.53,
373
- "learning_rate": 2.9455445544554455e-05,
374
- "loss": 0.8782,
375
- "step": 5700
 
 
376
  },
377
  {
378
- "epoch": 3.59,
379
- "learning_rate": 2.8217821782178216e-05,
380
- "loss": 0.8651,
381
- "step": 5800
 
 
382
  },
383
  {
384
- "epoch": 3.65,
385
- "learning_rate": 2.6980198019801985e-05,
386
- "loss": 0.8597,
387
- "step": 5900
 
 
388
  },
389
  {
390
- "epoch": 3.71,
391
- "learning_rate": 2.5742574257425746e-05,
392
- "loss": 0.8516,
393
- "step": 6000
 
 
394
  },
395
  {
396
- "epoch": 3.77,
397
- "learning_rate": 2.4504950495049507e-05,
398
- "loss": 0.8759,
399
- "step": 6100
 
 
400
  },
401
  {
402
- "epoch": 3.84,
403
- "learning_rate": 2.326732673267327e-05,
404
- "loss": 0.8631,
405
- "step": 6200
 
 
406
  },
407
  {
408
- "epoch": 3.9,
409
- "learning_rate": 2.202970297029703e-05,
410
- "loss": 0.8636,
411
- "step": 6300
 
 
412
  },
413
  {
414
- "epoch": 3.96,
415
- "learning_rate": 2.079207920792079e-05,
416
- "loss": 0.8691,
417
- "step": 6400
 
 
418
  },
419
  {
420
- "epoch": 4.0,
421
- "eval_loss": 1.1012225151062012,
422
- "eval_runtime": 102.0015,
423
- "eval_samples_per_second": 35.647,
424
- "eval_steps_per_second": 8.912,
425
- "step": 6464
426
  },
427
  {
428
- "epoch": 4.02,
429
- "learning_rate": 1.9554455445544556e-05,
430
- "loss": 0.8573,
431
- "step": 6500
 
 
432
  },
433
  {
434
- "epoch": 4.08,
435
- "learning_rate": 1.8316831683168317e-05,
436
- "loss": 0.8536,
437
- "step": 6600
 
 
438
  },
439
  {
440
- "epoch": 4.15,
441
- "learning_rate": 1.707920792079208e-05,
442
- "loss": 0.8329,
443
- "step": 6700
 
 
444
  },
445
  {
446
- "epoch": 4.21,
447
- "learning_rate": 1.5841584158415843e-05,
448
- "loss": 0.8407,
449
- "step": 6800
 
 
450
  },
451
  {
452
- "epoch": 4.27,
453
- "learning_rate": 1.4603960396039604e-05,
454
- "loss": 0.8525,
455
- "step": 6900
 
 
456
  },
457
  {
458
- "epoch": 4.33,
459
- "learning_rate": 1.3366336633663367e-05,
460
- "loss": 0.8473,
461
- "step": 7000
 
 
462
  },
463
  {
464
- "epoch": 4.39,
465
- "learning_rate": 1.2128712871287128e-05,
466
- "loss": 0.825,
467
- "step": 7100
 
 
468
  },
469
  {
470
- "epoch": 4.46,
471
- "learning_rate": 1.0891089108910891e-05,
472
- "loss": 0.834,
473
- "step": 7200
 
 
474
  },
475
  {
476
- "epoch": 4.52,
477
- "learning_rate": 9.653465346534654e-06,
478
- "loss": 0.8397,
479
- "step": 7300
 
 
480
  },
481
  {
482
- "epoch": 4.58,
483
- "learning_rate": 8.415841584158417e-06,
484
- "loss": 0.8436,
485
- "step": 7400
 
 
486
  },
487
  {
488
- "epoch": 4.64,
489
- "learning_rate": 7.178217821782178e-06,
490
- "loss": 0.8353,
491
- "step": 7500
 
 
492
  },
493
  {
494
- "epoch": 4.7,
495
- "learning_rate": 5.940594059405941e-06,
496
- "loss": 0.8332,
497
- "step": 7600
 
 
498
  },
499
  {
500
- "epoch": 4.76,
501
- "learning_rate": 4.702970297029704e-06,
502
- "loss": 0.8402,
503
- "step": 7700
 
 
504
  },
505
  {
506
- "epoch": 4.83,
507
- "learning_rate": 3.4653465346534657e-06,
508
- "loss": 0.8526,
509
- "step": 7800
 
 
510
  },
511
  {
512
- "epoch": 4.89,
513
- "learning_rate": 2.227722772277228e-06,
514
- "loss": 0.827,
515
- "step": 7900
 
 
516
  },
517
  {
518
- "epoch": 4.95,
519
- "learning_rate": 9.900990099009902e-07,
520
- "loss": 0.8543,
521
- "step": 8000
 
 
522
  },
523
  {
524
- "epoch": 5.0,
525
- "eval_loss": 1.103607177734375,
526
- "eval_runtime": 102.3147,
527
- "eval_samples_per_second": 35.537,
528
- "eval_steps_per_second": 8.884,
529
- "step": 8080
530
  },
531
  {
532
- "epoch": 5.0,
533
- "step": 8080,
534
- "total_flos": 1.574277938675712e+17,
535
- "train_loss": 0.9537819692403964,
536
- "train_runtime": 26215.2524,
537
- "train_samples_per_second": 9.861,
538
- "train_steps_per_second": 0.308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  }
540
  ],
541
  "logging_steps": 100,
542
- "max_steps": 8080,
543
- "num_train_epochs": 5,
544
  "save_steps": 500,
545
- "total_flos": 1.574277938675712e+17,
546
  "trial_name": null,
547
  "trial_params": null
548
  }
 
1
  {
2
+ "best_metric": 0.016336046159267426,
3
+ "best_model_checkpoint": "/temp/t5_base-qg-ap-test/checkpoint-100",
4
+ "epoch": 100.0,
5
  "eval_steps": 500,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_loss": 12.805366516113281,
14
+ "eval_runtime": 0.2208,
15
+ "eval_samples_per_second": 36.238,
16
+ "eval_steps_per_second": 4.53,
17
+ "step": 1
18
  },
19
  {
20
+ "epoch": 2.0,
21
+ "eval_loss": 10.788012504577637,
22
+ "eval_runtime": 0.2356,
23
+ "eval_samples_per_second": 33.954,
24
+ "eval_steps_per_second": 4.244,
25
+ "step": 2
26
  },
27
  {
28
+ "epoch": 3.0,
29
+ "eval_loss": 8.873129844665527,
30
+ "eval_runtime": 0.2362,
31
+ "eval_samples_per_second": 33.872,
32
+ "eval_steps_per_second": 4.234,
33
+ "step": 3
34
  },
35
  {
36
+ "epoch": 4.0,
37
+ "eval_loss": 7.4068284034729,
38
+ "eval_runtime": 0.2351,
39
+ "eval_samples_per_second": 34.03,
40
+ "eval_steps_per_second": 4.254,
41
+ "step": 4
42
  },
43
  {
44
+ "epoch": 5.0,
45
+ "eval_loss": 6.458061218261719,
46
+ "eval_runtime": 0.2334,
47
+ "eval_samples_per_second": 34.275,
48
+ "eval_steps_per_second": 4.284,
49
+ "step": 5
50
  },
51
  {
52
+ "epoch": 6.0,
53
+ "eval_loss": 5.647502422332764,
54
+ "eval_runtime": 0.2335,
55
+ "eval_samples_per_second": 34.259,
56
+ "eval_steps_per_second": 4.282,
57
+ "step": 6
58
  },
59
  {
60
+ "epoch": 7.0,
61
+ "eval_loss": 4.959558486938477,
62
+ "eval_runtime": 0.2256,
63
+ "eval_samples_per_second": 35.464,
64
+ "eval_steps_per_second": 4.433,
65
+ "step": 7
66
  },
67
  {
68
+ "epoch": 8.0,
69
+ "eval_loss": 4.50578498840332,
70
+ "eval_runtime": 0.2359,
71
+ "eval_samples_per_second": 33.916,
72
+ "eval_steps_per_second": 4.239,
73
+ "step": 8
74
  },
75
  {
76
+ "epoch": 9.0,
77
+ "eval_loss": 4.076832294464111,
78
+ "eval_runtime": 0.2354,
79
+ "eval_samples_per_second": 33.984,
80
+ "eval_steps_per_second": 4.248,
81
+ "step": 9
82
  },
83
  {
84
+ "epoch": 10.0,
85
+ "eval_loss": 3.704714059829712,
86
+ "eval_runtime": 0.2353,
87
+ "eval_samples_per_second": 33.999,
88
+ "eval_steps_per_second": 4.25,
89
+ "step": 10
90
  },
91
  {
92
+ "epoch": 11.0,
93
+ "eval_loss": 3.4143009185791016,
94
+ "eval_runtime": 0.2359,
95
+ "eval_samples_per_second": 33.914,
96
+ "eval_steps_per_second": 4.239,
97
+ "step": 11
98
  },
99
  {
100
+ "epoch": 12.0,
101
+ "eval_loss": 3.1360208988189697,
102
+ "eval_runtime": 0.2359,
103
+ "eval_samples_per_second": 33.913,
104
+ "eval_steps_per_second": 4.239,
105
+ "step": 12
106
  },
107
  {
108
+ "epoch": 13.0,
109
+ "eval_loss": 2.8865740299224854,
110
+ "eval_runtime": 0.2356,
111
+ "eval_samples_per_second": 33.955,
112
+ "eval_steps_per_second": 4.244,
113
+ "step": 13
114
  },
115
  {
116
+ "epoch": 14.0,
117
+ "eval_loss": 2.6324548721313477,
118
+ "eval_runtime": 0.234,
119
+ "eval_samples_per_second": 34.185,
120
+ "eval_steps_per_second": 4.273,
121
+ "step": 14
122
  },
123
  {
124
+ "epoch": 15.0,
125
+ "eval_loss": 2.388902187347412,
126
+ "eval_runtime": 0.2365,
127
+ "eval_samples_per_second": 33.826,
128
+ "eval_steps_per_second": 4.228,
129
+ "step": 15
130
  },
131
  {
132
+ "epoch": 16.0,
133
+ "eval_loss": 2.1914408206939697,
134
+ "eval_runtime": 0.2324,
135
+ "eval_samples_per_second": 34.427,
136
+ "eval_steps_per_second": 4.303,
137
+ "step": 16
138
  },
139
  {
140
+ "epoch": 17.0,
141
+ "eval_loss": 2.0423526763916016,
142
+ "eval_runtime": 0.2363,
143
+ "eval_samples_per_second": 33.85,
144
+ "eval_steps_per_second": 4.231,
145
+ "step": 17
146
  },
147
  {
148
+ "epoch": 18.0,
149
+ "eval_loss": 1.9110742807388306,
150
+ "eval_runtime": 0.2247,
151
+ "eval_samples_per_second": 35.611,
152
+ "eval_steps_per_second": 4.451,
153
+ "step": 18
154
  },
155
  {
156
+ "epoch": 19.0,
157
+ "eval_loss": 1.7762634754180908,
158
+ "eval_runtime": 0.2365,
159
+ "eval_samples_per_second": 33.825,
160
+ "eval_steps_per_second": 4.228,
161
+ "step": 19
162
  },
163
  {
164
+ "epoch": 20.0,
165
+ "eval_loss": 1.6505231857299805,
166
+ "eval_runtime": 0.2253,
167
+ "eval_samples_per_second": 35.504,
168
+ "eval_steps_per_second": 4.438,
169
+ "step": 20
170
  },
171
  {
172
+ "epoch": 21.0,
173
+ "eval_loss": 1.525721788406372,
174
+ "eval_runtime": 0.2362,
175
+ "eval_samples_per_second": 33.872,
176
+ "eval_steps_per_second": 4.234,
177
+ "step": 21
178
  },
179
  {
180
+ "epoch": 22.0,
181
+ "eval_loss": 1.4125868082046509,
182
+ "eval_runtime": 0.235,
183
+ "eval_samples_per_second": 34.046,
184
+ "eval_steps_per_second": 4.256,
185
+ "step": 22
186
  },
187
  {
188
+ "epoch": 23.0,
189
+ "eval_loss": 1.3109110593795776,
190
+ "eval_runtime": 0.2369,
191
+ "eval_samples_per_second": 33.769,
192
+ "eval_steps_per_second": 4.221,
193
+ "step": 23
194
  },
195
  {
196
+ "epoch": 24.0,
197
+ "eval_loss": 1.2188917398452759,
198
+ "eval_runtime": 0.2319,
199
+ "eval_samples_per_second": 34.5,
200
+ "eval_steps_per_second": 4.312,
201
+ "step": 24
202
  },
203
  {
204
+ "epoch": 25.0,
205
+ "eval_loss": 1.1338324546813965,
206
+ "eval_runtime": 0.2362,
207
+ "eval_samples_per_second": 33.873,
208
+ "eval_steps_per_second": 4.234,
209
+ "step": 25
210
  },
211
  {
212
+ "epoch": 26.0,
213
+ "eval_loss": 1.0485577583312988,
214
+ "eval_runtime": 0.2311,
215
+ "eval_samples_per_second": 34.62,
216
+ "eval_steps_per_second": 4.328,
217
+ "step": 26
218
  },
219
  {
220
+ "epoch": 27.0,
221
+ "eval_loss": 0.9640414118766785,
222
+ "eval_runtime": 0.2359,
223
+ "eval_samples_per_second": 33.914,
224
+ "eval_steps_per_second": 4.239,
225
+ "step": 27
226
  },
227
  {
228
+ "epoch": 28.0,
229
+ "eval_loss": 0.8827559947967529,
230
+ "eval_runtime": 0.2268,
231
+ "eval_samples_per_second": 35.271,
232
+ "eval_steps_per_second": 4.409,
233
+ "step": 28
234
  },
235
  {
236
+ "epoch": 29.0,
237
+ "eval_loss": 0.8060356378555298,
238
+ "eval_runtime": 0.2364,
239
+ "eval_samples_per_second": 33.848,
240
+ "eval_steps_per_second": 4.231,
241
+ "step": 29
242
  },
243
  {
244
+ "epoch": 30.0,
245
+ "eval_loss": 0.7329221367835999,
246
+ "eval_runtime": 0.2336,
247
+ "eval_samples_per_second": 34.244,
248
+ "eval_steps_per_second": 4.281,
249
+ "step": 30
250
  },
251
  {
252
+ "epoch": 31.0,
253
+ "eval_loss": 0.6638815402984619,
254
+ "eval_runtime": 0.2359,
255
+ "eval_samples_per_second": 33.912,
256
+ "eval_steps_per_second": 4.239,
257
+ "step": 31
258
  },
259
  {
260
+ "epoch": 32.0,
261
+ "eval_loss": 0.6010197997093201,
262
+ "eval_runtime": 0.235,
263
+ "eval_samples_per_second": 34.042,
264
+ "eval_steps_per_second": 4.255,
265
+ "step": 32
266
  },
267
  {
268
+ "epoch": 33.0,
269
+ "eval_loss": 0.5438850522041321,
270
+ "eval_runtime": 0.2331,
271
+ "eval_samples_per_second": 34.324,
272
+ "eval_steps_per_second": 4.29,
273
+ "step": 33
274
  },
275
  {
276
+ "epoch": 34.0,
277
+ "eval_loss": 0.4924549162387848,
278
+ "eval_runtime": 0.2353,
279
+ "eval_samples_per_second": 34.004,
280
+ "eval_steps_per_second": 4.25,
281
+ "step": 34
282
  },
283
  {
284
+ "epoch": 35.0,
285
+ "eval_loss": 0.4471151828765869,
286
+ "eval_runtime": 0.2413,
287
+ "eval_samples_per_second": 33.158,
288
+ "eval_steps_per_second": 4.145,
289
+ "step": 35
290
  },
291
  {
292
+ "epoch": 36.0,
293
+ "eval_loss": 0.4065961539745331,
294
+ "eval_runtime": 0.2355,
295
+ "eval_samples_per_second": 33.966,
296
+ "eval_steps_per_second": 4.246,
297
+ "step": 36
298
  },
299
  {
300
+ "epoch": 37.0,
301
+ "eval_loss": 0.3690074682235718,
302
+ "eval_runtime": 0.2363,
303
+ "eval_samples_per_second": 33.853,
304
+ "eval_steps_per_second": 4.232,
305
+ "step": 37
306
  },
307
  {
308
+ "epoch": 38.0,
309
+ "eval_loss": 0.3340989649295807,
310
+ "eval_runtime": 0.2363,
311
+ "eval_samples_per_second": 33.86,
312
+ "eval_steps_per_second": 4.232,
313
+ "step": 38
314
  },
315
  {
316
+ "epoch": 39.0,
317
+ "eval_loss": 0.3023061454296112,
318
+ "eval_runtime": 0.2338,
319
+ "eval_samples_per_second": 34.218,
320
+ "eval_steps_per_second": 4.277,
321
+ "step": 39
322
  },
323
  {
324
+ "epoch": 40.0,
325
+ "eval_loss": 0.27456292510032654,
326
+ "eval_runtime": 0.2359,
327
+ "eval_samples_per_second": 33.907,
328
+ "eval_steps_per_second": 4.238,
329
+ "step": 40
330
  },
331
  {
332
+ "epoch": 41.0,
333
+ "eval_loss": 0.24695347249507904,
334
+ "eval_runtime": 0.2296,
335
+ "eval_samples_per_second": 34.843,
336
+ "eval_steps_per_second": 4.355,
337
+ "step": 41
338
  },
339
  {
340
+ "epoch": 42.0,
341
+ "eval_loss": 0.220541313290596,
342
+ "eval_runtime": 0.2354,
343
+ "eval_samples_per_second": 33.99,
344
+ "eval_steps_per_second": 4.249,
345
+ "step": 42
346
  },
347
  {
348
+ "epoch": 43.0,
349
+ "eval_loss": 0.19677509367465973,
350
+ "eval_runtime": 0.2327,
351
+ "eval_samples_per_second": 34.379,
352
+ "eval_steps_per_second": 4.297,
353
+ "step": 43
354
  },
355
  {
356
+ "epoch": 44.0,
357
+ "eval_loss": 0.17713746428489685,
358
+ "eval_runtime": 0.2348,
359
+ "eval_samples_per_second": 34.066,
360
+ "eval_steps_per_second": 4.258,
361
+ "step": 44
362
  },
363
  {
364
+ "epoch": 45.0,
365
+ "eval_loss": 0.15933585166931152,
366
+ "eval_runtime": 0.2234,
367
+ "eval_samples_per_second": 35.814,
368
+ "eval_steps_per_second": 4.477,
369
+ "step": 45
370
  },
371
  {
372
+ "epoch": 46.0,
373
+ "eval_loss": 0.14242056012153625,
374
+ "eval_runtime": 0.2366,
375
+ "eval_samples_per_second": 33.807,
376
+ "eval_steps_per_second": 4.226,
377
+ "step": 46
378
  },
379
  {
380
+ "epoch": 47.0,
381
+ "eval_loss": 0.1287701427936554,
382
+ "eval_runtime": 0.2313,
383
+ "eval_samples_per_second": 34.594,
384
+ "eval_steps_per_second": 4.324,
385
+ "step": 47
386
  },
387
  {
388
+ "epoch": 48.0,
389
+ "eval_loss": 0.11695855855941772,
390
+ "eval_runtime": 0.2269,
391
+ "eval_samples_per_second": 35.251,
392
+ "eval_steps_per_second": 4.406,
393
+ "step": 48
394
  },
395
  {
396
+ "epoch": 49.0,
397
+ "eval_loss": 0.1070137694478035,
398
+ "eval_runtime": 0.2278,
399
+ "eval_samples_per_second": 35.111,
400
+ "eval_steps_per_second": 4.389,
401
+ "step": 49
402
  },
403
  {
404
+ "epoch": 50.0,
405
+ "eval_loss": 0.09962165355682373,
406
+ "eval_runtime": 0.2347,
407
+ "eval_samples_per_second": 34.085,
408
+ "eval_steps_per_second": 4.261,
409
+ "step": 50
410
  },
411
  {
412
+ "epoch": 51.0,
413
+ "eval_loss": 0.09394610673189163,
414
+ "eval_runtime": 0.2348,
415
+ "eval_samples_per_second": 34.077,
416
+ "eval_steps_per_second": 4.26,
417
+ "step": 51
418
  },
419
  {
420
+ "epoch": 52.0,
421
+ "eval_loss": 0.08877500891685486,
422
+ "eval_runtime": 0.2326,
423
+ "eval_samples_per_second": 34.394,
424
+ "eval_steps_per_second": 4.299,
425
+ "step": 52
426
  },
427
  {
428
+ "epoch": 53.0,
429
+ "eval_loss": 0.08450286090373993,
430
+ "eval_runtime": 0.2348,
431
+ "eval_samples_per_second": 34.07,
432
+ "eval_steps_per_second": 4.259,
433
+ "step": 53
434
  },
435
  {
436
+ "epoch": 54.0,
437
+ "eval_loss": 0.0817728266119957,
438
+ "eval_runtime": 0.2313,
439
+ "eval_samples_per_second": 34.588,
440
+ "eval_steps_per_second": 4.324,
441
+ "step": 54
442
  },
443
  {
444
+ "epoch": 55.0,
445
+ "eval_loss": 0.07895343005657196,
446
+ "eval_runtime": 0.2359,
447
+ "eval_samples_per_second": 33.915,
448
+ "eval_steps_per_second": 4.239,
449
+ "step": 55
450
  },
451
  {
452
+ "epoch": 56.0,
453
+ "eval_loss": 0.07630708068609238,
454
+ "eval_runtime": 0.2283,
455
+ "eval_samples_per_second": 35.038,
456
+ "eval_steps_per_second": 4.38,
457
+ "step": 56
458
  },
459
  {
460
+ "epoch": 57.0,
461
+ "eval_loss": 0.0731731578707695,
462
+ "eval_runtime": 0.2364,
463
+ "eval_samples_per_second": 33.835,
464
+ "eval_steps_per_second": 4.229,
465
+ "step": 57
466
  },
467
  {
468
+ "epoch": 58.0,
469
+ "eval_loss": 0.06972303986549377,
470
+ "eval_runtime": 0.2275,
471
+ "eval_samples_per_second": 35.171,
472
+ "eval_steps_per_second": 4.396,
473
+ "step": 58
474
  },
475
  {
476
+ "epoch": 59.0,
477
+ "eval_loss": 0.06655264645814896,
478
+ "eval_runtime": 0.2357,
479
+ "eval_samples_per_second": 33.941,
480
+ "eval_steps_per_second": 4.243,
481
+ "step": 59
482
  },
483
  {
484
+ "epoch": 60.0,
485
+ "eval_loss": 0.06421676278114319,
486
+ "eval_runtime": 0.2353,
487
+ "eval_samples_per_second": 34.001,
488
+ "eval_steps_per_second": 4.25,
489
+ "step": 60
490
  },
491
  {
492
+ "epoch": 61.0,
493
+ "eval_loss": 0.06110429763793945,
494
+ "eval_runtime": 0.2361,
495
+ "eval_samples_per_second": 33.886,
496
+ "eval_steps_per_second": 4.236,
497
+ "step": 61
498
  },
499
  {
500
+ "epoch": 62.0,
501
+ "eval_loss": 0.05834279954433441,
502
+ "eval_runtime": 0.2379,
503
+ "eval_samples_per_second": 33.624,
504
+ "eval_steps_per_second": 4.203,
505
+ "step": 62
506
  },
507
  {
508
+ "epoch": 63.0,
509
+ "eval_loss": 0.055961962789297104,
510
+ "eval_runtime": 0.2264,
511
+ "eval_samples_per_second": 35.335,
512
+ "eval_steps_per_second": 4.417,
513
+ "step": 63
514
  },
515
  {
516
+ "epoch": 64.0,
517
+ "eval_loss": 0.05323232710361481,
518
+ "eval_runtime": 0.2359,
519
+ "eval_samples_per_second": 33.916,
520
+ "eval_steps_per_second": 4.24,
521
+ "step": 64
522
  },
523
  {
524
+ "epoch": 65.0,
525
+ "eval_loss": 0.051185671240091324,
526
+ "eval_runtime": 0.2338,
527
+ "eval_samples_per_second": 34.213,
528
+ "eval_steps_per_second": 4.277,
529
+ "step": 65
530
  },
531
  {
532
+ "epoch": 66.0,
533
+ "eval_loss": 0.04865783825516701,
534
+ "eval_runtime": 0.2358,
535
+ "eval_samples_per_second": 33.928,
536
+ "eval_steps_per_second": 4.241,
537
+ "step": 66
538
  },
539
  {
540
+ "epoch": 67.0,
541
+ "eval_loss": 0.04639100283384323,
542
+ "eval_runtime": 0.2281,
543
+ "eval_samples_per_second": 35.066,
544
+ "eval_steps_per_second": 4.383,
545
+ "step": 67
546
  },
547
  {
548
+ "epoch": 68.0,
549
+ "eval_loss": 0.04309353977441788,
550
+ "eval_runtime": 0.2365,
551
+ "eval_samples_per_second": 33.827,
552
+ "eval_steps_per_second": 4.228,
553
+ "step": 68
554
  },
555
  {
556
+ "epoch": 69.0,
557
+ "eval_loss": 0.03992551565170288,
558
+ "eval_runtime": 0.2324,
559
+ "eval_samples_per_second": 34.422,
560
+ "eval_steps_per_second": 4.303,
561
+ "step": 69
562
  },
563
  {
564
+ "epoch": 70.0,
565
+ "eval_loss": 0.03812782093882561,
566
+ "eval_runtime": 0.2236,
567
+ "eval_samples_per_second": 35.783,
568
+ "eval_steps_per_second": 4.473,
569
+ "step": 70
570
  },
571
  {
572
+ "epoch": 71.0,
573
+ "eval_loss": 0.03636465594172478,
574
+ "eval_runtime": 0.2325,
575
+ "eval_samples_per_second": 34.401,
576
+ "eval_steps_per_second": 4.3,
577
+ "step": 71
578
  },
579
  {
580
+ "epoch": 72.0,
581
+ "eval_loss": 0.034834641963243484,
582
+ "eval_runtime": 0.2358,
583
+ "eval_samples_per_second": 33.926,
584
+ "eval_steps_per_second": 4.241,
585
+ "step": 72
586
  },
587
  {
588
+ "epoch": 73.0,
589
+ "eval_loss": 0.03329307958483696,
590
+ "eval_runtime": 0.2345,
591
+ "eval_samples_per_second": 34.111,
592
+ "eval_steps_per_second": 4.264,
593
+ "step": 73
594
  },
595
  {
596
+ "epoch": 74.0,
597
+ "eval_loss": 0.031552691012620926,
598
+ "eval_runtime": 0.2364,
599
+ "eval_samples_per_second": 33.845,
600
+ "eval_steps_per_second": 4.231,
601
+ "step": 74
602
  },
603
  {
604
+ "epoch": 75.0,
605
+ "eval_loss": 0.029882650822401047,
606
+ "eval_runtime": 0.2329,
607
+ "eval_samples_per_second": 34.351,
608
+ "eval_steps_per_second": 4.294,
609
+ "step": 75
610
  },
611
  {
612
+ "epoch": 76.0,
613
+ "eval_loss": 0.028516214340925217,
614
+ "eval_runtime": 0.2359,
615
+ "eval_samples_per_second": 33.907,
616
+ "eval_steps_per_second": 4.238,
617
+ "step": 76
618
  },
619
  {
620
+ "epoch": 77.0,
621
+ "eval_loss": 0.027370158582925797,
622
+ "eval_runtime": 0.2246,
623
+ "eval_samples_per_second": 35.624,
624
+ "eval_steps_per_second": 4.453,
625
+ "step": 77
626
  },
627
  {
628
+ "epoch": 78.0,
629
+ "eval_loss": 0.026426443830132484,
630
+ "eval_runtime": 0.2254,
631
+ "eval_samples_per_second": 35.492,
632
+ "eval_steps_per_second": 4.436,
633
+ "step": 78
634
  },
635
  {
636
+ "epoch": 79.0,
637
+ "eval_loss": 0.02534804865717888,
638
+ "eval_runtime": 0.2295,
639
+ "eval_samples_per_second": 34.856,
640
+ "eval_steps_per_second": 4.357,
641
+ "step": 79
642
  },
643
  {
644
+ "epoch": 80.0,
645
+ "eval_loss": 0.024182336404919624,
646
+ "eval_runtime": 0.2363,
647
+ "eval_samples_per_second": 33.856,
648
+ "eval_steps_per_second": 4.232,
649
+ "step": 80
650
  },
651
  {
652
+ "epoch": 81.0,
653
+ "eval_loss": 0.023593546822667122,
654
+ "eval_runtime": 0.2361,
655
+ "eval_samples_per_second": 33.881,
656
+ "eval_steps_per_second": 4.235,
657
+ "step": 81
658
  },
659
  {
660
+ "epoch": 82.0,
661
+ "eval_loss": 0.023052040487527847,
662
+ "eval_runtime": 0.2361,
663
+ "eval_samples_per_second": 33.879,
664
+ "eval_steps_per_second": 4.235,
665
+ "step": 82
666
  },
667
  {
668
+ "epoch": 83.0,
669
+ "eval_loss": 0.02290300466120243,
670
+ "eval_runtime": 0.2361,
671
+ "eval_samples_per_second": 33.884,
672
+ "eval_steps_per_second": 4.236,
673
+ "step": 83
674
  },
675
  {
676
+ "epoch": 84.0,
677
+ "eval_loss": 0.02261677198112011,
678
+ "eval_runtime": 0.2286,
679
+ "eval_samples_per_second": 35.002,
680
+ "eval_steps_per_second": 4.375,
681
+ "step": 84
682
  },
683
  {
684
+ "epoch": 85.0,
685
+ "eval_loss": 0.022289568558335304,
686
+ "eval_runtime": 0.2353,
687
+ "eval_samples_per_second": 34.003,
688
+ "eval_steps_per_second": 4.25,
689
+ "step": 85
690
  },
691
  {
692
+ "epoch": 86.0,
693
+ "eval_loss": 0.02184910513460636,
694
+ "eval_runtime": 0.2287,
695
+ "eval_samples_per_second": 34.986,
696
+ "eval_steps_per_second": 4.373,
697
+ "step": 86
698
+ },
699
+ {
700
+ "epoch": 87.0,
701
+ "eval_loss": 0.021228935569524765,
702
+ "eval_runtime": 0.2349,
703
+ "eval_samples_per_second": 34.051,
704
+ "eval_steps_per_second": 4.256,
705
+ "step": 87
706
+ },
707
+ {
708
+ "epoch": 88.0,
709
+ "eval_loss": 0.020517783239483833,
710
+ "eval_runtime": 0.2353,
711
+ "eval_samples_per_second": 34.002,
712
+ "eval_steps_per_second": 4.25,
713
+ "step": 88
714
+ },
715
+ {
716
+ "epoch": 89.0,
717
+ "eval_loss": 0.019832810387015343,
718
+ "eval_runtime": 0.2232,
719
+ "eval_samples_per_second": 35.839,
720
+ "eval_steps_per_second": 4.48,
721
+ "step": 89
722
+ },
723
+ {
724
+ "epoch": 90.0,
725
+ "eval_loss": 0.0191506277769804,
726
+ "eval_runtime": 0.2312,
727
+ "eval_samples_per_second": 34.605,
728
+ "eval_steps_per_second": 4.326,
729
+ "step": 90
730
+ },
731
+ {
732
+ "epoch": 91.0,
733
+ "eval_loss": 0.018617864698171616,
734
+ "eval_runtime": 0.2243,
735
+ "eval_samples_per_second": 35.667,
736
+ "eval_steps_per_second": 4.458,
737
+ "step": 91
738
+ },
739
+ {
740
+ "epoch": 92.0,
741
+ "eval_loss": 0.01811818592250347,
742
+ "eval_runtime": 0.2348,
743
+ "eval_samples_per_second": 34.07,
744
+ "eval_steps_per_second": 4.259,
745
+ "step": 92
746
+ },
747
+ {
748
+ "epoch": 93.0,
749
+ "eval_loss": 0.01765601523220539,
750
+ "eval_runtime": 0.2363,
751
+ "eval_samples_per_second": 33.853,
752
+ "eval_steps_per_second": 4.232,
753
+ "step": 93
754
+ },
755
+ {
756
+ "epoch": 94.0,
757
+ "eval_loss": 0.017278417944908142,
758
+ "eval_runtime": 0.2264,
759
+ "eval_samples_per_second": 35.331,
760
+ "eval_steps_per_second": 4.416,
761
+ "step": 94
762
+ },
763
+ {
764
+ "epoch": 95.0,
765
+ "eval_loss": 0.016984442248940468,
766
+ "eval_runtime": 0.2349,
767
+ "eval_samples_per_second": 34.053,
768
+ "eval_steps_per_second": 4.257,
769
+ "step": 95
770
+ },
771
+ {
772
+ "epoch": 96.0,
773
+ "eval_loss": 0.01675889454782009,
774
+ "eval_runtime": 0.2343,
775
+ "eval_samples_per_second": 34.141,
776
+ "eval_steps_per_second": 4.268,
777
+ "step": 96
778
+ },
779
+ {
780
+ "epoch": 97.0,
781
+ "eval_loss": 0.016593070700764656,
782
+ "eval_runtime": 0.2359,
783
+ "eval_samples_per_second": 33.915,
784
+ "eval_steps_per_second": 4.239,
785
+ "step": 97
786
+ },
787
+ {
788
+ "epoch": 98.0,
789
+ "eval_loss": 0.016466278582811356,
790
+ "eval_runtime": 0.2296,
791
+ "eval_samples_per_second": 34.851,
792
+ "eval_steps_per_second": 4.356,
793
+ "step": 98
794
+ },
795
+ {
796
+ "epoch": 99.0,
797
+ "eval_loss": 0.016385838389396667,
798
+ "eval_runtime": 0.2364,
799
+ "eval_samples_per_second": 33.84,
800
+ "eval_steps_per_second": 4.23,
801
+ "step": 99
802
+ },
803
+ {
804
+ "epoch": 100.0,
805
+ "learning_rate": 0.0,
806
+ "loss": 1.4009,
807
+ "step": 100
808
+ },
809
+ {
810
+ "epoch": 100.0,
811
+ "eval_loss": 0.016336046159267426,
812
+ "eval_runtime": 0.2203,
813
+ "eval_samples_per_second": 36.315,
814
+ "eval_steps_per_second": 4.539,
815
+ "step": 100
816
+ },
817
+ {
818
+ "epoch": 100.0,
819
+ "step": 100,
820
+ "total_flos": 487166312448000.0,
821
+ "train_loss": 1.40092041015625,
822
+ "train_runtime": 1064.353,
823
+ "train_samples_per_second": 0.752,
824
+ "train_steps_per_second": 0.094
825
  }
826
  ],
827
  "logging_steps": 100,
828
+ "max_steps": 100,
829
+ "num_train_epochs": 100,
830
  "save_steps": 500,
831
+ "total_flos": 487166312448000.0,
832
  "trial_name": null,
833
  "trial_params": null
834
  }