patrickvonplaten commited on
Commit
93a9573
1 Parent(s): b2ede7e

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 1.742971658706665,
4
- "eval_runtime": 62.6586,
5
  "eval_samples": 1680,
6
- "eval_samples_per_second": 26.812,
7
- "eval_steps_per_second": 26.812,
8
- "eval_wer": 0.808972503617945,
9
- "train_loss": 2.308333969445064,
10
- "train_runtime": 4034.046,
11
  "train_samples": 4620,
12
- "train_samples_per_second": 22.905,
13
- "train_steps_per_second": 0.719
14
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 1.7481727600097656,
4
+ "eval_runtime": 63.6288,
5
  "eval_samples": 1680,
6
+ "eval_samples_per_second": 26.403,
7
+ "eval_steps_per_second": 26.403,
8
+ "eval_wer": 0.7987044311212184,
9
+ "train_loss": 2.305506002491918,
10
+ "train_runtime": 4265.6036,
11
  "train_samples": 4620,
12
+ "train_samples_per_second": 21.662,
13
+ "train_steps_per_second": 0.68
14
  }
emissions.csv CHANGED
@@ -1,2 +1,3 @@
1
  timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
2
  2021-10-28T14:07:16,c1359372-7a2f-4c35-8894-485368db7654,codecarbon,4031.0849454402924,0.009986100533709014,0.04744533240091166,United States,USA,new york,N,,
 
 
1
  timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
2
  2021-10-28T14:07:16,c1359372-7a2f-4c35-8894-485368db7654,codecarbon,4031.0849454402924,0.009986100533709014,0.04744533240091166,United States,USA,new york,N,,
3
+ 2021-10-28T15:24:13,dfd85941-742c-4041-8873-eb0fc7f517aa,codecarbon,4262.816283941269,0.010563398417267292,0.05018815377421022,United States,USA,new york,N,,
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 1.742971658706665,
4
- "eval_runtime": 62.6586,
5
  "eval_samples": 1680,
6
- "eval_samples_per_second": 26.812,
7
- "eval_steps_per_second": 26.812,
8
- "eval_wer": 0.808972503617945
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 1.7481727600097656,
4
+ "eval_runtime": 63.6288,
5
  "eval_samples": 1680,
6
+ "eval_samples_per_second": 26.403,
7
+ "eval_steps_per_second": 26.403,
8
+ "eval_wer": 0.7987044311212184
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27eaa3571c7de058069859c3ca0a47cf9c3332f5e005ace596cd29c0ac5cedde
3
  size 164046635
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d055e70a0bb80565ab20248a8c7638e55789cc3b48c9dbd0dcd6ade7cfc11286
3
  size 164046635
runs/Oct28_14-11-54_brutasse/events.out.tfevents.1635430384.brutasse.22330.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e81d12c8e29e2fab6bc5a1eed454a0235c0254addbbe58a68bca6e90fda5456
3
- size 57216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6005f973af9755164116075861924729cec76e698b42253d172b8b95c933d37f
3
+ size 59458
runs/Oct28_14-11-54_brutasse/events.out.tfevents.1635434718.brutasse.22330.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c505fce74b247973919ab606b9f69d61c41d39206fb6d8fb8944f307ee8415bf
3
+ size 358
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 2.308333969445064,
4
- "train_runtime": 4034.046,
5
  "train_samples": 4620,
6
- "train_samples_per_second": 22.905,
7
- "train_steps_per_second": 0.719
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 2.305506002491918,
4
+ "train_runtime": 4265.6036,
5
  "train_samples": 4620,
6
+ "train_samples_per_second": 21.662,
7
+ "train_steps_per_second": 0.68
8
  }
trainer_state.json CHANGED
@@ -34,25 +34,25 @@
34
  {
35
  "epoch": 0.34,
36
  "learning_rate": 4.800000000000001e-06,
37
- "loss": 7.1211,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.41,
42
  "learning_rate": 5.8e-06,
43
- "loss": 6.1102,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.48,
48
  "learning_rate": 6.800000000000001e-06,
49
- "loss": 5.2533,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.55,
54
  "learning_rate": 7.7e-06,
55
- "loss": 5.3192,
56
  "step": 80
57
  },
58
  {
@@ -69,10 +69,10 @@
69
  },
70
  {
71
  "epoch": 0.69,
72
- "eval_loss": 4.080197811126709,
73
- "eval_runtime": 61.9535,
74
- "eval_samples_per_second": 27.117,
75
- "eval_steps_per_second": 27.117,
76
  "eval_wer": 1.0,
77
  "step": 100
78
  },
@@ -85,7 +85,7 @@
85
  {
86
  "epoch": 0.83,
87
  "learning_rate": 1.1700000000000001e-05,
88
- "loss": 3.7734,
89
  "step": 120
90
  },
91
  {
@@ -133,15 +133,15 @@
133
  {
134
  "epoch": 1.38,
135
  "learning_rate": 1.97e-05,
136
- "loss": 2.9806,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.38,
141
- "eval_loss": 2.979235887527466,
142
- "eval_runtime": 61.729,
143
- "eval_samples_per_second": 27.216,
144
- "eval_steps_per_second": 27.216,
145
  "eval_wer": 1.0,
146
  "step": 200
147
  },
@@ -207,10 +207,10 @@
207
  },
208
  {
209
  "epoch": 2.07,
210
- "eval_loss": 2.9407591819763184,
211
- "eval_runtime": 62.0192,
212
- "eval_samples_per_second": 27.088,
213
- "eval_steps_per_second": 27.088,
214
  "eval_wer": 1.0,
215
  "step": 300
216
  },
@@ -271,15 +271,15 @@
271
  {
272
  "epoch": 2.76,
273
  "learning_rate": 3.97e-05,
274
- "loss": 2.9656,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 2.76,
279
- "eval_loss": 2.914335012435913,
280
- "eval_runtime": 61.8283,
281
- "eval_samples_per_second": 27.172,
282
- "eval_steps_per_second": 27.172,
283
  "eval_wer": 1.0,
284
  "step": 400
285
  },
@@ -322,13 +322,13 @@
322
  {
323
  "epoch": 3.24,
324
  "learning_rate": 4.6700000000000003e-05,
325
- "loss": 2.8962,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 3.31,
330
  "learning_rate": 4.77e-05,
331
- "loss": 2.9122,
332
  "step": 480
333
  },
334
  {
@@ -345,10 +345,10 @@
345
  },
346
  {
347
  "epoch": 3.45,
348
- "eval_loss": 2.8774032592773438,
349
- "eval_runtime": 61.89,
350
- "eval_samples_per_second": 27.145,
351
- "eval_steps_per_second": 27.145,
352
  "eval_wer": 1.0,
353
  "step": 500
354
  },
@@ -361,7 +361,7 @@
361
  {
362
  "epoch": 3.59,
363
  "learning_rate": 5.17e-05,
364
- "loss": 2.8849,
365
  "step": 520
366
  },
367
  {
@@ -373,1024 +373,1024 @@
373
  {
374
  "epoch": 3.72,
375
  "learning_rate": 5.3700000000000004e-05,
376
- "loss": 2.8881,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 3.79,
381
  "learning_rate": 5.470000000000001e-05,
382
- "loss": 2.9057,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 3.86,
387
  "learning_rate": 5.5700000000000005e-05,
388
- "loss": 2.8631,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 3.93,
393
  "learning_rate": 5.6699999999999996e-05,
394
- "loss": 2.844,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 4.0,
399
  "learning_rate": 5.77e-05,
400
- "loss": 2.8526,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 4.07,
405
  "learning_rate": 5.87e-05,
406
- "loss": 2.8439,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 4.14,
411
  "learning_rate": 5.97e-05,
412
- "loss": 2.7714,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 4.14,
417
- "eval_loss": 2.777448892593384,
418
- "eval_runtime": 61.9349,
419
- "eval_samples_per_second": 27.125,
420
- "eval_steps_per_second": 27.125,
421
  "eval_wer": 0.9999310867617669,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 4.21,
426
  "learning_rate": 6.07e-05,
427
- "loss": 2.772,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 4.28,
432
  "learning_rate": 6.170000000000001e-05,
433
- "loss": 2.8175,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 4.34,
438
  "learning_rate": 6.27e-05,
439
- "loss": 2.7637,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 4.41,
444
  "learning_rate": 6.37e-05,
445
- "loss": 2.7476,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 4.48,
450
  "learning_rate": 6.47e-05,
451
- "loss": 2.7451,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 4.55,
456
  "learning_rate": 6.570000000000001e-05,
457
- "loss": 2.7698,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 4.62,
462
  "learning_rate": 6.670000000000001e-05,
463
- "loss": 2.6918,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 4.69,
468
  "learning_rate": 6.77e-05,
469
- "loss": 2.7045,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 4.76,
474
  "learning_rate": 6.87e-05,
475
- "loss": 2.775,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 4.83,
480
  "learning_rate": 6.97e-05,
481
- "loss": 2.6883,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 4.83,
486
- "eval_loss": 2.655168294906616,
487
- "eval_runtime": 61.9601,
488
- "eval_samples_per_second": 27.114,
489
- "eval_steps_per_second": 27.114,
490
- "eval_wer": 0.9853903934945903,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 4.9,
495
  "learning_rate": 7.07e-05,
496
- "loss": 2.6235,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 4.97,
501
  "learning_rate": 7.17e-05,
502
- "loss": 2.6454,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 5.03,
507
  "learning_rate": 7.27e-05,
508
- "loss": 2.7134,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 5.1,
513
  "learning_rate": 7.37e-05,
514
- "loss": 2.6114,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 5.17,
519
  "learning_rate": 7.47e-05,
520
- "loss": 2.5768,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 5.24,
525
  "learning_rate": 7.570000000000001e-05,
526
- "loss": 2.6549,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 5.31,
531
  "learning_rate": 7.670000000000001e-05,
532
- "loss": 2.6445,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 5.38,
537
  "learning_rate": 7.77e-05,
538
- "loss": 2.5427,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 5.45,
543
  "learning_rate": 7.87e-05,
544
- "loss": 2.5433,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 5.52,
549
  "learning_rate": 7.970000000000001e-05,
550
- "loss": 2.6477,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 5.52,
555
- "eval_loss": 2.5424375534057617,
556
- "eval_runtime": 62.1641,
557
- "eval_samples_per_second": 27.025,
558
- "eval_steps_per_second": 27.025,
559
- "eval_wer": 1.0113706843084556,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 5.59,
564
  "learning_rate": 8.070000000000001e-05,
565
- "loss": 2.5609,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 5.66,
570
  "learning_rate": 8.17e-05,
571
- "loss": 2.4538,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 5.72,
576
  "learning_rate": 8.27e-05,
577
- "loss": 2.521,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 5.79,
582
  "learning_rate": 8.37e-05,
583
- "loss": 2.6379,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 5.86,
588
  "learning_rate": 8.47e-05,
589
- "loss": 2.4359,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 5.93,
594
  "learning_rate": 8.57e-05,
595
- "loss": 2.4555,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 6.0,
600
  "learning_rate": 8.67e-05,
601
- "loss": 2.542,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 6.07,
606
  "learning_rate": 8.77e-05,
607
- "loss": 2.5067,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 6.14,
612
  "learning_rate": 8.87e-05,
613
- "loss": 2.3783,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 6.21,
618
  "learning_rate": 8.970000000000001e-05,
619
- "loss": 2.3607,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 6.21,
624
- "eval_loss": 2.4291484355926514,
625
- "eval_runtime": 62.2969,
626
- "eval_samples_per_second": 26.968,
627
- "eval_steps_per_second": 26.968,
628
- "eval_wer": 1.101233546964372,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 6.28,
633
  "learning_rate": 9.070000000000001e-05,
634
- "loss": 2.584,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 6.34,
639
  "learning_rate": 9.17e-05,
640
- "loss": 2.4121,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 6.41,
645
  "learning_rate": 9.27e-05,
646
- "loss": 2.363,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 6.48,
651
  "learning_rate": 9.370000000000001e-05,
652
- "loss": 2.4513,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 6.55,
657
  "learning_rate": 9.47e-05,
658
- "loss": 2.5266,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 6.62,
663
  "learning_rate": 9.57e-05,
664
- "loss": 2.261,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 6.69,
669
  "learning_rate": 9.67e-05,
670
- "loss": 2.2193,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 6.76,
675
  "learning_rate": 9.77e-05,
676
- "loss": 2.5063,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 6.83,
681
  "learning_rate": 9.87e-05,
682
- "loss": 2.3737,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 6.9,
687
  "learning_rate": 9.970000000000001e-05,
688
- "loss": 2.1048,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 6.9,
693
- "eval_loss": 2.2525901794433594,
694
- "eval_runtime": 62.0939,
695
- "eval_samples_per_second": 27.056,
696
- "eval_steps_per_second": 27.056,
697
- "eval_wer": 0.9743642753773,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 6.97,
702
  "learning_rate": 9.963157894736843e-05,
703
- "loss": 2.2737,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 7.03,
708
  "learning_rate": 9.910526315789475e-05,
709
- "loss": 2.4889,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 7.1,
714
  "learning_rate": 9.857894736842106e-05,
715
- "loss": 2.1847,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 7.17,
720
  "learning_rate": 9.805263157894737e-05,
721
- "loss": 2.135,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 7.24,
726
  "learning_rate": 9.752631578947369e-05,
727
- "loss": 2.3103,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 7.31,
732
  "learning_rate": 9.7e-05,
733
- "loss": 2.3921,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 7.38,
738
  "learning_rate": 9.647368421052631e-05,
739
- "loss": 2.0108,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 7.45,
744
  "learning_rate": 9.594736842105264e-05,
745
- "loss": 2.1143,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 7.52,
750
  "learning_rate": 9.542105263157895e-05,
751
- "loss": 2.4364,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 7.59,
756
  "learning_rate": 9.489473684210527e-05,
757
- "loss": 2.2448,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 7.59,
762
- "eval_loss": 2.169701099395752,
763
- "eval_runtime": 62.1876,
764
- "eval_samples_per_second": 27.015,
765
- "eval_steps_per_second": 27.015,
766
- "eval_wer": 0.9332230721521604,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 7.66,
771
  "learning_rate": 9.436842105263158e-05,
772
- "loss": 1.9274,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 7.72,
777
  "learning_rate": 9.384210526315789e-05,
778
- "loss": 2.1163,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 7.79,
783
  "learning_rate": 9.331578947368422e-05,
784
- "loss": 2.3358,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 7.86,
789
  "learning_rate": 9.278947368421053e-05,
790
- "loss": 1.9315,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 7.93,
795
  "learning_rate": 9.226315789473686e-05,
796
- "loss": 1.9978,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 8.0,
801
  "learning_rate": 9.173684210526317e-05,
802
- "loss": 2.338,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 8.07,
807
  "learning_rate": 9.121052631578948e-05,
808
- "loss": 2.2001,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 8.14,
813
  "learning_rate": 9.06842105263158e-05,
814
- "loss": 1.894,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 8.21,
819
  "learning_rate": 9.015789473684211e-05,
820
- "loss": 2.0173,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 8.28,
825
  "learning_rate": 8.963157894736842e-05,
826
- "loss": 2.3525,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 8.28,
831
- "eval_loss": 2.042870044708252,
832
- "eval_runtime": 62.0448,
833
- "eval_samples_per_second": 27.077,
834
- "eval_steps_per_second": 27.077,
835
- "eval_wer": 0.8720970298394322,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 8.34,
840
  "learning_rate": 8.910526315789474e-05,
841
- "loss": 1.8758,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 8.41,
846
  "learning_rate": 8.857894736842106e-05,
847
- "loss": 1.8701,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 8.48,
852
  "learning_rate": 8.805263157894737e-05,
853
- "loss": 2.1876,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 8.55,
858
  "learning_rate": 8.752631578947369e-05,
859
- "loss": 2.2548,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 8.62,
864
  "learning_rate": 8.7e-05,
865
- "loss": 1.8274,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 8.69,
870
  "learning_rate": 8.647368421052631e-05,
871
- "loss": 1.9417,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 8.76,
876
  "learning_rate": 8.594736842105263e-05,
877
- "loss": 2.3158,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 8.83,
882
  "learning_rate": 8.542105263157894e-05,
883
- "loss": 2.0282,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 8.9,
888
  "learning_rate": 8.489473684210527e-05,
889
- "loss": 1.7798,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 8.97,
894
  "learning_rate": 8.436842105263158e-05,
895
- "loss": 2.099,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 8.97,
900
- "eval_loss": 2.0076003074645996,
901
- "eval_runtime": 62.3855,
902
- "eval_samples_per_second": 26.929,
903
- "eval_steps_per_second": 26.929,
904
- "eval_wer": 0.9182688994555854,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 9.03,
909
  "learning_rate": 8.38421052631579e-05,
910
- "loss": 2.2983,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 9.1,
915
  "learning_rate": 8.331578947368422e-05,
916
- "loss": 1.7612,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 9.17,
921
  "learning_rate": 8.278947368421053e-05,
922
- "loss": 1.7982,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 9.24,
927
  "learning_rate": 8.226315789473684e-05,
928
- "loss": 2.1745,
929
  "step": 1340
930
  },
931
  {
932
  "epoch": 9.31,
933
  "learning_rate": 8.173684210526317e-05,
934
- "loss": 2.1586,
935
  "step": 1350
936
  },
937
  {
938
  "epoch": 9.38,
939
  "learning_rate": 8.121052631578948e-05,
940
- "loss": 1.8252,
941
  "step": 1360
942
  },
943
  {
944
  "epoch": 9.45,
945
  "learning_rate": 8.06842105263158e-05,
946
- "loss": 1.9301,
947
  "step": 1370
948
  },
949
  {
950
  "epoch": 9.52,
951
  "learning_rate": 8.015789473684211e-05,
952
- "loss": 2.2251,
953
  "step": 1380
954
  },
955
  {
956
  "epoch": 9.59,
957
  "learning_rate": 7.963157894736842e-05,
958
- "loss": 1.8685,
959
  "step": 1390
960
  },
961
  {
962
  "epoch": 9.66,
963
  "learning_rate": 7.910526315789474e-05,
964
- "loss": 1.8236,
965
  "step": 1400
966
  },
967
  {
968
  "epoch": 9.66,
969
- "eval_loss": 1.9694862365722656,
970
- "eval_runtime": 62.2724,
971
- "eval_samples_per_second": 26.978,
972
- "eval_steps_per_second": 26.978,
973
- "eval_wer": 0.8262697264144442,
974
  "step": 1400
975
  },
976
  {
977
  "epoch": 9.72,
978
  "learning_rate": 7.857894736842105e-05,
979
- "loss": 2.1524,
980
  "step": 1410
981
  },
982
  {
983
  "epoch": 9.79,
984
  "learning_rate": 7.805263157894738e-05,
985
- "loss": 2.2737,
986
  "step": 1420
987
  },
988
  {
989
  "epoch": 9.86,
990
  "learning_rate": 7.752631578947369e-05,
991
- "loss": 1.7647,
992
  "step": 1430
993
  },
994
  {
995
  "epoch": 9.93,
996
  "learning_rate": 7.7e-05,
997
- "loss": 1.857,
998
  "step": 1440
999
  },
1000
  {
1001
  "epoch": 10.0,
1002
  "learning_rate": 7.647368421052631e-05,
1003
- "loss": 2.1688,
1004
  "step": 1450
1005
  },
1006
  {
1007
  "epoch": 10.07,
1008
  "learning_rate": 7.594736842105263e-05,
1009
- "loss": 2.0013,
1010
  "step": 1460
1011
  },
1012
  {
1013
  "epoch": 10.14,
1014
  "learning_rate": 7.542105263157895e-05,
1015
- "loss": 1.6881,
1016
  "step": 1470
1017
  },
1018
  {
1019
  "epoch": 10.21,
1020
  "learning_rate": 7.489473684210527e-05,
1021
- "loss": 1.8441,
1022
  "step": 1480
1023
  },
1024
  {
1025
  "epoch": 10.28,
1026
  "learning_rate": 7.43684210526316e-05,
1027
- "loss": 2.2788,
1028
  "step": 1490
1029
  },
1030
  {
1031
  "epoch": 10.34,
1032
  "learning_rate": 7.38421052631579e-05,
1033
- "loss": 1.6602,
1034
  "step": 1500
1035
  },
1036
  {
1037
  "epoch": 10.34,
1038
- "eval_loss": 1.932761311531067,
1039
- "eval_runtime": 62.3067,
1040
- "eval_samples_per_second": 26.963,
1041
- "eval_steps_per_second": 26.963,
1042
- "eval_wer": 0.8536971952312039,
1043
  "step": 1500
1044
  },
1045
  {
1046
  "epoch": 10.41,
1047
  "learning_rate": 7.331578947368422e-05,
1048
- "loss": 1.7309,
1049
  "step": 1510
1050
  },
1051
  {
1052
  "epoch": 10.48,
1053
  "learning_rate": 7.278947368421053e-05,
1054
- "loss": 2.0171,
1055
  "step": 1520
1056
  },
1057
  {
1058
  "epoch": 10.55,
1059
  "learning_rate": 7.226315789473685e-05,
1060
- "loss": 2.1863,
1061
  "step": 1530
1062
  },
1063
  {
1064
  "epoch": 10.62,
1065
  "learning_rate": 7.173684210526316e-05,
1066
- "loss": 1.7332,
1067
  "step": 1540
1068
  },
1069
  {
1070
  "epoch": 10.69,
1071
  "learning_rate": 7.121052631578947e-05,
1072
- "loss": 1.9525,
1073
  "step": 1550
1074
  },
1075
  {
1076
  "epoch": 10.76,
1077
  "learning_rate": 7.06842105263158e-05,
1078
- "loss": 2.2204,
1079
  "step": 1560
1080
  },
1081
  {
1082
  "epoch": 10.83,
1083
  "learning_rate": 7.015789473684211e-05,
1084
- "loss": 1.8776,
1085
  "step": 1570
1086
  },
1087
  {
1088
  "epoch": 10.9,
1089
  "learning_rate": 6.963157894736842e-05,
1090
- "loss": 1.7232,
1091
  "step": 1580
1092
  },
1093
  {
1094
  "epoch": 10.97,
1095
  "learning_rate": 6.910526315789474e-05,
1096
- "loss": 1.8677,
1097
  "step": 1590
1098
  },
1099
  {
1100
  "epoch": 11.03,
1101
  "learning_rate": 6.857894736842105e-05,
1102
- "loss": 2.2146,
1103
  "step": 1600
1104
  },
1105
  {
1106
  "epoch": 11.03,
1107
- "eval_loss": 1.879468321800232,
1108
- "eval_runtime": 62.2267,
1109
- "eval_samples_per_second": 26.998,
1110
- "eval_steps_per_second": 26.998,
1111
- "eval_wer": 0.8443249948315071,
1112
  "step": 1600
1113
  },
1114
  {
1115
  "epoch": 11.1,
1116
  "learning_rate": 6.805263157894736e-05,
1117
- "loss": 1.7176,
1118
  "step": 1610
1119
  },
1120
  {
1121
  "epoch": 11.17,
1122
  "learning_rate": 6.752631578947368e-05,
1123
- "loss": 1.7268,
1124
  "step": 1620
1125
  },
1126
  {
1127
  "epoch": 11.24,
1128
  "learning_rate": 6.7e-05,
1129
- "loss": 2.0447,
1130
  "step": 1630
1131
  },
1132
  {
1133
  "epoch": 11.31,
1134
  "learning_rate": 6.647368421052632e-05,
1135
- "loss": 2.0887,
1136
  "step": 1640
1137
  },
1138
  {
1139
  "epoch": 11.38,
1140
  "learning_rate": 6.594736842105264e-05,
1141
- "loss": 1.7193,
1142
  "step": 1650
1143
  },
1144
  {
1145
  "epoch": 11.45,
1146
  "learning_rate": 6.542105263157895e-05,
1147
- "loss": 1.8665,
1148
  "step": 1660
1149
  },
1150
  {
1151
  "epoch": 11.52,
1152
  "learning_rate": 6.489473684210527e-05,
1153
- "loss": 2.2249,
1154
  "step": 1670
1155
  },
1156
  {
1157
  "epoch": 11.59,
1158
  "learning_rate": 6.436842105263158e-05,
1159
- "loss": 1.7308,
1160
  "step": 1680
1161
  },
1162
  {
1163
  "epoch": 11.66,
1164
  "learning_rate": 6.384210526315791e-05,
1165
- "loss": 1.6378,
1166
  "step": 1690
1167
  },
1168
  {
1169
  "epoch": 11.72,
1170
  "learning_rate": 6.331578947368422e-05,
1171
- "loss": 1.9278,
1172
  "step": 1700
1173
  },
1174
  {
1175
  "epoch": 11.72,
1176
- "eval_loss": 1.874898076057434,
1177
- "eval_runtime": 62.4749,
1178
- "eval_samples_per_second": 26.891,
1179
- "eval_steps_per_second": 26.891,
1180
- "eval_wer": 0.8074564123768175,
1181
  "step": 1700
1182
  },
1183
  {
1184
  "epoch": 11.79,
1185
  "learning_rate": 6.278947368421053e-05,
1186
- "loss": 2.167,
1187
  "step": 1710
1188
  },
1189
  {
1190
  "epoch": 11.86,
1191
  "learning_rate": 6.226315789473685e-05,
1192
- "loss": 1.6498,
1193
  "step": 1720
1194
  },
1195
  {
1196
  "epoch": 11.93,
1197
  "learning_rate": 6.173684210526316e-05,
1198
- "loss": 1.7843,
1199
  "step": 1730
1200
  },
1201
  {
1202
  "epoch": 12.0,
1203
  "learning_rate": 6.121052631578947e-05,
1204
- "loss": 2.1545,
1205
  "step": 1740
1206
  },
1207
  {
1208
  "epoch": 12.07,
1209
  "learning_rate": 6.0684210526315785e-05,
1210
- "loss": 1.9012,
1211
  "step": 1750
1212
  },
1213
  {
1214
  "epoch": 12.14,
1215
  "learning_rate": 6.015789473684211e-05,
1216
- "loss": 1.6388,
1217
  "step": 1760
1218
  },
1219
  {
1220
  "epoch": 12.21,
1221
  "learning_rate": 5.9631578947368425e-05,
1222
- "loss": 1.8509,
1223
  "step": 1770
1224
  },
1225
  {
1226
  "epoch": 12.28,
1227
  "learning_rate": 5.9105263157894744e-05,
1228
- "loss": 2.1512,
1229
  "step": 1780
1230
  },
1231
  {
1232
  "epoch": 12.34,
1233
  "learning_rate": 5.857894736842106e-05,
1234
- "loss": 1.6984,
1235
  "step": 1790
1236
  },
1237
  {
1238
  "epoch": 12.41,
1239
  "learning_rate": 5.805263157894737e-05,
1240
- "loss": 1.6324,
1241
  "step": 1800
1242
  },
1243
  {
1244
  "epoch": 12.41,
1245
- "eval_loss": 1.850366234779358,
1246
- "eval_runtime": 62.1197,
1247
- "eval_samples_per_second": 27.045,
1248
- "eval_steps_per_second": 27.045,
1249
- "eval_wer": 0.8091103300944111,
1250
  "step": 1800
1251
  },
1252
  {
1253
  "epoch": 12.48,
1254
  "learning_rate": 5.752631578947368e-05,
1255
- "loss": 2.0102,
1256
  "step": 1810
1257
  },
1258
  {
1259
  "epoch": 12.55,
1260
  "learning_rate": 5.6999999999999996e-05,
1261
- "loss": 2.0616,
1262
  "step": 1820
1263
  },
1264
  {
1265
  "epoch": 12.62,
1266
  "learning_rate": 5.647368421052632e-05,
1267
- "loss": 1.6063,
1268
  "step": 1830
1269
  },
1270
  {
1271
  "epoch": 12.69,
1272
  "learning_rate": 5.5947368421052636e-05,
1273
- "loss": 1.718,
1274
  "step": 1840
1275
  },
1276
  {
1277
  "epoch": 12.76,
1278
  "learning_rate": 5.542105263157895e-05,
1279
- "loss": 2.0991,
1280
  "step": 1850
1281
  },
1282
  {
1283
  "epoch": 12.83,
1284
  "learning_rate": 5.489473684210527e-05,
1285
- "loss": 1.841,
1286
  "step": 1860
1287
  },
1288
  {
1289
  "epoch": 12.9,
1290
  "learning_rate": 5.436842105263158e-05,
1291
- "loss": 1.6345,
1292
  "step": 1870
1293
  },
1294
  {
1295
  "epoch": 12.97,
1296
  "learning_rate": 5.3842105263157895e-05,
1297
- "loss": 1.8546,
1298
  "step": 1880
1299
  },
1300
  {
1301
  "epoch": 13.03,
1302
  "learning_rate": 5.331578947368421e-05,
1303
- "loss": 2.1289,
1304
  "step": 1890
1305
  },
1306
  {
1307
  "epoch": 13.1,
1308
  "learning_rate": 5.2789473684210534e-05,
1309
- "loss": 1.6517,
1310
  "step": 1900
1311
  },
1312
  {
1313
  "epoch": 13.1,
1314
- "eval_loss": 1.8227797746658325,
1315
- "eval_runtime": 62.3551,
1316
- "eval_samples_per_second": 26.942,
1317
- "eval_steps_per_second": 26.942,
1318
- "eval_wer": 0.8233064571704224,
1319
  "step": 1900
1320
  },
1321
  {
1322
  "epoch": 13.17,
1323
  "learning_rate": 5.226315789473685e-05,
1324
- "loss": 1.7168,
1325
  "step": 1910
1326
  },
1327
  {
1328
  "epoch": 13.24,
1329
  "learning_rate": 5.173684210526316e-05,
1330
- "loss": 1.924,
1331
  "step": 1920
1332
  },
1333
  {
1334
  "epoch": 13.31,
1335
  "learning_rate": 5.121052631578947e-05,
1336
- "loss": 1.9177,
1337
  "step": 1930
1338
  },
1339
  {
1340
  "epoch": 13.38,
1341
  "learning_rate": 5.068421052631579e-05,
1342
- "loss": 1.64,
1343
  "step": 1940
1344
  },
1345
  {
1346
  "epoch": 13.45,
1347
  "learning_rate": 5.0157894736842106e-05,
1348
- "loss": 1.6953,
1349
  "step": 1950
1350
  },
1351
  {
1352
  "epoch": 13.52,
1353
  "learning_rate": 4.9631578947368426e-05,
1354
- "loss": 2.1591,
1355
  "step": 1960
1356
  },
1357
  {
1358
  "epoch": 13.59,
1359
  "learning_rate": 4.910526315789474e-05,
1360
- "loss": 1.7455,
1361
  "step": 1970
1362
  },
1363
  {
1364
  "epoch": 13.66,
1365
  "learning_rate": 4.857894736842106e-05,
1366
- "loss": 1.5742,
1367
  "step": 1980
1368
  },
1369
  {
1370
  "epoch": 13.72,
1371
  "learning_rate": 4.805263157894737e-05,
1372
- "loss": 1.9511,
1373
  "step": 1990
1374
  },
1375
  {
1376
  "epoch": 13.79,
1377
  "learning_rate": 4.7526315789473684e-05,
1378
- "loss": 2.0463,
1379
  "step": 2000
1380
  },
1381
  {
1382
  "epoch": 13.79,
1383
- "eval_loss": 1.819372296333313,
1384
- "eval_runtime": 62.2592,
1385
- "eval_samples_per_second": 26.984,
1386
- "eval_steps_per_second": 26.984,
1387
- "eval_wer": 0.8563848115222934,
1388
  "step": 2000
1389
  },
1390
  {
1391
  "epoch": 13.86,
1392
  "learning_rate": 4.7e-05,
1393
- "loss": 1.5744,
1394
  "step": 2010
1395
  },
1396
  {
@@ -1402,620 +1402,620 @@
1402
  {
1403
  "epoch": 14.0,
1404
  "learning_rate": 4.594736842105264e-05,
1405
- "loss": 1.9124,
1406
  "step": 2030
1407
  },
1408
  {
1409
  "epoch": 14.07,
1410
  "learning_rate": 4.542105263157895e-05,
1411
- "loss": 1.8303,
1412
  "step": 2040
1413
  },
1414
  {
1415
  "epoch": 14.14,
1416
  "learning_rate": 4.489473684210527e-05,
1417
- "loss": 1.5034,
1418
  "step": 2050
1419
  },
1420
  {
1421
  "epoch": 14.21,
1422
  "learning_rate": 4.436842105263158e-05,
1423
- "loss": 1.7662,
1424
  "step": 2060
1425
  },
1426
  {
1427
  "epoch": 14.28,
1428
  "learning_rate": 4.3842105263157895e-05,
1429
- "loss": 2.1269,
1430
  "step": 2070
1431
  },
1432
  {
1433
  "epoch": 14.34,
1434
  "learning_rate": 4.3315789473684215e-05,
1435
- "loss": 1.6707,
1436
  "step": 2080
1437
  },
1438
  {
1439
  "epoch": 14.41,
1440
  "learning_rate": 4.278947368421053e-05,
1441
- "loss": 1.6393,
1442
  "step": 2090
1443
  },
1444
  {
1445
  "epoch": 14.48,
1446
  "learning_rate": 4.226315789473684e-05,
1447
- "loss": 1.8736,
1448
  "step": 2100
1449
  },
1450
  {
1451
  "epoch": 14.48,
1452
- "eval_loss": 1.7949970960617065,
1453
- "eval_runtime": 62.2709,
1454
- "eval_samples_per_second": 26.979,
1455
- "eval_steps_per_second": 26.979,
1456
- "eval_wer": 0.8224794983116257,
1457
  "step": 2100
1458
  },
1459
  {
1460
  "epoch": 14.55,
1461
  "learning_rate": 4.1736842105263154e-05,
1462
- "loss": 1.9396,
1463
  "step": 2110
1464
  },
1465
  {
1466
  "epoch": 14.62,
1467
  "learning_rate": 4.1210526315789474e-05,
1468
- "loss": 1.555,
1469
  "step": 2120
1470
  },
1471
  {
1472
  "epoch": 14.69,
1473
  "learning_rate": 4.0684210526315794e-05,
1474
- "loss": 1.7577,
1475
  "step": 2130
1476
  },
1477
  {
1478
  "epoch": 14.76,
1479
  "learning_rate": 4.015789473684211e-05,
1480
- "loss": 2.0354,
1481
  "step": 2140
1482
  },
1483
  {
1484
  "epoch": 14.83,
1485
  "learning_rate": 3.9631578947368426e-05,
1486
- "loss": 1.7073,
1487
  "step": 2150
1488
  },
1489
  {
1490
  "epoch": 14.9,
1491
  "learning_rate": 3.910526315789474e-05,
1492
- "loss": 1.4961,
1493
  "step": 2160
1494
  },
1495
  {
1496
  "epoch": 14.97,
1497
  "learning_rate": 3.857894736842105e-05,
1498
- "loss": 1.7068,
1499
  "step": 2170
1500
  },
1501
  {
1502
  "epoch": 15.03,
1503
  "learning_rate": 3.8052631578947365e-05,
1504
- "loss": 2.0397,
1505
  "step": 2180
1506
  },
1507
  {
1508
  "epoch": 15.1,
1509
  "learning_rate": 3.7526315789473685e-05,
1510
- "loss": 1.5849,
1511
  "step": 2190
1512
  },
1513
  {
1514
  "epoch": 15.17,
1515
  "learning_rate": 3.7e-05,
1516
- "loss": 1.6233,
1517
  "step": 2200
1518
  },
1519
  {
1520
  "epoch": 15.17,
1521
- "eval_loss": 1.7896100282669067,
1522
- "eval_runtime": 62.05,
1523
- "eval_samples_per_second": 27.075,
1524
- "eval_steps_per_second": 27.075,
1525
- "eval_wer": 0.8429467300668458,
1526
  "step": 2200
1527
  },
1528
  {
1529
  "epoch": 15.24,
1530
  "learning_rate": 3.647368421052632e-05,
1531
- "loss": 1.8936,
1532
  "step": 2210
1533
  },
1534
  {
1535
  "epoch": 15.31,
1536
  "learning_rate": 3.594736842105264e-05,
1537
- "loss": 1.8176,
1538
  "step": 2220
1539
  },
1540
  {
1541
  "epoch": 15.38,
1542
  "learning_rate": 3.542105263157895e-05,
1543
- "loss": 1.5963,
1544
  "step": 2230
1545
  },
1546
  {
1547
  "epoch": 15.45,
1548
  "learning_rate": 3.4894736842105264e-05,
1549
- "loss": 1.6365,
1550
  "step": 2240
1551
  },
1552
  {
1553
  "epoch": 15.52,
1554
  "learning_rate": 3.436842105263158e-05,
1555
- "loss": 2.0577,
1556
  "step": 2250
1557
  },
1558
  {
1559
  "epoch": 15.59,
1560
  "learning_rate": 3.3842105263157896e-05,
1561
- "loss": 1.5942,
1562
  "step": 2260
1563
  },
1564
  {
1565
  "epoch": 15.66,
1566
  "learning_rate": 3.331578947368421e-05,
1567
- "loss": 1.5867,
1568
  "step": 2270
1569
  },
1570
  {
1571
  "epoch": 15.72,
1572
  "learning_rate": 3.278947368421052e-05,
1573
- "loss": 1.8733,
1574
  "step": 2280
1575
  },
1576
  {
1577
  "epoch": 15.79,
1578
  "learning_rate": 3.226315789473684e-05,
1579
- "loss": 2.0624,
1580
  "step": 2290
1581
  },
1582
  {
1583
  "epoch": 15.86,
1584
  "learning_rate": 3.173684210526316e-05,
1585
- "loss": 1.4982,
1586
  "step": 2300
1587
  },
1588
  {
1589
  "epoch": 15.86,
1590
- "eval_loss": 1.7793470621109009,
1591
- "eval_runtime": 62.0626,
1592
- "eval_samples_per_second": 27.069,
1593
- "eval_steps_per_second": 27.069,
1594
- "eval_wer": 0.8441182551168079,
1595
  "step": 2300
1596
  },
1597
  {
1598
  "epoch": 15.93,
1599
  "learning_rate": 3.1210526315789475e-05,
1600
- "loss": 1.6152,
1601
  "step": 2310
1602
  },
1603
  {
1604
  "epoch": 16.0,
1605
  "learning_rate": 3.0684210526315795e-05,
1606
- "loss": 1.9853,
1607
  "step": 2320
1608
  },
1609
  {
1610
  "epoch": 16.07,
1611
  "learning_rate": 3.0157894736842108e-05,
1612
- "loss": 1.8096,
1613
  "step": 2330
1614
  },
1615
  {
1616
  "epoch": 16.14,
1617
  "learning_rate": 2.963157894736842e-05,
1618
- "loss": 1.4931,
1619
  "step": 2340
1620
  },
1621
  {
1622
  "epoch": 16.21,
1623
  "learning_rate": 2.910526315789474e-05,
1624
- "loss": 1.6762,
1625
  "step": 2350
1626
  },
1627
  {
1628
  "epoch": 16.28,
1629
  "learning_rate": 2.8578947368421057e-05,
1630
- "loss": 2.041,
1631
  "step": 2360
1632
  },
1633
  {
1634
  "epoch": 16.34,
1635
  "learning_rate": 2.805263157894737e-05,
1636
- "loss": 1.5446,
1637
  "step": 2370
1638
  },
1639
  {
1640
  "epoch": 16.41,
1641
  "learning_rate": 2.7526315789473683e-05,
1642
- "loss": 1.5859,
1643
  "step": 2380
1644
  },
1645
  {
1646
  "epoch": 16.48,
1647
  "learning_rate": 2.7000000000000002e-05,
1648
- "loss": 1.804,
1649
  "step": 2390
1650
  },
1651
  {
1652
  "epoch": 16.55,
1653
  "learning_rate": 2.647368421052632e-05,
1654
- "loss": 1.8955,
1655
  "step": 2400
1656
  },
1657
  {
1658
  "epoch": 16.55,
1659
- "eval_loss": 1.7640429735183716,
1660
- "eval_runtime": 62.2013,
1661
- "eval_samples_per_second": 27.009,
1662
- "eval_steps_per_second": 27.009,
1663
- "eval_wer": 0.805802494659224,
1664
  "step": 2400
1665
  },
1666
  {
1667
  "epoch": 16.62,
1668
  "learning_rate": 2.5947368421052632e-05,
1669
- "loss": 1.4801,
1670
  "step": 2410
1671
  },
1672
  {
1673
  "epoch": 16.69,
1674
  "learning_rate": 2.542105263157895e-05,
1675
- "loss": 1.6412,
1676
  "step": 2420
1677
  },
1678
  {
1679
  "epoch": 16.76,
1680
  "learning_rate": 2.4894736842105264e-05,
1681
- "loss": 1.9778,
1682
  "step": 2430
1683
  },
1684
  {
1685
  "epoch": 16.83,
1686
  "learning_rate": 2.436842105263158e-05,
1687
- "loss": 1.6648,
1688
  "step": 2440
1689
  },
1690
  {
1691
  "epoch": 16.9,
1692
  "learning_rate": 2.3842105263157897e-05,
1693
- "loss": 1.5288,
1694
  "step": 2450
1695
  },
1696
  {
1697
  "epoch": 16.97,
1698
  "learning_rate": 2.331578947368421e-05,
1699
- "loss": 1.6835,
1700
  "step": 2460
1701
  },
1702
  {
1703
  "epoch": 17.03,
1704
  "learning_rate": 2.2789473684210527e-05,
1705
- "loss": 1.9791,
1706
  "step": 2470
1707
  },
1708
  {
1709
  "epoch": 17.1,
1710
  "learning_rate": 2.2263157894736843e-05,
1711
- "loss": 1.478,
1712
  "step": 2480
1713
  },
1714
  {
1715
  "epoch": 17.17,
1716
  "learning_rate": 2.173684210526316e-05,
1717
- "loss": 1.5269,
1718
  "step": 2490
1719
  },
1720
  {
1721
  "epoch": 17.24,
1722
  "learning_rate": 2.1210526315789476e-05,
1723
- "loss": 1.8253,
1724
  "step": 2500
1725
  },
1726
  {
1727
  "epoch": 17.24,
1728
- "eval_loss": 1.766750693321228,
1729
- "eval_runtime": 62.056,
1730
- "eval_samples_per_second": 27.072,
1731
- "eval_steps_per_second": 27.072,
1732
- "eval_wer": 0.8133829508648611,
1733
  "step": 2500
1734
  },
1735
  {
1736
  "epoch": 17.31,
1737
  "learning_rate": 2.068421052631579e-05,
1738
- "loss": 1.8498,
1739
  "step": 2510
1740
  },
1741
  {
1742
  "epoch": 17.38,
1743
  "learning_rate": 2.0157894736842105e-05,
1744
- "loss": 1.4217,
1745
  "step": 2520
1746
  },
1747
  {
1748
  "epoch": 17.45,
1749
  "learning_rate": 1.963157894736842e-05,
1750
- "loss": 1.6156,
1751
  "step": 2530
1752
  },
1753
  {
1754
  "epoch": 17.52,
1755
  "learning_rate": 1.9105263157894738e-05,
1756
- "loss": 1.9255,
1757
  "step": 2540
1758
  },
1759
  {
1760
  "epoch": 17.59,
1761
  "learning_rate": 1.8578947368421054e-05,
1762
- "loss": 1.6055,
1763
  "step": 2550
1764
  },
1765
  {
1766
  "epoch": 17.66,
1767
  "learning_rate": 1.8052631578947367e-05,
1768
- "loss": 1.5202,
1769
  "step": 2560
1770
  },
1771
  {
1772
  "epoch": 17.72,
1773
  "learning_rate": 1.7526315789473683e-05,
1774
- "loss": 1.7571,
1775
  "step": 2570
1776
  },
1777
  {
1778
  "epoch": 17.79,
1779
  "learning_rate": 1.7000000000000003e-05,
1780
- "loss": 1.893,
1781
  "step": 2580
1782
  },
1783
  {
1784
  "epoch": 17.86,
1785
  "learning_rate": 1.6473684210526316e-05,
1786
- "loss": 1.4591,
1787
  "step": 2590
1788
  },
1789
  {
1790
  "epoch": 17.93,
1791
  "learning_rate": 1.5947368421052633e-05,
1792
- "loss": 1.5332,
1793
  "step": 2600
1794
  },
1795
  {
1796
  "epoch": 17.93,
1797
- "eval_loss": 1.7611440420150757,
1798
- "eval_runtime": 62.4578,
1799
- "eval_samples_per_second": 26.898,
1800
- "eval_steps_per_second": 26.898,
1801
- "eval_wer": 0.801460960650541,
1802
  "step": 2600
1803
  },
1804
  {
1805
  "epoch": 18.0,
1806
  "learning_rate": 1.5421052631578946e-05,
1807
- "loss": 1.9193,
1808
  "step": 2610
1809
  },
1810
  {
1811
  "epoch": 18.07,
1812
  "learning_rate": 1.4894736842105264e-05,
1813
- "loss": 1.7022,
1814
  "step": 2620
1815
  },
1816
  {
1817
  "epoch": 18.14,
1818
  "learning_rate": 1.4368421052631582e-05,
1819
- "loss": 1.4093,
1820
  "step": 2630
1821
  },
1822
  {
1823
  "epoch": 18.21,
1824
  "learning_rate": 1.3842105263157895e-05,
1825
- "loss": 1.6191,
1826
  "step": 2640
1827
  },
1828
  {
1829
  "epoch": 18.28,
1830
  "learning_rate": 1.3315789473684213e-05,
1831
- "loss": 1.9511,
1832
  "step": 2650
1833
  },
1834
  {
1835
  "epoch": 18.34,
1836
  "learning_rate": 1.2789473684210526e-05,
1837
- "loss": 1.487,
1838
  "step": 2660
1839
  },
1840
  {
1841
  "epoch": 18.41,
1842
  "learning_rate": 1.2263157894736844e-05,
1843
- "loss": 1.4701,
1844
  "step": 2670
1845
  },
1846
  {
1847
  "epoch": 18.48,
1848
  "learning_rate": 1.1736842105263158e-05,
1849
- "loss": 1.7986,
1850
  "step": 2680
1851
  },
1852
  {
1853
  "epoch": 18.55,
1854
  "learning_rate": 1.1210526315789475e-05,
1855
- "loss": 1.7998,
1856
  "step": 2690
1857
  },
1858
  {
1859
  "epoch": 18.62,
1860
  "learning_rate": 1.068421052631579e-05,
1861
- "loss": 1.452,
1862
  "step": 2700
1863
  },
1864
  {
1865
  "epoch": 18.62,
1866
- "eval_loss": 1.7497012615203857,
1867
- "eval_runtime": 62.4753,
1868
- "eval_samples_per_second": 26.891,
1869
- "eval_steps_per_second": 26.891,
1870
- "eval_wer": 0.8071807594238853,
1871
  "step": 2700
1872
  },
1873
  {
1874
  "epoch": 18.69,
1875
  "learning_rate": 1.0157894736842106e-05,
1876
- "loss": 1.5828,
1877
  "step": 2710
1878
  },
1879
  {
1880
  "epoch": 18.76,
1881
  "learning_rate": 9.631578947368422e-06,
1882
- "loss": 1.9231,
1883
  "step": 2720
1884
  },
1885
  {
1886
  "epoch": 18.83,
1887
  "learning_rate": 9.105263157894737e-06,
1888
- "loss": 1.5695,
1889
  "step": 2730
1890
  },
1891
  {
1892
  "epoch": 18.9,
1893
  "learning_rate": 8.578947368421053e-06,
1894
- "loss": 1.5435,
1895
  "step": 2740
1896
  },
1897
  {
1898
  "epoch": 18.97,
1899
  "learning_rate": 8.052631578947368e-06,
1900
- "loss": 1.6403,
1901
  "step": 2750
1902
  },
1903
  {
1904
  "epoch": 19.03,
1905
  "learning_rate": 7.526315789473684e-06,
1906
- "loss": 1.85,
1907
  "step": 2760
1908
  },
1909
  {
1910
  "epoch": 19.1,
1911
  "learning_rate": 7.000000000000001e-06,
1912
- "loss": 1.4523,
1913
  "step": 2770
1914
  },
1915
  {
1916
  "epoch": 19.17,
1917
  "learning_rate": 6.473684210526316e-06,
1918
- "loss": 1.488,
1919
  "step": 2780
1920
  },
1921
  {
1922
  "epoch": 19.24,
1923
  "learning_rate": 5.947368421052632e-06,
1924
- "loss": 1.8365,
1925
  "step": 2790
1926
  },
1927
  {
1928
  "epoch": 19.31,
1929
  "learning_rate": 5.421052631578947e-06,
1930
- "loss": 1.7609,
1931
  "step": 2800
1932
  },
1933
  {
1934
  "epoch": 19.31,
1935
- "eval_loss": 1.7463784217834473,
1936
- "eval_runtime": 62.7099,
1937
- "eval_samples_per_second": 26.79,
1938
- "eval_steps_per_second": 26.79,
1939
- "eval_wer": 0.8062159740886224,
1940
  "step": 2800
1941
  },
1942
  {
1943
  "epoch": 19.38,
1944
  "learning_rate": 4.894736842105263e-06,
1945
- "loss": 1.4567,
1946
  "step": 2810
1947
  },
1948
  {
1949
  "epoch": 19.45,
1950
  "learning_rate": 4.368421052631579e-06,
1951
- "loss": 1.5592,
1952
  "step": 2820
1953
  },
1954
  {
1955
  "epoch": 19.52,
1956
  "learning_rate": 3.842105263157895e-06,
1957
- "loss": 1.961,
1958
  "step": 2830
1959
  },
1960
  {
1961
  "epoch": 19.59,
1962
- "learning_rate": 3.368421052631579e-06,
1963
- "loss": 1.4964,
1964
  "step": 2840
1965
  },
1966
  {
1967
  "epoch": 19.66,
1968
- "learning_rate": 2.842105263157895e-06,
1969
- "loss": 1.4505,
1970
  "step": 2850
1971
  },
1972
  {
1973
  "epoch": 19.72,
1974
- "learning_rate": 2.315789473684211e-06,
1975
- "loss": 1.7141,
1976
  "step": 2860
1977
  },
1978
  {
1979
  "epoch": 19.79,
1980
- "learning_rate": 1.7894736842105262e-06,
1981
- "loss": 1.8701,
1982
  "step": 2870
1983
  },
1984
  {
1985
  "epoch": 19.86,
1986
- "learning_rate": 1.2631578947368422e-06,
1987
- "loss": 1.4216,
1988
  "step": 2880
1989
  },
1990
  {
1991
  "epoch": 19.93,
1992
- "learning_rate": 7.368421052631579e-07,
1993
- "loss": 1.5857,
1994
  "step": 2890
1995
  },
1996
  {
1997
  "epoch": 20.0,
1998
- "learning_rate": 2.105263157894737e-07,
1999
- "loss": 1.7743,
2000
  "step": 2900
2001
  },
2002
  {
2003
  "epoch": 20.0,
2004
- "eval_loss": 1.742971658706665,
2005
- "eval_runtime": 62.4332,
2006
- "eval_samples_per_second": 26.909,
2007
- "eval_steps_per_second": 26.909,
2008
- "eval_wer": 0.808972503617945,
2009
  "step": 2900
2010
  },
2011
  {
2012
  "epoch": 20.0,
2013
  "step": 2900,
2014
  "total_flos": 1.1463619274186412e+18,
2015
- "train_loss": 2.308333969445064,
2016
- "train_runtime": 4034.046,
2017
- "train_samples_per_second": 22.905,
2018
- "train_steps_per_second": 0.719
2019
  }
2020
  ],
2021
  "max_steps": 2900,
 
34
  {
35
  "epoch": 0.34,
36
  "learning_rate": 4.800000000000001e-06,
37
+ "loss": 7.121,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.41,
42
  "learning_rate": 5.8e-06,
43
+ "loss": 6.1101,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.48,
48
  "learning_rate": 6.800000000000001e-06,
49
+ "loss": 5.2532,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.55,
54
  "learning_rate": 7.7e-06,
55
+ "loss": 5.3191,
56
  "step": 80
57
  },
58
  {
 
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "eval_loss": 4.080183982849121,
73
+ "eval_runtime": 61.3907,
74
+ "eval_samples_per_second": 27.366,
75
+ "eval_steps_per_second": 27.366,
76
  "eval_wer": 1.0,
77
  "step": 100
78
  },
 
85
  {
86
  "epoch": 0.83,
87
  "learning_rate": 1.1700000000000001e-05,
88
+ "loss": 3.7733,
89
  "step": 120
90
  },
91
  {
 
133
  {
134
  "epoch": 1.38,
135
  "learning_rate": 1.97e-05,
136
+ "loss": 2.9805,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.38,
141
+ "eval_loss": 2.9792349338531494,
142
+ "eval_runtime": 61.348,
143
+ "eval_samples_per_second": 27.385,
144
+ "eval_steps_per_second": 27.385,
145
  "eval_wer": 1.0,
146
  "step": 200
147
  },
 
207
  },
208
  {
209
  "epoch": 2.07,
210
+ "eval_loss": 2.9407615661621094,
211
+ "eval_runtime": 61.4623,
212
+ "eval_samples_per_second": 27.334,
213
+ "eval_steps_per_second": 27.334,
214
  "eval_wer": 1.0,
215
  "step": 300
216
  },
 
271
  {
272
  "epoch": 2.76,
273
  "learning_rate": 3.97e-05,
274
+ "loss": 2.9655,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 2.76,
279
+ "eval_loss": 2.9143309593200684,
280
+ "eval_runtime": 61.4594,
281
+ "eval_samples_per_second": 27.335,
282
+ "eval_steps_per_second": 27.335,
283
  "eval_wer": 1.0,
284
  "step": 400
285
  },
 
322
  {
323
  "epoch": 3.24,
324
  "learning_rate": 4.6700000000000003e-05,
325
+ "loss": 2.8961,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 3.31,
330
  "learning_rate": 4.77e-05,
331
+ "loss": 2.9121,
332
  "step": 480
333
  },
334
  {
 
345
  },
346
  {
347
  "epoch": 3.45,
348
+ "eval_loss": 2.8774638175964355,
349
+ "eval_runtime": 62.6239,
350
+ "eval_samples_per_second": 26.827,
351
+ "eval_steps_per_second": 26.827,
352
  "eval_wer": 1.0,
353
  "step": 500
354
  },
 
361
  {
362
  "epoch": 3.59,
363
  "learning_rate": 5.17e-05,
364
+ "loss": 2.885,
365
  "step": 520
366
  },
367
  {
 
373
  {
374
  "epoch": 3.72,
375
  "learning_rate": 5.3700000000000004e-05,
376
+ "loss": 2.888,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 3.79,
381
  "learning_rate": 5.470000000000001e-05,
382
+ "loss": 2.9058,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 3.86,
387
  "learning_rate": 5.5700000000000005e-05,
388
+ "loss": 2.8629,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 3.93,
393
  "learning_rate": 5.6699999999999996e-05,
394
+ "loss": 2.8436,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 4.0,
399
  "learning_rate": 5.77e-05,
400
+ "loss": 2.8511,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 4.07,
405
  "learning_rate": 5.87e-05,
406
+ "loss": 2.8431,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 4.14,
411
  "learning_rate": 5.97e-05,
412
+ "loss": 2.7719,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 4.14,
417
+ "eval_loss": 2.7815191745758057,
418
+ "eval_runtime": 62.3898,
419
+ "eval_samples_per_second": 26.927,
420
+ "eval_steps_per_second": 26.927,
421
  "eval_wer": 0.9999310867617669,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 4.21,
426
  "learning_rate": 6.07e-05,
427
+ "loss": 2.7765,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 4.28,
432
  "learning_rate": 6.170000000000001e-05,
433
+ "loss": 2.8196,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 4.34,
438
  "learning_rate": 6.27e-05,
439
+ "loss": 2.7596,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 4.41,
444
  "learning_rate": 6.37e-05,
445
+ "loss": 2.7299,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 4.48,
450
  "learning_rate": 6.47e-05,
451
+ "loss": 2.7264,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 4.55,
456
  "learning_rate": 6.570000000000001e-05,
457
+ "loss": 2.7499,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 4.62,
462
  "learning_rate": 6.670000000000001e-05,
463
+ "loss": 2.6799,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 4.69,
468
  "learning_rate": 6.77e-05,
469
+ "loss": 2.682,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 4.76,
474
  "learning_rate": 6.87e-05,
475
+ "loss": 2.7446,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 4.83,
480
  "learning_rate": 6.97e-05,
481
+ "loss": 2.6531,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 4.83,
486
+ "eval_loss": 2.6374878883361816,
487
+ "eval_runtime": 63.0358,
488
+ "eval_samples_per_second": 26.652,
489
+ "eval_steps_per_second": 26.652,
490
+ "eval_wer": 1.006477844393908,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 4.9,
495
  "learning_rate": 7.07e-05,
496
+ "loss": 2.6098,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 4.97,
501
  "learning_rate": 7.17e-05,
502
+ "loss": 2.6314,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 5.03,
507
  "learning_rate": 7.27e-05,
508
+ "loss": 2.7144,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 5.1,
513
  "learning_rate": 7.37e-05,
514
+ "loss": 2.6205,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 5.17,
519
  "learning_rate": 7.47e-05,
520
+ "loss": 2.5682,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 5.24,
525
  "learning_rate": 7.570000000000001e-05,
526
+ "loss": 2.6472,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 5.31,
531
  "learning_rate": 7.670000000000001e-05,
532
+ "loss": 2.6284,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 5.38,
537
  "learning_rate": 7.77e-05,
538
+ "loss": 2.547,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 5.45,
543
  "learning_rate": 7.87e-05,
544
+ "loss": 2.5544,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 5.52,
549
  "learning_rate": 7.970000000000001e-05,
550
+ "loss": 2.6425,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 5.52,
555
+ "eval_loss": 2.5602073669433594,
556
+ "eval_runtime": 63.2769,
557
+ "eval_samples_per_second": 26.55,
558
+ "eval_steps_per_second": 26.55,
559
+ "eval_wer": 1.0210185376610847,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 5.59,
564
  "learning_rate": 8.070000000000001e-05,
565
+ "loss": 2.5776,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 5.66,
570
  "learning_rate": 8.17e-05,
571
+ "loss": 2.4746,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 5.72,
576
  "learning_rate": 8.27e-05,
577
+ "loss": 2.5199,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 5.79,
582
  "learning_rate": 8.37e-05,
583
+ "loss": 2.6432,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 5.86,
588
  "learning_rate": 8.47e-05,
589
+ "loss": 2.4396,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 5.93,
594
  "learning_rate": 8.57e-05,
595
+ "loss": 2.4539,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 6.0,
600
  "learning_rate": 8.67e-05,
601
+ "loss": 2.5582,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 6.07,
606
  "learning_rate": 8.77e-05,
607
+ "loss": 2.5219,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 6.14,
612
  "learning_rate": 8.87e-05,
613
+ "loss": 2.4092,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 6.21,
618
  "learning_rate": 8.970000000000001e-05,
619
+ "loss": 2.3963,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 6.21,
624
+ "eval_loss": 2.466503858566284,
625
+ "eval_runtime": 63.3462,
626
+ "eval_samples_per_second": 26.521,
627
+ "eval_steps_per_second": 26.521,
628
+ "eval_wer": 1.0590586451657364,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 6.28,
633
  "learning_rate": 9.070000000000001e-05,
634
+ "loss": 2.5834,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 6.34,
639
  "learning_rate": 9.17e-05,
640
+ "loss": 2.4385,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 6.41,
645
  "learning_rate": 9.27e-05,
646
+ "loss": 2.3945,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 6.48,
651
  "learning_rate": 9.370000000000001e-05,
652
+ "loss": 2.4617,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 6.55,
657
  "learning_rate": 9.47e-05,
658
+ "loss": 2.5169,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 6.62,
663
  "learning_rate": 9.57e-05,
664
+ "loss": 2.2648,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 6.69,
669
  "learning_rate": 9.67e-05,
670
+ "loss": 2.2494,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 6.76,
675
  "learning_rate": 9.77e-05,
676
+ "loss": 2.5194,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 6.83,
681
  "learning_rate": 9.87e-05,
682
+ "loss": 2.3886,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 6.9,
687
  "learning_rate": 9.970000000000001e-05,
688
+ "loss": 2.1447,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 6.9,
693
+ "eval_loss": 2.2791571617126465,
694
+ "eval_runtime": 63.8981,
695
+ "eval_samples_per_second": 26.292,
696
+ "eval_steps_per_second": 26.292,
697
+ "eval_wer": 0.9847701743504927,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 6.97,
702
  "learning_rate": 9.963157894736843e-05,
703
+ "loss": 2.2952,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 7.03,
708
  "learning_rate": 9.910526315789475e-05,
709
+ "loss": 2.4979,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 7.1,
714
  "learning_rate": 9.857894736842106e-05,
715
+ "loss": 2.1919,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 7.17,
720
  "learning_rate": 9.805263157894737e-05,
721
+ "loss": 2.1653,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 7.24,
726
  "learning_rate": 9.752631578947369e-05,
727
+ "loss": 2.3318,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 7.31,
732
  "learning_rate": 9.7e-05,
733
+ "loss": 2.3904,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 7.38,
738
  "learning_rate": 9.647368421052631e-05,
739
+ "loss": 2.0835,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 7.45,
744
  "learning_rate": 9.594736842105264e-05,
745
+ "loss": 2.146,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 7.52,
750
  "learning_rate": 9.542105263157895e-05,
751
+ "loss": 2.4565,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 7.59,
756
  "learning_rate": 9.489473684210527e-05,
757
+ "loss": 2.2719,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 7.59,
762
+ "eval_loss": 2.2237350940704346,
763
+ "eval_runtime": 63.7312,
764
+ "eval_samples_per_second": 26.361,
765
+ "eval_steps_per_second": 26.361,
766
+ "eval_wer": 0.9464544138929089,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 7.66,
771
  "learning_rate": 9.436842105263158e-05,
772
+ "loss": 1.9881,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 7.72,
777
  "learning_rate": 9.384210526315789e-05,
778
+ "loss": 2.1616,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 7.79,
783
  "learning_rate": 9.331578947368422e-05,
784
+ "loss": 2.3746,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 7.86,
789
  "learning_rate": 9.278947368421053e-05,
790
+ "loss": 1.9976,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 7.93,
795
  "learning_rate": 9.226315789473686e-05,
796
+ "loss": 2.024,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 8.0,
801
  "learning_rate": 9.173684210526317e-05,
802
+ "loss": 2.363,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 8.07,
807
  "learning_rate": 9.121052631578948e-05,
808
+ "loss": 2.2491,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 8.14,
813
  "learning_rate": 9.06842105263158e-05,
814
+ "loss": 1.9415,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 8.21,
819
  "learning_rate": 9.015789473684211e-05,
820
+ "loss": 2.0498,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 8.28,
825
  "learning_rate": 8.963157894736842e-05,
826
+ "loss": 2.3629,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 8.28,
831
+ "eval_loss": 2.1057612895965576,
832
+ "eval_runtime": 63.7855,
833
+ "eval_samples_per_second": 26.338,
834
+ "eval_steps_per_second": 26.338,
835
+ "eval_wer": 0.8907036041623596,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 8.34,
840
  "learning_rate": 8.910526315789474e-05,
841
+ "loss": 1.94,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 8.41,
846
  "learning_rate": 8.857894736842106e-05,
847
+ "loss": 1.8996,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 8.48,
852
  "learning_rate": 8.805263157894737e-05,
853
+ "loss": 2.2006,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 8.55,
858
  "learning_rate": 8.752631578947369e-05,
859
+ "loss": 2.2553,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 8.62,
864
  "learning_rate": 8.7e-05,
865
+ "loss": 1.8448,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 8.69,
870
  "learning_rate": 8.647368421052631e-05,
871
+ "loss": 1.9506,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 8.76,
876
  "learning_rate": 8.594736842105263e-05,
877
+ "loss": 2.3542,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 8.83,
882
  "learning_rate": 8.542105263157894e-05,
883
+ "loss": 2.0478,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 8.9,
888
  "learning_rate": 8.489473684210527e-05,
889
+ "loss": 1.7948,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 8.97,
894
  "learning_rate": 8.436842105263158e-05,
895
+ "loss": 2.0913,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 8.97,
900
+ "eval_loss": 2.0112950801849365,
901
+ "eval_runtime": 63.7547,
902
+ "eval_samples_per_second": 26.351,
903
+ "eval_steps_per_second": 26.351,
904
+ "eval_wer": 0.9069671283853629,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 9.03,
909
  "learning_rate": 8.38421052631579e-05,
910
+ "loss": 2.2877,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 9.1,
915
  "learning_rate": 8.331578947368422e-05,
916
+ "loss": 1.7645,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 9.17,
921
  "learning_rate": 8.278947368421053e-05,
922
+ "loss": 1.7994,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 9.24,
927
  "learning_rate": 8.226315789473684e-05,
928
+ "loss": 2.166,
929
  "step": 1340
930
  },
931
  {
932
  "epoch": 9.31,
933
  "learning_rate": 8.173684210526317e-05,
934
+ "loss": 2.1645,
935
  "step": 1350
936
  },
937
  {
938
  "epoch": 9.38,
939
  "learning_rate": 8.121052631578948e-05,
940
+ "loss": 1.8409,
941
  "step": 1360
942
  },
943
  {
944
  "epoch": 9.45,
945
  "learning_rate": 8.06842105263158e-05,
946
+ "loss": 1.9361,
947
  "step": 1370
948
  },
949
  {
950
  "epoch": 9.52,
951
  "learning_rate": 8.015789473684211e-05,
952
+ "loss": 2.2377,
953
  "step": 1380
954
  },
955
  {
956
  "epoch": 9.59,
957
  "learning_rate": 7.963157894736842e-05,
958
+ "loss": 1.8667,
959
  "step": 1390
960
  },
961
  {
962
  "epoch": 9.66,
963
  "learning_rate": 7.910526315789474e-05,
964
+ "loss": 1.8334,
965
  "step": 1400
966
  },
967
  {
968
  "epoch": 9.66,
969
+ "eval_loss": 1.946588397026062,
970
+ "eval_runtime": 63.4101,
971
+ "eval_samples_per_second": 26.494,
972
+ "eval_steps_per_second": 26.494,
973
+ "eval_wer": 0.8177244848735442,
974
  "step": 1400
975
  },
976
  {
977
  "epoch": 9.72,
978
  "learning_rate": 7.857894736842105e-05,
979
+ "loss": 2.1079,
980
  "step": 1410
981
  },
982
  {
983
  "epoch": 9.79,
984
  "learning_rate": 7.805263157894738e-05,
985
+ "loss": 2.2388,
986
  "step": 1420
987
  },
988
  {
989
  "epoch": 9.86,
990
  "learning_rate": 7.752631578947369e-05,
991
+ "loss": 1.7457,
992
  "step": 1430
993
  },
994
  {
995
  "epoch": 9.93,
996
  "learning_rate": 7.7e-05,
997
+ "loss": 1.8413,
998
  "step": 1440
999
  },
1000
  {
1001
  "epoch": 10.0,
1002
  "learning_rate": 7.647368421052631e-05,
1003
+ "loss": 2.1608,
1004
  "step": 1450
1005
  },
1006
  {
1007
  "epoch": 10.07,
1008
  "learning_rate": 7.594736842105263e-05,
1009
+ "loss": 2.023,
1010
  "step": 1460
1011
  },
1012
  {
1013
  "epoch": 10.14,
1014
  "learning_rate": 7.542105263157895e-05,
1015
+ "loss": 1.6968,
1016
  "step": 1470
1017
  },
1018
  {
1019
  "epoch": 10.21,
1020
  "learning_rate": 7.489473684210527e-05,
1021
+ "loss": 1.8525,
1022
  "step": 1480
1023
  },
1024
  {
1025
  "epoch": 10.28,
1026
  "learning_rate": 7.43684210526316e-05,
1027
+ "loss": 2.2802,
1028
  "step": 1490
1029
  },
1030
  {
1031
  "epoch": 10.34,
1032
  "learning_rate": 7.38421052631579e-05,
1033
+ "loss": 1.6608,
1034
  "step": 1500
1035
  },
1036
  {
1037
  "epoch": 10.34,
1038
+ "eval_loss": 1.921738862991333,
1039
+ "eval_runtime": 63.4326,
1040
+ "eval_samples_per_second": 26.485,
1041
+ "eval_steps_per_second": 26.485,
1042
+ "eval_wer": 0.869753979739508,
1043
  "step": 1500
1044
  },
1045
  {
1046
  "epoch": 10.41,
1047
  "learning_rate": 7.331578947368422e-05,
1048
+ "loss": 1.7294,
1049
  "step": 1510
1050
  },
1051
  {
1052
  "epoch": 10.48,
1053
  "learning_rate": 7.278947368421053e-05,
1054
+ "loss": 2.0241,
1055
  "step": 1520
1056
  },
1057
  {
1058
  "epoch": 10.55,
1059
  "learning_rate": 7.226315789473685e-05,
1060
+ "loss": 2.1875,
1061
  "step": 1530
1062
  },
1063
  {
1064
  "epoch": 10.62,
1065
  "learning_rate": 7.173684210526316e-05,
1066
+ "loss": 1.7529,
1067
  "step": 1540
1068
  },
1069
  {
1070
  "epoch": 10.69,
1071
  "learning_rate": 7.121052631578947e-05,
1072
+ "loss": 1.9517,
1073
  "step": 1550
1074
  },
1075
  {
1076
  "epoch": 10.76,
1077
  "learning_rate": 7.06842105263158e-05,
1078
+ "loss": 2.2139,
1079
  "step": 1560
1080
  },
1081
  {
1082
  "epoch": 10.83,
1083
  "learning_rate": 7.015789473684211e-05,
1084
+ "loss": 1.8942,
1085
  "step": 1570
1086
  },
1087
  {
1088
  "epoch": 10.9,
1089
  "learning_rate": 6.963157894736842e-05,
1090
+ "loss": 1.7269,
1091
  "step": 1580
1092
  },
1093
  {
1094
  "epoch": 10.97,
1095
  "learning_rate": 6.910526315789474e-05,
1096
+ "loss": 1.8851,
1097
  "step": 1590
1098
  },
1099
  {
1100
  "epoch": 11.03,
1101
  "learning_rate": 6.857894736842105e-05,
1102
+ "loss": 2.2194,
1103
  "step": 1600
1104
  },
1105
  {
1106
  "epoch": 11.03,
1107
+ "eval_loss": 1.9090718030929565,
1108
+ "eval_runtime": 63.6141,
1109
+ "eval_samples_per_second": 26.409,
1110
+ "eval_steps_per_second": 26.409,
1111
+ "eval_wer": 0.8727172489835298,
1112
  "step": 1600
1113
  },
1114
  {
1115
  "epoch": 11.1,
1116
  "learning_rate": 6.805263157894736e-05,
1117
+ "loss": 1.7286,
1118
  "step": 1610
1119
  },
1120
  {
1121
  "epoch": 11.17,
1122
  "learning_rate": 6.752631578947368e-05,
1123
+ "loss": 1.7089,
1124
  "step": 1620
1125
  },
1126
  {
1127
  "epoch": 11.24,
1128
  "learning_rate": 6.7e-05,
1129
+ "loss": 2.0165,
1130
  "step": 1630
1131
  },
1132
  {
1133
  "epoch": 11.31,
1134
  "learning_rate": 6.647368421052632e-05,
1135
+ "loss": 2.08,
1136
  "step": 1640
1137
  },
1138
  {
1139
  "epoch": 11.38,
1140
  "learning_rate": 6.594736842105264e-05,
1141
+ "loss": 1.7116,
1142
  "step": 1650
1143
  },
1144
  {
1145
  "epoch": 11.45,
1146
  "learning_rate": 6.542105263157895e-05,
1147
+ "loss": 1.8577,
1148
  "step": 1660
1149
  },
1150
  {
1151
  "epoch": 11.52,
1152
  "learning_rate": 6.489473684210527e-05,
1153
+ "loss": 2.2062,
1154
  "step": 1670
1155
  },
1156
  {
1157
  "epoch": 11.59,
1158
  "learning_rate": 6.436842105263158e-05,
1159
+ "loss": 1.7554,
1160
  "step": 1680
1161
  },
1162
  {
1163
  "epoch": 11.66,
1164
  "learning_rate": 6.384210526315791e-05,
1165
+ "loss": 1.6673,
1166
  "step": 1690
1167
  },
1168
  {
1169
  "epoch": 11.72,
1170
  "learning_rate": 6.331578947368422e-05,
1171
+ "loss": 1.9002,
1172
  "step": 1700
1173
  },
1174
  {
1175
  "epoch": 11.72,
1176
+ "eval_loss": 1.8745884895324707,
1177
+ "eval_runtime": 63.6985,
1178
+ "eval_samples_per_second": 26.374,
1179
+ "eval_steps_per_second": 26.374,
1180
+ "eval_wer": 0.8332299634759838,
1181
  "step": 1700
1182
  },
1183
  {
1184
  "epoch": 11.79,
1185
  "learning_rate": 6.278947368421053e-05,
1186
+ "loss": 2.1405,
1187
  "step": 1710
1188
  },
1189
  {
1190
  "epoch": 11.86,
1191
  "learning_rate": 6.226315789473685e-05,
1192
+ "loss": 1.6339,
1193
  "step": 1720
1194
  },
1195
  {
1196
  "epoch": 11.93,
1197
  "learning_rate": 6.173684210526316e-05,
1198
+ "loss": 1.7832,
1199
  "step": 1730
1200
  },
1201
  {
1202
  "epoch": 12.0,
1203
  "learning_rate": 6.121052631578947e-05,
1204
+ "loss": 2.1309,
1205
  "step": 1740
1206
  },
1207
  {
1208
  "epoch": 12.07,
1209
  "learning_rate": 6.0684210526315785e-05,
1210
+ "loss": 1.895,
1211
  "step": 1750
1212
  },
1213
  {
1214
  "epoch": 12.14,
1215
  "learning_rate": 6.015789473684211e-05,
1216
+ "loss": 1.6297,
1217
  "step": 1760
1218
  },
1219
  {
1220
  "epoch": 12.21,
1221
  "learning_rate": 5.9631578947368425e-05,
1222
+ "loss": 1.8337,
1223
  "step": 1770
1224
  },
1225
  {
1226
  "epoch": 12.28,
1227
  "learning_rate": 5.9105263157894744e-05,
1228
+ "loss": 2.1476,
1229
  "step": 1780
1230
  },
1231
  {
1232
  "epoch": 12.34,
1233
  "learning_rate": 5.857894736842106e-05,
1234
+ "loss": 1.6987,
1235
  "step": 1790
1236
  },
1237
  {
1238
  "epoch": 12.41,
1239
  "learning_rate": 5.805263157894737e-05,
1240
+ "loss": 1.6268,
1241
  "step": 1800
1242
  },
1243
  {
1244
  "epoch": 12.41,
1245
+ "eval_loss": 1.878185510635376,
1246
+ "eval_runtime": 63.513,
1247
+ "eval_samples_per_second": 26.451,
1248
+ "eval_steps_per_second": 26.451,
1249
+ "eval_wer": 0.795052029494866,
1250
  "step": 1800
1251
  },
1252
  {
1253
  "epoch": 12.48,
1254
  "learning_rate": 5.752631578947368e-05,
1255
+ "loss": 1.9851,
1256
  "step": 1810
1257
  },
1258
  {
1259
  "epoch": 12.55,
1260
  "learning_rate": 5.6999999999999996e-05,
1261
+ "loss": 2.0705,
1262
  "step": 1820
1263
  },
1264
  {
1265
  "epoch": 12.62,
1266
  "learning_rate": 5.647368421052632e-05,
1267
+ "loss": 1.609,
1268
  "step": 1830
1269
  },
1270
  {
1271
  "epoch": 12.69,
1272
  "learning_rate": 5.5947368421052636e-05,
1273
+ "loss": 1.7078,
1274
  "step": 1840
1275
  },
1276
  {
1277
  "epoch": 12.76,
1278
  "learning_rate": 5.542105263157895e-05,
1279
+ "loss": 2.0835,
1280
  "step": 1850
1281
  },
1282
  {
1283
  "epoch": 12.83,
1284
  "learning_rate": 5.489473684210527e-05,
1285
+ "loss": 1.8212,
1286
  "step": 1860
1287
  },
1288
  {
1289
  "epoch": 12.9,
1290
  "learning_rate": 5.436842105263158e-05,
1291
+ "loss": 1.6314,
1292
  "step": 1870
1293
  },
1294
  {
1295
  "epoch": 12.97,
1296
  "learning_rate": 5.3842105263157895e-05,
1297
+ "loss": 1.8577,
1298
  "step": 1880
1299
  },
1300
  {
1301
  "epoch": 13.03,
1302
  "learning_rate": 5.331578947368421e-05,
1303
+ "loss": 2.1196,
1304
  "step": 1890
1305
  },
1306
  {
1307
  "epoch": 13.1,
1308
  "learning_rate": 5.2789473684210534e-05,
1309
+ "loss": 1.6455,
1310
  "step": 1900
1311
  },
1312
  {
1313
  "epoch": 13.1,
1314
+ "eval_loss": 1.8229954242706299,
1315
+ "eval_runtime": 64.0654,
1316
+ "eval_samples_per_second": 26.223,
1317
+ "eval_steps_per_second": 26.223,
1318
+ "eval_wer": 0.8224794983116257,
1319
  "step": 1900
1320
  },
1321
  {
1322
  "epoch": 13.17,
1323
  "learning_rate": 5.226315789473685e-05,
1324
+ "loss": 1.7046,
1325
  "step": 1910
1326
  },
1327
  {
1328
  "epoch": 13.24,
1329
  "learning_rate": 5.173684210526316e-05,
1330
+ "loss": 1.9167,
1331
  "step": 1920
1332
  },
1333
  {
1334
  "epoch": 13.31,
1335
  "learning_rate": 5.121052631578947e-05,
1336
+ "loss": 1.909,
1337
  "step": 1930
1338
  },
1339
  {
1340
  "epoch": 13.38,
1341
  "learning_rate": 5.068421052631579e-05,
1342
+ "loss": 1.6338,
1343
  "step": 1940
1344
  },
1345
  {
1346
  "epoch": 13.45,
1347
  "learning_rate": 5.0157894736842106e-05,
1348
+ "loss": 1.7069,
1349
  "step": 1950
1350
  },
1351
  {
1352
  "epoch": 13.52,
1353
  "learning_rate": 4.9631578947368426e-05,
1354
+ "loss": 2.1442,
1355
  "step": 1960
1356
  },
1357
  {
1358
  "epoch": 13.59,
1359
  "learning_rate": 4.910526315789474e-05,
1360
+ "loss": 1.7517,
1361
  "step": 1970
1362
  },
1363
  {
1364
  "epoch": 13.66,
1365
  "learning_rate": 4.857894736842106e-05,
1366
+ "loss": 1.5841,
1367
  "step": 1980
1368
  },
1369
  {
1370
  "epoch": 13.72,
1371
  "learning_rate": 4.805263157894737e-05,
1372
+ "loss": 1.9493,
1373
  "step": 1990
1374
  },
1375
  {
1376
  "epoch": 13.79,
1377
  "learning_rate": 4.7526315789473684e-05,
1378
+ "loss": 2.0308,
1379
  "step": 2000
1380
  },
1381
  {
1382
  "epoch": 13.79,
1383
+ "eval_loss": 1.8066883087158203,
1384
+ "eval_runtime": 63.421,
1385
+ "eval_samples_per_second": 26.49,
1386
+ "eval_steps_per_second": 26.49,
1387
+ "eval_wer": 0.8560402453311281,
1388
  "step": 2000
1389
  },
1390
  {
1391
  "epoch": 13.86,
1392
  "learning_rate": 4.7e-05,
1393
+ "loss": 1.5627,
1394
  "step": 2010
1395
  },
1396
  {
 
1402
  {
1403
  "epoch": 14.0,
1404
  "learning_rate": 4.594736842105264e-05,
1405
+ "loss": 1.9153,
1406
  "step": 2030
1407
  },
1408
  {
1409
  "epoch": 14.07,
1410
  "learning_rate": 4.542105263157895e-05,
1411
+ "loss": 1.8294,
1412
  "step": 2040
1413
  },
1414
  {
1415
  "epoch": 14.14,
1416
  "learning_rate": 4.489473684210527e-05,
1417
+ "loss": 1.513,
1418
  "step": 2050
1419
  },
1420
  {
1421
  "epoch": 14.21,
1422
  "learning_rate": 4.436842105263158e-05,
1423
+ "loss": 1.7689,
1424
  "step": 2060
1425
  },
1426
  {
1427
  "epoch": 14.28,
1428
  "learning_rate": 4.3842105263157895e-05,
1429
+ "loss": 2.1114,
1430
  "step": 2070
1431
  },
1432
  {
1433
  "epoch": 14.34,
1434
  "learning_rate": 4.3315789473684215e-05,
1435
+ "loss": 1.6554,
1436
  "step": 2080
1437
  },
1438
  {
1439
  "epoch": 14.41,
1440
  "learning_rate": 4.278947368421053e-05,
1441
+ "loss": 1.639,
1442
  "step": 2090
1443
  },
1444
  {
1445
  "epoch": 14.48,
1446
  "learning_rate": 4.226315789473684e-05,
1447
+ "loss": 1.855,
1448
  "step": 2100
1449
  },
1450
  {
1451
  "epoch": 14.48,
1452
+ "eval_loss": 1.8128886222839355,
1453
+ "eval_runtime": 63.7285,
1454
+ "eval_samples_per_second": 26.362,
1455
+ "eval_steps_per_second": 26.362,
1456
+ "eval_wer": 0.8176555716353111,
1457
  "step": 2100
1458
  },
1459
  {
1460
  "epoch": 14.55,
1461
  "learning_rate": 4.1736842105263154e-05,
1462
+ "loss": 1.9404,
1463
  "step": 2110
1464
  },
1465
  {
1466
  "epoch": 14.62,
1467
  "learning_rate": 4.1210526315789474e-05,
1468
+ "loss": 1.5515,
1469
  "step": 2120
1470
  },
1471
  {
1472
  "epoch": 14.69,
1473
  "learning_rate": 4.0684210526315794e-05,
1474
+ "loss": 1.7568,
1475
  "step": 2130
1476
  },
1477
  {
1478
  "epoch": 14.76,
1479
  "learning_rate": 4.015789473684211e-05,
1480
+ "loss": 2.0269,
1481
  "step": 2140
1482
  },
1483
  {
1484
  "epoch": 14.83,
1485
  "learning_rate": 3.9631578947368426e-05,
1486
+ "loss": 1.7064,
1487
  "step": 2150
1488
  },
1489
  {
1490
  "epoch": 14.9,
1491
  "learning_rate": 3.910526315789474e-05,
1492
+ "loss": 1.4851,
1493
  "step": 2160
1494
  },
1495
  {
1496
  "epoch": 14.97,
1497
  "learning_rate": 3.857894736842105e-05,
1498
+ "loss": 1.69,
1499
  "step": 2170
1500
  },
1501
  {
1502
  "epoch": 15.03,
1503
  "learning_rate": 3.8052631578947365e-05,
1504
+ "loss": 2.0147,
1505
  "step": 2180
1506
  },
1507
  {
1508
  "epoch": 15.1,
1509
  "learning_rate": 3.7526315789473685e-05,
1510
+ "loss": 1.5575,
1511
  "step": 2190
1512
  },
1513
  {
1514
  "epoch": 15.17,
1515
  "learning_rate": 3.7e-05,
1516
+ "loss": 1.5901,
1517
  "step": 2200
1518
  },
1519
  {
1520
  "epoch": 15.17,
1521
+ "eval_loss": 1.7891418933868408,
1522
+ "eval_runtime": 63.2021,
1523
+ "eval_samples_per_second": 26.581,
1524
+ "eval_steps_per_second": 26.581,
1525
+ "eval_wer": 0.8367445386258701,
1526
  "step": 2200
1527
  },
1528
  {
1529
  "epoch": 15.24,
1530
  "learning_rate": 3.647368421052632e-05,
1531
+ "loss": 1.8573,
1532
  "step": 2210
1533
  },
1534
  {
1535
  "epoch": 15.31,
1536
  "learning_rate": 3.594736842105264e-05,
1537
+ "loss": 1.7881,
1538
  "step": 2220
1539
  },
1540
  {
1541
  "epoch": 15.38,
1542
  "learning_rate": 3.542105263157895e-05,
1543
+ "loss": 1.5679,
1544
  "step": 2230
1545
  },
1546
  {
1547
  "epoch": 15.45,
1548
  "learning_rate": 3.4894736842105264e-05,
1549
+ "loss": 1.6162,
1550
  "step": 2240
1551
  },
1552
  {
1553
  "epoch": 15.52,
1554
  "learning_rate": 3.436842105263158e-05,
1555
+ "loss": 2.0334,
1556
  "step": 2250
1557
  },
1558
  {
1559
  "epoch": 15.59,
1560
  "learning_rate": 3.3842105263157896e-05,
1561
+ "loss": 1.5751,
1562
  "step": 2260
1563
  },
1564
  {
1565
  "epoch": 15.66,
1566
  "learning_rate": 3.331578947368421e-05,
1567
+ "loss": 1.5643,
1568
  "step": 2270
1569
  },
1570
  {
1571
  "epoch": 15.72,
1572
  "learning_rate": 3.278947368421052e-05,
1573
+ "loss": 1.8435,
1574
  "step": 2280
1575
  },
1576
  {
1577
  "epoch": 15.79,
1578
  "learning_rate": 3.226315789473684e-05,
1579
+ "loss": 2.0355,
1580
  "step": 2290
1581
  },
1582
  {
1583
  "epoch": 15.86,
1584
  "learning_rate": 3.173684210526316e-05,
1585
+ "loss": 1.4848,
1586
  "step": 2300
1587
  },
1588
  {
1589
  "epoch": 15.86,
1590
+ "eval_loss": 1.7820757627487183,
1591
+ "eval_runtime": 63.5201,
1592
+ "eval_samples_per_second": 26.448,
1593
+ "eval_steps_per_second": 26.448,
1594
+ "eval_wer": 0.8201364482117015,
1595
  "step": 2300
1596
  },
1597
  {
1598
  "epoch": 15.93,
1599
  "learning_rate": 3.1210526315789475e-05,
1600
+ "loss": 1.5905,
1601
  "step": 2310
1602
  },
1603
  {
1604
  "epoch": 16.0,
1605
  "learning_rate": 3.0684210526315795e-05,
1606
+ "loss": 1.9714,
1607
  "step": 2320
1608
  },
1609
  {
1610
  "epoch": 16.07,
1611
  "learning_rate": 3.0157894736842108e-05,
1612
+ "loss": 1.7952,
1613
  "step": 2330
1614
  },
1615
  {
1616
  "epoch": 16.14,
1617
  "learning_rate": 2.963157894736842e-05,
1618
+ "loss": 1.4772,
1619
  "step": 2340
1620
  },
1621
  {
1622
  "epoch": 16.21,
1623
  "learning_rate": 2.910526315789474e-05,
1624
+ "loss": 1.6484,
1625
  "step": 2350
1626
  },
1627
  {
1628
  "epoch": 16.28,
1629
  "learning_rate": 2.8578947368421057e-05,
1630
+ "loss": 2.0105,
1631
  "step": 2360
1632
  },
1633
  {
1634
  "epoch": 16.34,
1635
  "learning_rate": 2.805263157894737e-05,
1636
+ "loss": 1.5344,
1637
  "step": 2370
1638
  },
1639
  {
1640
  "epoch": 16.41,
1641
  "learning_rate": 2.7526315789473683e-05,
1642
+ "loss": 1.5725,
1643
  "step": 2380
1644
  },
1645
  {
1646
  "epoch": 16.48,
1647
  "learning_rate": 2.7000000000000002e-05,
1648
+ "loss": 1.7699,
1649
  "step": 2390
1650
  },
1651
  {
1652
  "epoch": 16.55,
1653
  "learning_rate": 2.647368421052632e-05,
1654
+ "loss": 1.8754,
1655
  "step": 2400
1656
  },
1657
  {
1658
  "epoch": 16.55,
1659
+ "eval_loss": 1.7700049877166748,
1660
+ "eval_runtime": 63.9222,
1661
+ "eval_samples_per_second": 26.282,
1662
+ "eval_steps_per_second": 26.282,
1663
+ "eval_wer": 0.8137275170560264,
1664
  "step": 2400
1665
  },
1666
  {
1667
  "epoch": 16.62,
1668
  "learning_rate": 2.5947368421052632e-05,
1669
+ "loss": 1.4734,
1670
  "step": 2410
1671
  },
1672
  {
1673
  "epoch": 16.69,
1674
  "learning_rate": 2.542105263157895e-05,
1675
+ "loss": 1.6307,
1676
  "step": 2420
1677
  },
1678
  {
1679
  "epoch": 16.76,
1680
  "learning_rate": 2.4894736842105264e-05,
1681
+ "loss": 1.9546,
1682
  "step": 2430
1683
  },
1684
  {
1685
  "epoch": 16.83,
1686
  "learning_rate": 2.436842105263158e-05,
1687
+ "loss": 1.6538,
1688
  "step": 2440
1689
  },
1690
  {
1691
  "epoch": 16.9,
1692
  "learning_rate": 2.3842105263157897e-05,
1693
+ "loss": 1.5129,
1694
  "step": 2450
1695
  },
1696
  {
1697
  "epoch": 16.97,
1698
  "learning_rate": 2.331578947368421e-05,
1699
+ "loss": 1.667,
1700
  "step": 2460
1701
  },
1702
  {
1703
  "epoch": 17.03,
1704
  "learning_rate": 2.2789473684210527e-05,
1705
+ "loss": 1.966,
1706
  "step": 2470
1707
  },
1708
  {
1709
  "epoch": 17.1,
1710
  "learning_rate": 2.2263157894736843e-05,
1711
+ "loss": 1.4716,
1712
  "step": 2480
1713
  },
1714
  {
1715
  "epoch": 17.17,
1716
  "learning_rate": 2.173684210526316e-05,
1717
+ "loss": 1.5158,
1718
  "step": 2490
1719
  },
1720
  {
1721
  "epoch": 17.24,
1722
  "learning_rate": 2.1210526315789476e-05,
1723
+ "loss": 1.7975,
1724
  "step": 2500
1725
  },
1726
  {
1727
  "epoch": 17.24,
1728
+ "eval_loss": 1.779523253440857,
1729
+ "eval_runtime": 64.1655,
1730
+ "eval_samples_per_second": 26.182,
1731
+ "eval_steps_per_second": 26.182,
1732
+ "eval_wer": 0.8171042657294466,
1733
  "step": 2500
1734
  },
1735
  {
1736
  "epoch": 17.31,
1737
  "learning_rate": 2.068421052631579e-05,
1738
+ "loss": 1.849,
1739
  "step": 2510
1740
  },
1741
  {
1742
  "epoch": 17.38,
1743
  "learning_rate": 2.0157894736842105e-05,
1744
+ "loss": 1.4139,
1745
  "step": 2520
1746
  },
1747
  {
1748
  "epoch": 17.45,
1749
  "learning_rate": 1.963157894736842e-05,
1750
+ "loss": 1.6001,
1751
  "step": 2530
1752
  },
1753
  {
1754
  "epoch": 17.52,
1755
  "learning_rate": 1.9105263157894738e-05,
1756
+ "loss": 1.8943,
1757
  "step": 2540
1758
  },
1759
  {
1760
  "epoch": 17.59,
1761
  "learning_rate": 1.8578947368421054e-05,
1762
+ "loss": 1.5934,
1763
  "step": 2550
1764
  },
1765
  {
1766
  "epoch": 17.66,
1767
  "learning_rate": 1.8052631578947367e-05,
1768
+ "loss": 1.5103,
1769
  "step": 2560
1770
  },
1771
  {
1772
  "epoch": 17.72,
1773
  "learning_rate": 1.7526315789473683e-05,
1774
+ "loss": 1.7289,
1775
  "step": 2570
1776
  },
1777
  {
1778
  "epoch": 17.79,
1779
  "learning_rate": 1.7000000000000003e-05,
1780
+ "loss": 1.8807,
1781
  "step": 2580
1782
  },
1783
  {
1784
  "epoch": 17.86,
1785
  "learning_rate": 1.6473684210526316e-05,
1786
+ "loss": 1.4627,
1787
  "step": 2590
1788
  },
1789
  {
1790
  "epoch": 17.93,
1791
  "learning_rate": 1.5947368421052633e-05,
1792
+ "loss": 1.5194,
1793
  "step": 2600
1794
  },
1795
  {
1796
  "epoch": 17.93,
1797
+ "eval_loss": 1.7605171203613281,
1798
+ "eval_runtime": 63.8146,
1799
+ "eval_samples_per_second": 26.326,
1800
+ "eval_steps_per_second": 26.326,
1801
+ "eval_wer": 0.7976707325477225,
1802
  "step": 2600
1803
  },
1804
  {
1805
  "epoch": 18.0,
1806
  "learning_rate": 1.5421052631578946e-05,
1807
+ "loss": 1.8941,
1808
  "step": 2610
1809
  },
1810
  {
1811
  "epoch": 18.07,
1812
  "learning_rate": 1.4894736842105264e-05,
1813
+ "loss": 1.6843,
1814
  "step": 2620
1815
  },
1816
  {
1817
  "epoch": 18.14,
1818
  "learning_rate": 1.4368421052631582e-05,
1819
+ "loss": 1.3952,
1820
  "step": 2630
1821
  },
1822
  {
1823
  "epoch": 18.21,
1824
  "learning_rate": 1.3842105263157895e-05,
1825
+ "loss": 1.5958,
1826
  "step": 2640
1827
  },
1828
  {
1829
  "epoch": 18.28,
1830
  "learning_rate": 1.3315789473684213e-05,
1831
+ "loss": 1.9271,
1832
  "step": 2650
1833
  },
1834
  {
1835
  "epoch": 18.34,
1836
  "learning_rate": 1.2789473684210526e-05,
1837
+ "loss": 1.4861,
1838
  "step": 2660
1839
  },
1840
  {
1841
  "epoch": 18.41,
1842
  "learning_rate": 1.2263157894736844e-05,
1843
+ "loss": 1.47,
1844
  "step": 2670
1845
  },
1846
  {
1847
  "epoch": 18.48,
1848
  "learning_rate": 1.1736842105263158e-05,
1849
+ "loss": 1.7653,
1850
  "step": 2680
1851
  },
1852
  {
1853
  "epoch": 18.55,
1854
  "learning_rate": 1.1210526315789475e-05,
1855
+ "loss": 1.7886,
1856
  "step": 2690
1857
  },
1858
  {
1859
  "epoch": 18.62,
1860
  "learning_rate": 1.068421052631579e-05,
1861
+ "loss": 1.4374,
1862
  "step": 2700
1863
  },
1864
  {
1865
  "epoch": 18.62,
1866
+ "eval_loss": 1.7529252767562866,
1867
+ "eval_runtime": 63.7187,
1868
+ "eval_samples_per_second": 26.366,
1869
+ "eval_steps_per_second": 26.366,
1870
+ "eval_wer": 0.7978085590241886,
1871
  "step": 2700
1872
  },
1873
  {
1874
  "epoch": 18.69,
1875
  "learning_rate": 1.0157894736842106e-05,
1876
+ "loss": 1.5556,
1877
  "step": 2710
1878
  },
1879
  {
1880
  "epoch": 18.76,
1881
  "learning_rate": 9.631578947368422e-06,
1882
+ "loss": 1.8987,
1883
  "step": 2720
1884
  },
1885
  {
1886
  "epoch": 18.83,
1887
  "learning_rate": 9.105263157894737e-06,
1888
+ "loss": 1.5625,
1889
  "step": 2730
1890
  },
1891
  {
1892
  "epoch": 18.9,
1893
  "learning_rate": 8.578947368421053e-06,
1894
+ "loss": 1.5209,
1895
  "step": 2740
1896
  },
1897
  {
1898
  "epoch": 18.97,
1899
  "learning_rate": 8.052631578947368e-06,
1900
+ "loss": 1.6116,
1901
  "step": 2750
1902
  },
1903
  {
1904
  "epoch": 19.03,
1905
  "learning_rate": 7.526315789473684e-06,
1906
+ "loss": 1.8313,
1907
  "step": 2760
1908
  },
1909
  {
1910
  "epoch": 19.1,
1911
  "learning_rate": 7.000000000000001e-06,
1912
+ "loss": 1.4403,
1913
  "step": 2770
1914
  },
1915
  {
1916
  "epoch": 19.17,
1917
  "learning_rate": 6.473684210526316e-06,
1918
+ "loss": 1.4586,
1919
  "step": 2780
1920
  },
1921
  {
1922
  "epoch": 19.24,
1923
  "learning_rate": 5.947368421052632e-06,
1924
+ "loss": 1.7928,
1925
  "step": 2790
1926
  },
1927
  {
1928
  "epoch": 19.31,
1929
  "learning_rate": 5.421052631578947e-06,
1930
+ "loss": 1.7498,
1931
  "step": 2800
1932
  },
1933
  {
1934
  "epoch": 19.31,
1935
+ "eval_loss": 1.7522045373916626,
1936
+ "eval_runtime": 63.0263,
1937
+ "eval_samples_per_second": 26.656,
1938
+ "eval_steps_per_second": 26.656,
1939
+ "eval_wer": 0.8022879195093378,
1940
  "step": 2800
1941
  },
1942
  {
1943
  "epoch": 19.38,
1944
  "learning_rate": 4.894736842105263e-06,
1945
+ "loss": 1.4375,
1946
  "step": 2810
1947
  },
1948
  {
1949
  "epoch": 19.45,
1950
  "learning_rate": 4.368421052631579e-06,
1951
+ "loss": 1.5455,
1952
  "step": 2820
1953
  },
1954
  {
1955
  "epoch": 19.52,
1956
  "learning_rate": 3.842105263157895e-06,
1957
+ "loss": 1.9311,
1958
  "step": 2830
1959
  },
1960
  {
1961
  "epoch": 19.59,
1962
+ "learning_rate": 3.315789473684211e-06,
1963
+ "loss": 1.4828,
1964
  "step": 2840
1965
  },
1966
  {
1967
  "epoch": 19.66,
1968
+ "learning_rate": 2.7894736842105266e-06,
1969
+ "loss": 1.4371,
1970
  "step": 2850
1971
  },
1972
  {
1973
  "epoch": 19.72,
1974
+ "learning_rate": 2.2631578947368426e-06,
1975
+ "loss": 1.687,
1976
  "step": 2860
1977
  },
1978
  {
1979
  "epoch": 19.79,
1980
+ "learning_rate": 1.7368421052631579e-06,
1981
+ "loss": 1.8411,
1982
  "step": 2870
1983
  },
1984
  {
1985
  "epoch": 19.86,
1986
+ "learning_rate": 1.2105263157894738e-06,
1987
+ "loss": 1.4025,
1988
  "step": 2880
1989
  },
1990
  {
1991
  "epoch": 19.93,
1992
+ "learning_rate": 6.842105263157896e-07,
1993
+ "loss": 1.5635,
1994
  "step": 2890
1995
  },
1996
  {
1997
  "epoch": 20.0,
1998
+ "learning_rate": 1.5789473684210527e-07,
1999
+ "loss": 1.7452,
2000
  "step": 2900
2001
  },
2002
  {
2003
  "epoch": 20.0,
2004
+ "eval_loss": 1.7481727600097656,
2005
+ "eval_runtime": 63.0435,
2006
+ "eval_samples_per_second": 26.648,
2007
+ "eval_steps_per_second": 26.648,
2008
+ "eval_wer": 0.7987044311212184,
2009
  "step": 2900
2010
  },
2011
  {
2012
  "epoch": 20.0,
2013
  "step": 2900,
2014
  "total_flos": 1.1463619274186412e+18,
2015
+ "train_loss": 2.305506002491918,
2016
+ "train_runtime": 4265.6036,
2017
+ "train_samples_per_second": 21.662,
2018
+ "train_steps_per_second": 0.68
2019
  }
2020
  ],
2021
  "max_steps": 2900,