besimray committed on
Commit
07354ea
1 Parent(s): 000eb83

Training in progress, step 110, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a79d6468f9a8226984a35f278d2afc4996fc80bf229f9e9f4696b88b0244d70
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa396bc2eabb66ff27857a10437bfc7c558806c4a68ec1a75dacc99173908d82
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eed3ef08c89e9a07cd3218a687a41dd717cf80833936a581ab6db0cd1f2aabdf
3
  size 170920084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60c2f4152cf6b8df3c7f147c9866f5250416a16de45bd3c857da8ea2318ba659
3
  size 170920084
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fa0933e237e365bbcbd10bdc7014a011ec526d46395768542a06f4bd5bcda7b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ee3a3829b510f41bd584ab9c5fe3e74aab24b4293509d98d00cb95ebef4b7e8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5c84ec0ff3c8c6aa13b25568668096db118f67ce80a9fa015a625446606f15d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:398198b060b9edcfe93ff59de4a929b40cbc42323ec0afb0426f8d7b821a61c1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.2894105911254883,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-80",
4
- "epoch": 0.028238616307800918,
5
  "eval_steps": 10,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -795,6 +795,84 @@
795
  "eval_samples_per_second": 1.771,
796
  "eval_steps_per_second": 1.771,
797
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
798
  }
799
  ],
800
  "logging_steps": 1,
@@ -809,7 +887,7 @@
809
  "early_stopping_threshold": 0.0
810
  },
811
  "attributes": {
812
- "early_stopping_patience_counter": 2
813
  }
814
  },
815
  "TrainerControl": {
@@ -818,12 +896,12 @@
818
  "should_evaluate": false,
819
  "should_log": false,
820
  "should_save": true,
821
- "should_training_stop": false
822
  },
823
  "attributes": {}
824
  }
825
  },
826
- "total_flos": 7.46010452164608e+16,
827
  "train_batch_size": 1,
828
  "trial_name": null,
829
  "trial_params": null
 
1
  {
2
  "best_metric": 1.2894105911254883,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-80",
4
+ "epoch": 0.03106247793858101,
5
  "eval_steps": 10,
6
+ "global_step": 110,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
795
  "eval_samples_per_second": 1.771,
796
  "eval_steps_per_second": 1.771,
797
  "step": 100
798
+ },
799
+ {
800
+ "epoch": 0.028521002470878926,
801
+ "grad_norm": 0.8574671745300293,
802
+ "learning_rate": 0.00018345732537213027,
803
+ "loss": 1.9177,
804
+ "step": 101
805
+ },
806
+ {
807
+ "epoch": 0.028803388633956935,
808
+ "grad_norm": 0.9129525423049927,
809
+ "learning_rate": 0.00018310240965131041,
810
+ "loss": 2.4622,
811
+ "step": 102
812
+ },
813
+ {
814
+ "epoch": 0.029085774797034947,
815
+ "grad_norm": 0.8835414052009583,
816
+ "learning_rate": 0.00018274407791591966,
817
+ "loss": 1.051,
818
+ "step": 103
819
+ },
820
+ {
821
+ "epoch": 0.029368160960112955,
822
+ "grad_norm": 0.508120059967041,
823
+ "learning_rate": 0.00018238234489557215,
824
+ "loss": 0.4317,
825
+ "step": 104
826
+ },
827
+ {
828
+ "epoch": 0.029650547123190964,
829
+ "grad_norm": 1.259400725364685,
830
+ "learning_rate": 0.0001820172254596956,
831
+ "loss": 1.9737,
832
+ "step": 105
833
+ },
834
+ {
835
+ "epoch": 0.029932933286268972,
836
+ "grad_norm": 1.45259428024292,
837
+ "learning_rate": 0.00018164873461691986,
838
+ "loss": 0.6959,
839
+ "step": 106
840
+ },
841
+ {
842
+ "epoch": 0.03021531944934698,
843
+ "grad_norm": 0.8846643567085266,
844
+ "learning_rate": 0.00018127688751446027,
845
+ "loss": 1.3487,
846
+ "step": 107
847
+ },
848
+ {
849
+ "epoch": 0.030497705612424993,
850
+ "grad_norm": 0.7302697896957397,
851
+ "learning_rate": 0.00018090169943749476,
852
+ "loss": 1.7445,
853
+ "step": 108
854
+ },
855
+ {
856
+ "epoch": 0.030780091775503,
857
+ "grad_norm": 1.3288211822509766,
858
+ "learning_rate": 0.0001805231858085356,
859
+ "loss": 1.2461,
860
+ "step": 109
861
+ },
862
+ {
863
+ "epoch": 0.03106247793858101,
864
+ "grad_norm": 0.4046940207481384,
865
+ "learning_rate": 0.00018014136218679567,
866
+ "loss": 1.9418,
867
+ "step": 110
868
+ },
869
+ {
870
+ "epoch": 0.03106247793858101,
871
+ "eval_loss": 1.3085497617721558,
872
+ "eval_runtime": 418.6203,
873
+ "eval_samples_per_second": 1.782,
874
+ "eval_steps_per_second": 1.782,
875
+ "step": 110
876
  }
877
  ],
878
  "logging_steps": 1,
 
887
  "early_stopping_threshold": 0.0
888
  },
889
  "attributes": {
890
+ "early_stopping_patience_counter": 3
891
  }
892
  },
893
  "TrainerControl": {
 
896
  "should_evaluate": false,
897
  "should_log": false,
898
  "should_save": true,
899
+ "should_training_stop": true
900
  },
901
  "attributes": {}
902
  }
903
  },
904
+ "total_flos": 8.206114973810688e+16,
905
  "train_batch_size": 1,
906
  "trial_name": null,
907
  "trial_params": null