MohamedAhmedAE
commited on
Commit
•
9896419
1
Parent(s):
d1f9513
Training in progress, step 100900, checkpoint
Browse files
last-checkpoint/adapter_config.json
CHANGED
@@ -20,13 +20,13 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
-
"
|
24 |
"k_proj",
|
25 |
"gate_proj",
|
26 |
"down_proj",
|
27 |
-
"
|
28 |
-
"
|
29 |
-
"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
+
"v_proj",
|
24 |
"k_proj",
|
25 |
"gate_proj",
|
26 |
"down_proj",
|
27 |
+
"q_proj",
|
28 |
+
"up_proj",
|
29 |
+
"o_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5544997664
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ff50e9a0eef14c00f32c5e550257295427f2d666e009aac32472aef43b0c78f
|
3 |
size 5544997664
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 674093138
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b1f3c8b9d1c8514057a760685418307853b2172387d395e371d81516debcc99
|
3 |
size 674093138
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d52e9778ae961a843d4efe5adba669832146332ec663eac9df46d71427724e3
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1dd725a3e5295711459643d6e1204a1d04a7f905cc6416544fa87ecdfb18228
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 200,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6657,6 +6657,419 @@
|
|
6657 |
"learning_rate": 1.9990150014305462e-05,
|
6658 |
"loss": 1.5194,
|
6659 |
"step": 95000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6660 |
}
|
6661 |
],
|
6662 |
"logging_steps": 100,
|
@@ -6676,7 +7089,7 @@
|
|
6676 |
"attributes": {}
|
6677 |
}
|
6678 |
},
|
6679 |
-
"total_flos": 1.
|
6680 |
"train_batch_size": 1,
|
6681 |
"trial_name": null,
|
6682 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.07503755596210195,
|
5 |
"eval_steps": 200,
|
6 |
+
"global_step": 100900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6657 |
"learning_rate": 1.9990150014305462e-05,
|
6658 |
"loss": 1.5194,
|
6659 |
"step": 95000
|
6660 |
+
},
|
6661 |
+
{
|
6662 |
+
"epoch": 0.07072419793851234,
|
6663 |
+
"grad_norm": 0.7444689273834229,
|
6664 |
+
"learning_rate": 1.9990129268900848e-05,
|
6665 |
+
"loss": 1.5198,
|
6666 |
+
"step": 95100
|
6667 |
+
},
|
6668 |
+
{
|
6669 |
+
"epoch": 0.07079856618029837,
|
6670 |
+
"grad_norm": 0.9299377202987671,
|
6671 |
+
"learning_rate": 1.9990108501683685e-05,
|
6672 |
+
"loss": 1.5393,
|
6673 |
+
"step": 95200
|
6674 |
+
},
|
6675 |
+
{
|
6676 |
+
"epoch": 0.0708729344220844,
|
6677 |
+
"grad_norm": 0.6611402630805969,
|
6678 |
+
"learning_rate": 1.999008771265401e-05,
|
6679 |
+
"loss": 1.5351,
|
6680 |
+
"step": 95300
|
6681 |
+
},
|
6682 |
+
{
|
6683 |
+
"epoch": 0.07094730266387042,
|
6684 |
+
"grad_norm": 0.4772530496120453,
|
6685 |
+
"learning_rate": 1.9990066901811876e-05,
|
6686 |
+
"loss": 1.5243,
|
6687 |
+
"step": 95400
|
6688 |
+
},
|
6689 |
+
{
|
6690 |
+
"epoch": 0.07102167090565645,
|
6691 |
+
"grad_norm": 0.42998188734054565,
|
6692 |
+
"learning_rate": 1.9990046069157322e-05,
|
6693 |
+
"loss": 1.5877,
|
6694 |
+
"step": 95500
|
6695 |
+
},
|
6696 |
+
{
|
6697 |
+
"epoch": 0.07109603914744247,
|
6698 |
+
"grad_norm": 0.7415347099304199,
|
6699 |
+
"learning_rate": 1.9990025214690396e-05,
|
6700 |
+
"loss": 1.5633,
|
6701 |
+
"step": 95600
|
6702 |
+
},
|
6703 |
+
{
|
6704 |
+
"epoch": 0.0711704073892285,
|
6705 |
+
"grad_norm": 0.657112717628479,
|
6706 |
+
"learning_rate": 1.999000433841114e-05,
|
6707 |
+
"loss": 1.4555,
|
6708 |
+
"step": 95700
|
6709 |
+
},
|
6710 |
+
{
|
6711 |
+
"epoch": 0.07124477563101453,
|
6712 |
+
"grad_norm": 0.9188429713249207,
|
6713 |
+
"learning_rate": 1.998998344031961e-05,
|
6714 |
+
"loss": 1.4329,
|
6715 |
+
"step": 95800
|
6716 |
+
},
|
6717 |
+
{
|
6718 |
+
"epoch": 0.07131914387280056,
|
6719 |
+
"grad_norm": 0.8823667168617249,
|
6720 |
+
"learning_rate": 1.9989962520415836e-05,
|
6721 |
+
"loss": 1.4754,
|
6722 |
+
"step": 95900
|
6723 |
+
},
|
6724 |
+
{
|
6725 |
+
"epoch": 0.07139351211458658,
|
6726 |
+
"grad_norm": 0.7276200652122498,
|
6727 |
+
"learning_rate": 1.9989941578699878e-05,
|
6728 |
+
"loss": 1.5286,
|
6729 |
+
"step": 96000
|
6730 |
+
},
|
6731 |
+
{
|
6732 |
+
"epoch": 0.07146788035637261,
|
6733 |
+
"grad_norm": 0.941512405872345,
|
6734 |
+
"learning_rate": 1.998992061517177e-05,
|
6735 |
+
"loss": 1.5087,
|
6736 |
+
"step": 96100
|
6737 |
+
},
|
6738 |
+
{
|
6739 |
+
"epoch": 0.07154224859815864,
|
6740 |
+
"grad_norm": 1.0310442447662354,
|
6741 |
+
"learning_rate": 1.998989962983157e-05,
|
6742 |
+
"loss": 1.5895,
|
6743 |
+
"step": 96200
|
6744 |
+
},
|
6745 |
+
{
|
6746 |
+
"epoch": 0.07161661683994466,
|
6747 |
+
"grad_norm": 1.3620883226394653,
|
6748 |
+
"learning_rate": 1.9989878622679317e-05,
|
6749 |
+
"loss": 1.474,
|
6750 |
+
"step": 96300
|
6751 |
+
},
|
6752 |
+
{
|
6753 |
+
"epoch": 0.0716909850817307,
|
6754 |
+
"grad_norm": 0.5119801163673401,
|
6755 |
+
"learning_rate": 1.998985759371505e-05,
|
6756 |
+
"loss": 1.5112,
|
6757 |
+
"step": 96400
|
6758 |
+
},
|
6759 |
+
{
|
6760 |
+
"epoch": 0.07176535332351673,
|
6761 |
+
"grad_norm": 0.8966123461723328,
|
6762 |
+
"learning_rate": 1.998983654293883e-05,
|
6763 |
+
"loss": 1.4903,
|
6764 |
+
"step": 96500
|
6765 |
+
},
|
6766 |
+
{
|
6767 |
+
"epoch": 0.07183972156530276,
|
6768 |
+
"grad_norm": 0.5336944460868835,
|
6769 |
+
"learning_rate": 1.998981547035069e-05,
|
6770 |
+
"loss": 1.5673,
|
6771 |
+
"step": 96600
|
6772 |
+
},
|
6773 |
+
{
|
6774 |
+
"epoch": 0.07191408980708879,
|
6775 |
+
"grad_norm": 1.2533961534500122,
|
6776 |
+
"learning_rate": 1.9989794375950688e-05,
|
6777 |
+
"loss": 1.5039,
|
6778 |
+
"step": 96700
|
6779 |
+
},
|
6780 |
+
{
|
6781 |
+
"epoch": 0.07198845804887481,
|
6782 |
+
"grad_norm": 1.3317081928253174,
|
6783 |
+
"learning_rate": 1.9989773259738858e-05,
|
6784 |
+
"loss": 1.567,
|
6785 |
+
"step": 96800
|
6786 |
+
},
|
6787 |
+
{
|
6788 |
+
"epoch": 0.07206282629066084,
|
6789 |
+
"grad_norm": 0.49700722098350525,
|
6790 |
+
"learning_rate": 1.998975212171525e-05,
|
6791 |
+
"loss": 1.542,
|
6792 |
+
"step": 96900
|
6793 |
+
},
|
6794 |
+
{
|
6795 |
+
"epoch": 0.07213719453244687,
|
6796 |
+
"grad_norm": 0.5809246301651001,
|
6797 |
+
"learning_rate": 1.9989730961879913e-05,
|
6798 |
+
"loss": 1.5097,
|
6799 |
+
"step": 97000
|
6800 |
+
},
|
6801 |
+
{
|
6802 |
+
"epoch": 0.07221156277423289,
|
6803 |
+
"grad_norm": 0.6107625365257263,
|
6804 |
+
"learning_rate": 1.9989709780232894e-05,
|
6805 |
+
"loss": 1.536,
|
6806 |
+
"step": 97100
|
6807 |
+
},
|
6808 |
+
{
|
6809 |
+
"epoch": 0.07228593101601892,
|
6810 |
+
"grad_norm": 0.5271338820457458,
|
6811 |
+
"learning_rate": 1.9989688576774234e-05,
|
6812 |
+
"loss": 1.5819,
|
6813 |
+
"step": 97200
|
6814 |
+
},
|
6815 |
+
{
|
6816 |
+
"epoch": 0.07236029925780495,
|
6817 |
+
"grad_norm": 0.6692411303520203,
|
6818 |
+
"learning_rate": 1.9989667351503988e-05,
|
6819 |
+
"loss": 1.4833,
|
6820 |
+
"step": 97300
|
6821 |
+
},
|
6822 |
+
{
|
6823 |
+
"epoch": 0.07243466749959097,
|
6824 |
+
"grad_norm": 1.0627728700637817,
|
6825 |
+
"learning_rate": 1.998964610442219e-05,
|
6826 |
+
"loss": 1.5404,
|
6827 |
+
"step": 97400
|
6828 |
+
},
|
6829 |
+
{
|
6830 |
+
"epoch": 0.072509035741377,
|
6831 |
+
"grad_norm": 0.5696298480033875,
|
6832 |
+
"learning_rate": 1.9989624835528896e-05,
|
6833 |
+
"loss": 1.4491,
|
6834 |
+
"step": 97500
|
6835 |
+
},
|
6836 |
+
{
|
6837 |
+
"epoch": 0.07258340398316303,
|
6838 |
+
"grad_norm": 0.5105301141738892,
|
6839 |
+
"learning_rate": 1.998960354482415e-05,
|
6840 |
+
"loss": 1.5188,
|
6841 |
+
"step": 97600
|
6842 |
+
},
|
6843 |
+
{
|
6844 |
+
"epoch": 0.07265777222494905,
|
6845 |
+
"grad_norm": 0.53251713514328,
|
6846 |
+
"learning_rate": 1.9989582232307998e-05,
|
6847 |
+
"loss": 1.5367,
|
6848 |
+
"step": 97700
|
6849 |
+
},
|
6850 |
+
{
|
6851 |
+
"epoch": 0.07273214046673508,
|
6852 |
+
"grad_norm": 0.6559078693389893,
|
6853 |
+
"learning_rate": 1.9989560897980485e-05,
|
6854 |
+
"loss": 1.4773,
|
6855 |
+
"step": 97800
|
6856 |
+
},
|
6857 |
+
{
|
6858 |
+
"epoch": 0.07280650870852111,
|
6859 |
+
"grad_norm": 0.39833974838256836,
|
6860 |
+
"learning_rate": 1.998953954184166e-05,
|
6861 |
+
"loss": 1.6063,
|
6862 |
+
"step": 97900
|
6863 |
+
},
|
6864 |
+
{
|
6865 |
+
"epoch": 0.07288087695030714,
|
6866 |
+
"grad_norm": 1.0479645729064941,
|
6867 |
+
"learning_rate": 1.9989518163891566e-05,
|
6868 |
+
"loss": 1.565,
|
6869 |
+
"step": 98000
|
6870 |
+
},
|
6871 |
+
{
|
6872 |
+
"epoch": 0.07295524519209316,
|
6873 |
+
"grad_norm": 0.7905478477478027,
|
6874 |
+
"learning_rate": 1.9989496764130253e-05,
|
6875 |
+
"loss": 1.5266,
|
6876 |
+
"step": 98100
|
6877 |
+
},
|
6878 |
+
{
|
6879 |
+
"epoch": 0.07302961343387919,
|
6880 |
+
"grad_norm": 0.4569951295852661,
|
6881 |
+
"learning_rate": 1.998947534255777e-05,
|
6882 |
+
"loss": 1.5295,
|
6883 |
+
"step": 98200
|
6884 |
+
},
|
6885 |
+
{
|
6886 |
+
"epoch": 0.07310398167566523,
|
6887 |
+
"grad_norm": 0.5308849215507507,
|
6888 |
+
"learning_rate": 1.9989453899174158e-05,
|
6889 |
+
"loss": 1.5203,
|
6890 |
+
"step": 98300
|
6891 |
+
},
|
6892 |
+
{
|
6893 |
+
"epoch": 0.07317834991745126,
|
6894 |
+
"grad_norm": 0.906802773475647,
|
6895 |
+
"learning_rate": 1.998943243397947e-05,
|
6896 |
+
"loss": 1.556,
|
6897 |
+
"step": 98400
|
6898 |
+
},
|
6899 |
+
{
|
6900 |
+
"epoch": 0.07325271815923728,
|
6901 |
+
"grad_norm": 0.5071494579315186,
|
6902 |
+
"learning_rate": 1.9989410946973747e-05,
|
6903 |
+
"loss": 1.5627,
|
6904 |
+
"step": 98500
|
6905 |
+
},
|
6906 |
+
{
|
6907 |
+
"epoch": 0.07332708640102331,
|
6908 |
+
"grad_norm": 0.5252199172973633,
|
6909 |
+
"learning_rate": 1.9989389438157037e-05,
|
6910 |
+
"loss": 1.5181,
|
6911 |
+
"step": 98600
|
6912 |
+
},
|
6913 |
+
{
|
6914 |
+
"epoch": 0.07340145464280934,
|
6915 |
+
"grad_norm": 0.5738980174064636,
|
6916 |
+
"learning_rate": 1.9989367907529394e-05,
|
6917 |
+
"loss": 1.6101,
|
6918 |
+
"step": 98700
|
6919 |
+
},
|
6920 |
+
{
|
6921 |
+
"epoch": 0.07347582288459537,
|
6922 |
+
"grad_norm": 0.6898683309555054,
|
6923 |
+
"learning_rate": 1.9989346355090853e-05,
|
6924 |
+
"loss": 1.579,
|
6925 |
+
"step": 98800
|
6926 |
+
},
|
6927 |
+
{
|
6928 |
+
"epoch": 0.07355019112638139,
|
6929 |
+
"grad_norm": 0.5396860241889954,
|
6930 |
+
"learning_rate": 1.998932478084147e-05,
|
6931 |
+
"loss": 1.5645,
|
6932 |
+
"step": 98900
|
6933 |
+
},
|
6934 |
+
{
|
6935 |
+
"epoch": 0.07362455936816742,
|
6936 |
+
"grad_norm": 0.5482293367385864,
|
6937 |
+
"learning_rate": 1.998930318478129e-05,
|
6938 |
+
"loss": 1.5453,
|
6939 |
+
"step": 99000
|
6940 |
+
},
|
6941 |
+
{
|
6942 |
+
"epoch": 0.07369892760995345,
|
6943 |
+
"grad_norm": 0.8394240736961365,
|
6944 |
+
"learning_rate": 1.9989281566910363e-05,
|
6945 |
+
"loss": 1.5025,
|
6946 |
+
"step": 99100
|
6947 |
+
},
|
6948 |
+
{
|
6949 |
+
"epoch": 0.07377329585173947,
|
6950 |
+
"grad_norm": 0.9409950971603394,
|
6951 |
+
"learning_rate": 1.9989259927228725e-05,
|
6952 |
+
"loss": 1.5489,
|
6953 |
+
"step": 99200
|
6954 |
+
},
|
6955 |
+
{
|
6956 |
+
"epoch": 0.0738476640935255,
|
6957 |
+
"grad_norm": 0.5597321391105652,
|
6958 |
+
"learning_rate": 1.9989238265736437e-05,
|
6959 |
+
"loss": 1.5994,
|
6960 |
+
"step": 99300
|
6961 |
+
},
|
6962 |
+
{
|
6963 |
+
"epoch": 0.07392203233531153,
|
6964 |
+
"grad_norm": 0.5139235258102417,
|
6965 |
+
"learning_rate": 1.9989216582433538e-05,
|
6966 |
+
"loss": 1.5478,
|
6967 |
+
"step": 99400
|
6968 |
+
},
|
6969 |
+
{
|
6970 |
+
"epoch": 0.07399640057709755,
|
6971 |
+
"grad_norm": 0.6312362551689148,
|
6972 |
+
"learning_rate": 1.998919487732008e-05,
|
6973 |
+
"loss": 1.4989,
|
6974 |
+
"step": 99500
|
6975 |
+
},
|
6976 |
+
{
|
6977 |
+
"epoch": 0.07407076881888358,
|
6978 |
+
"grad_norm": 0.6924223303794861,
|
6979 |
+
"learning_rate": 1.9989173150396105e-05,
|
6980 |
+
"loss": 1.4491,
|
6981 |
+
"step": 99600
|
6982 |
+
},
|
6983 |
+
{
|
6984 |
+
"epoch": 0.07414513706066961,
|
6985 |
+
"grad_norm": 0.5490585565567017,
|
6986 |
+
"learning_rate": 1.9989151401661666e-05,
|
6987 |
+
"loss": 1.538,
|
6988 |
+
"step": 99700
|
6989 |
+
},
|
6990 |
+
{
|
6991 |
+
"epoch": 0.07421950530245564,
|
6992 |
+
"grad_norm": 0.630455732345581,
|
6993 |
+
"learning_rate": 1.998912963111681e-05,
|
6994 |
+
"loss": 1.5286,
|
6995 |
+
"step": 99800
|
6996 |
+
},
|
6997 |
+
{
|
6998 |
+
"epoch": 0.07429387354424166,
|
6999 |
+
"grad_norm": 0.8591504693031311,
|
7000 |
+
"learning_rate": 1.998910783876158e-05,
|
7001 |
+
"loss": 1.5612,
|
7002 |
+
"step": 99900
|
7003 |
+
},
|
7004 |
+
{
|
7005 |
+
"epoch": 0.07436824178602769,
|
7006 |
+
"grad_norm": 1.0016669034957886,
|
7007 |
+
"learning_rate": 1.9989086024596027e-05,
|
7008 |
+
"loss": 1.5154,
|
7009 |
+
"step": 100000
|
7010 |
+
},
|
7011 |
+
{
|
7012 |
+
"epoch": 0.07444261002781372,
|
7013 |
+
"grad_norm": 0.6513885259628296,
|
7014 |
+
"learning_rate": 1.9989064188620197e-05,
|
7015 |
+
"loss": 1.5446,
|
7016 |
+
"step": 100100
|
7017 |
+
},
|
7018 |
+
{
|
7019 |
+
"epoch": 0.07451697826959976,
|
7020 |
+
"grad_norm": 0.6838514804840088,
|
7021 |
+
"learning_rate": 1.998904233083414e-05,
|
7022 |
+
"loss": 1.5336,
|
7023 |
+
"step": 100200
|
7024 |
+
},
|
7025 |
+
{
|
7026 |
+
"epoch": 0.07459134651138578,
|
7027 |
+
"grad_norm": 0.46571242809295654,
|
7028 |
+
"learning_rate": 1.9989020451237903e-05,
|
7029 |
+
"loss": 1.4838,
|
7030 |
+
"step": 100300
|
7031 |
+
},
|
7032 |
+
{
|
7033 |
+
"epoch": 0.07466571475317181,
|
7034 |
+
"grad_norm": 0.9936356544494629,
|
7035 |
+
"learning_rate": 1.998899854983153e-05,
|
7036 |
+
"loss": 1.5929,
|
7037 |
+
"step": 100400
|
7038 |
+
},
|
7039 |
+
{
|
7040 |
+
"epoch": 0.07474008299495784,
|
7041 |
+
"grad_norm": 0.6591018438339233,
|
7042 |
+
"learning_rate": 1.9988976626615075e-05,
|
7043 |
+
"loss": 1.54,
|
7044 |
+
"step": 100500
|
7045 |
+
},
|
7046 |
+
{
|
7047 |
+
"epoch": 0.07481445123674386,
|
7048 |
+
"grad_norm": 0.8453909754753113,
|
7049 |
+
"learning_rate": 1.998895468158858e-05,
|
7050 |
+
"loss": 1.5191,
|
7051 |
+
"step": 100600
|
7052 |
+
},
|
7053 |
+
{
|
7054 |
+
"epoch": 0.07488881947852989,
|
7055 |
+
"grad_norm": 0.6555935144424438,
|
7056 |
+
"learning_rate": 1.9988932714752095e-05,
|
7057 |
+
"loss": 1.5734,
|
7058 |
+
"step": 100700
|
7059 |
+
},
|
7060 |
+
{
|
7061 |
+
"epoch": 0.07496318772031592,
|
7062 |
+
"grad_norm": 0.6445733308792114,
|
7063 |
+
"learning_rate": 1.998891072610567e-05,
|
7064 |
+
"loss": 1.5516,
|
7065 |
+
"step": 100800
|
7066 |
+
},
|
7067 |
+
{
|
7068 |
+
"epoch": 0.07503755596210195,
|
7069 |
+
"grad_norm": 0.534389078617096,
|
7070 |
+
"learning_rate": 1.9988888715649357e-05,
|
7071 |
+
"loss": 1.5441,
|
7072 |
+
"step": 100900
|
7073 |
}
|
7074 |
],
|
7075 |
"logging_steps": 100,
|
|
|
7089 |
"attributes": {}
|
7090 |
}
|
7091 |
},
|
7092 |
+
"total_flos": 1.3747108667853128e+18,
|
7093 |
"train_batch_size": 1,
|
7094 |
"trial_name": null,
|
7095 |
"trial_params": null
|