Chuxin-Embedding / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.098106712564544,
"eval_steps": 500,
"global_step": 1800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01721170395869191,
"grad_norm": 0.29955029487609863,
"learning_rate": 2.0293089116901574e-06,
"loss": 0.6322,
"step": 10
},
{
"epoch": 0.03442340791738382,
"grad_norm": 0.06169761344790459,
"learning_rate": 2.6401917645771237e-06,
"loss": 0.4697,
"step": 20
},
{
"epoch": 0.05163511187607573,
"grad_norm": 0.051926977932453156,
"learning_rate": 2.9975353258495578e-06,
"loss": 0.5617,
"step": 30
},
{
"epoch": 0.06884681583476764,
"grad_norm": 0.07096195966005325,
"learning_rate": 3.25107461746409e-06,
"loss": 0.4301,
"step": 40
},
{
"epoch": 0.08605851979345955,
"grad_norm": 0.06899057328701019,
"learning_rate": 3.4477349704933476e-06,
"loss": 0.4905,
"step": 50
},
{
"epoch": 0.10327022375215146,
"grad_norm": 0.08537387102842331,
"learning_rate": 3.6084181787365237e-06,
"loss": 0.4551,
"step": 60
},
{
"epoch": 0.12048192771084337,
"grad_norm": 0.049780745059251785,
"learning_rate": 3.7442738955429737e-06,
"loss": 0.4058,
"step": 70
},
{
"epoch": 0.13769363166953527,
"grad_norm": 0.04421038553118706,
"learning_rate": 3.861957470351056e-06,
"loss": 0.6748,
"step": 80
},
{
"epoch": 0.1549053356282272,
"grad_norm": 1.9084473848342896,
"learning_rate": 3.965761740008958e-06,
"loss": 0.8719,
"step": 90
},
{
"epoch": 0.1721170395869191,
"grad_norm": 0.08046019077301025,
"learning_rate": 4.058617823380315e-06,
"loss": 0.4635,
"step": 100
},
{
"epoch": 0.18932874354561102,
"grad_norm": 0.21439455449581146,
"learning_rate": 4.142616368250685e-06,
"loss": 0.928,
"step": 110
},
{
"epoch": 0.20654044750430292,
"grad_norm": 0.06055545434355736,
"learning_rate": 4.21930103162349e-06,
"loss": 0.3721,
"step": 120
},
{
"epoch": 0.22375215146299485,
"grad_norm": 0.08670035004615784,
"learning_rate": 4.289844083644429e-06,
"loss": 0.7536,
"step": 130
},
{
"epoch": 0.24096385542168675,
"grad_norm": 0.06118405610322952,
"learning_rate": 4.355156748429939e-06,
"loss": 0.9829,
"step": 140
},
{
"epoch": 0.25817555938037867,
"grad_norm": 0.04853704199194908,
"learning_rate": 4.415961384652748e-06,
"loss": 0.4444,
"step": 150
},
{
"epoch": 0.27538726333907054,
"grad_norm": 0.03537767753005028,
"learning_rate": 4.472840323238023e-06,
"loss": 0.5064,
"step": 160
},
{
"epoch": 0.29259896729776247,
"grad_norm": 0.06154410541057587,
"learning_rate": 4.52626987322263e-06,
"loss": 0.5456,
"step": 170
},
{
"epoch": 0.3098106712564544,
"grad_norm": 0.052560485899448395,
"learning_rate": 4.576644592895925e-06,
"loss": 0.5106,
"step": 180
},
{
"epoch": 0.3270223752151463,
"grad_norm": 0.04913010448217392,
"learning_rate": 4.6242949899596115e-06,
"loss": 0.4026,
"step": 190
},
{
"epoch": 0.3442340791738382,
"grad_norm": 0.07974158972501755,
"learning_rate": 4.66950067626728e-06,
"loss": 0.4828,
"step": 200
},
{
"epoch": 0.3614457831325301,
"grad_norm": 0.03538183122873306,
"learning_rate": 4.712500309702374e-06,
"loss": 0.3549,
"step": 210
},
{
"epoch": 0.37865748709122204,
"grad_norm": 0.21638496220111847,
"learning_rate": 4.753499221137652e-06,
"loss": 0.4912,
"step": 220
},
{
"epoch": 0.3958691910499139,
"grad_norm": 0.03895362466573715,
"learning_rate": 4.792675344617211e-06,
"loss": 0.3846,
"step": 230
},
{
"epoch": 0.41308089500860584,
"grad_norm": 0.03565879911184311,
"learning_rate": 4.830183884510456e-06,
"loss": 0.8434,
"step": 240
},
{
"epoch": 0.43029259896729777,
"grad_norm": 0.03526683151721954,
"learning_rate": 4.866161029296539e-06,
"loss": 0.3603,
"step": 250
},
{
"epoch": 0.4475043029259897,
"grad_norm": 0.064102903008461,
"learning_rate": 4.900726936531396e-06,
"loss": 0.5178,
"step": 260
},
{
"epoch": 0.46471600688468157,
"grad_norm": 0.06982860714197159,
"learning_rate": 4.9339881541683585e-06,
"loss": 0.3712,
"step": 270
},
{
"epoch": 0.4819277108433735,
"grad_norm": 0.0654272809624672,
"learning_rate": 4.966039601316906e-06,
"loss": 0.9119,
"step": 280
},
{
"epoch": 0.4991394148020654,
"grad_norm": 0.04955059662461281,
"learning_rate": 4.9969662012643525e-06,
"loss": 0.3874,
"step": 290
},
{
"epoch": 0.5163511187607573,
"grad_norm": 1.0234352350234985,
"learning_rate": 4.984697781178272e-06,
"loss": 0.8952,
"step": 300
},
{
"epoch": 0.5335628227194492,
"grad_norm": 0.03769606724381447,
"learning_rate": 4.96557000765111e-06,
"loss": 0.3347,
"step": 310
},
{
"epoch": 0.5507745266781411,
"grad_norm": 0.11739111691713333,
"learning_rate": 4.946442234123948e-06,
"loss": 0.3677,
"step": 320
},
{
"epoch": 0.5679862306368331,
"grad_norm": 0.04959660395979881,
"learning_rate": 4.927314460596787e-06,
"loss": 1.1762,
"step": 330
},
{
"epoch": 0.5851979345955249,
"grad_norm": 0.1042531356215477,
"learning_rate": 4.908186687069626e-06,
"loss": 0.4252,
"step": 340
},
{
"epoch": 0.6024096385542169,
"grad_norm": 0.05064910277724266,
"learning_rate": 4.889058913542464e-06,
"loss": 0.3836,
"step": 350
},
{
"epoch": 0.6196213425129088,
"grad_norm": 0.0689607635140419,
"learning_rate": 4.869931140015303e-06,
"loss": 0.7539,
"step": 360
},
{
"epoch": 0.6368330464716007,
"grad_norm": 0.23462702333927155,
"learning_rate": 4.850803366488141e-06,
"loss": 0.8236,
"step": 370
},
{
"epoch": 0.6540447504302926,
"grad_norm": 0.11018137633800507,
"learning_rate": 4.83167559296098e-06,
"loss": 0.4839,
"step": 380
},
{
"epoch": 0.6712564543889845,
"grad_norm": 0.0751522108912468,
"learning_rate": 4.812547819433818e-06,
"loss": 0.5791,
"step": 390
},
{
"epoch": 0.6884681583476764,
"grad_norm": 0.17227555811405182,
"learning_rate": 4.793420045906657e-06,
"loss": 0.7993,
"step": 400
},
{
"epoch": 0.7056798623063684,
"grad_norm": 0.0664035975933075,
"learning_rate": 4.7742922723794954e-06,
"loss": 0.387,
"step": 410
},
{
"epoch": 0.7228915662650602,
"grad_norm": 0.04762504622340202,
"learning_rate": 4.755164498852334e-06,
"loss": 0.5436,
"step": 420
},
{
"epoch": 0.7401032702237521,
"grad_norm": 0.03658389300107956,
"learning_rate": 4.736036725325173e-06,
"loss": 0.6715,
"step": 430
},
{
"epoch": 0.7573149741824441,
"grad_norm": 0.03955502808094025,
"learning_rate": 4.716908951798011e-06,
"loss": 0.4902,
"step": 440
},
{
"epoch": 0.774526678141136,
"grad_norm": 0.05926811322569847,
"learning_rate": 4.69778117827085e-06,
"loss": 0.7329,
"step": 450
},
{
"epoch": 0.7917383820998278,
"grad_norm": 0.26404136419296265,
"learning_rate": 4.678653404743688e-06,
"loss": 0.5748,
"step": 460
},
{
"epoch": 0.8089500860585198,
"grad_norm": 0.07195431739091873,
"learning_rate": 4.6595256312165265e-06,
"loss": 0.5501,
"step": 470
},
{
"epoch": 0.8261617900172117,
"grad_norm": 0.0486939400434494,
"learning_rate": 4.640397857689365e-06,
"loss": 0.4527,
"step": 480
},
{
"epoch": 0.8433734939759037,
"grad_norm": 0.05488497018814087,
"learning_rate": 4.621270084162204e-06,
"loss": 0.8637,
"step": 490
},
{
"epoch": 0.8605851979345955,
"grad_norm": 0.045418575406074524,
"learning_rate": 4.6021423106350425e-06,
"loss": 0.437,
"step": 500
},
{
"epoch": 0.8777969018932874,
"grad_norm": 0.04055708646774292,
"learning_rate": 4.583014537107881e-06,
"loss": 0.6466,
"step": 510
},
{
"epoch": 0.8950086058519794,
"grad_norm": 0.03856475651264191,
"learning_rate": 4.563886763580719e-06,
"loss": 0.669,
"step": 520
},
{
"epoch": 0.9122203098106713,
"grad_norm": 0.035741958767175674,
"learning_rate": 4.5447589900535585e-06,
"loss": 0.3615,
"step": 530
},
{
"epoch": 0.9294320137693631,
"grad_norm": 0.04278489947319031,
"learning_rate": 4.525631216526396e-06,
"loss": 0.3849,
"step": 540
},
{
"epoch": 0.9466437177280551,
"grad_norm": 0.031775712966918945,
"learning_rate": 4.506503442999236e-06,
"loss": 0.6446,
"step": 550
},
{
"epoch": 0.963855421686747,
"grad_norm": 0.19989252090454102,
"learning_rate": 4.487375669472074e-06,
"loss": 0.6668,
"step": 560
},
{
"epoch": 0.9810671256454389,
"grad_norm": 0.04056662693619728,
"learning_rate": 4.468247895944912e-06,
"loss": 0.4243,
"step": 570
},
{
"epoch": 0.9982788296041308,
"grad_norm": 0.06392610818147659,
"learning_rate": 4.449120122417751e-06,
"loss": 0.3431,
"step": 580
},
{
"epoch": 1.0154905335628228,
"grad_norm": 0.03935154527425766,
"learning_rate": 4.42999234889059e-06,
"loss": 0.5167,
"step": 590
},
{
"epoch": 1.0327022375215147,
"grad_norm": 0.05566889047622681,
"learning_rate": 4.410864575363428e-06,
"loss": 0.4372,
"step": 600
},
{
"epoch": 1.0499139414802066,
"grad_norm": 0.07127536088228226,
"learning_rate": 4.391736801836267e-06,
"loss": 1.4152,
"step": 610
},
{
"epoch": 1.0671256454388984,
"grad_norm": 0.04618392139673233,
"learning_rate": 4.372609028309105e-06,
"loss": 0.601,
"step": 620
},
{
"epoch": 1.0843373493975903,
"grad_norm": 0.04588570445775986,
"learning_rate": 4.3534812547819434e-06,
"loss": 0.4723,
"step": 630
},
{
"epoch": 1.1015490533562822,
"grad_norm": 0.03991321101784706,
"learning_rate": 4.334353481254782e-06,
"loss": 0.4807,
"step": 640
},
{
"epoch": 1.1187607573149743,
"grad_norm": 0.2501582205295563,
"learning_rate": 4.315225707727621e-06,
"loss": 0.8098,
"step": 650
},
{
"epoch": 1.1359724612736661,
"grad_norm": 0.042163778096437454,
"learning_rate": 4.296097934200459e-06,
"loss": 0.4158,
"step": 660
},
{
"epoch": 1.153184165232358,
"grad_norm": 0.04054609313607216,
"learning_rate": 4.276970160673298e-06,
"loss": 0.3728,
"step": 670
},
{
"epoch": 1.1703958691910499,
"grad_norm": 0.0925000011920929,
"learning_rate": 4.257842387146137e-06,
"loss": 0.4251,
"step": 680
},
{
"epoch": 1.1876075731497417,
"grad_norm": 0.06017041206359863,
"learning_rate": 4.2387146136189745e-06,
"loss": 0.4782,
"step": 690
},
{
"epoch": 1.2048192771084336,
"grad_norm": 0.040517594665288925,
"learning_rate": 4.219586840091814e-06,
"loss": 0.4354,
"step": 700
},
{
"epoch": 1.2220309810671257,
"grad_norm": 0.04731125384569168,
"learning_rate": 4.200459066564652e-06,
"loss": 0.4969,
"step": 710
},
{
"epoch": 1.2392426850258176,
"grad_norm": 0.050880610942840576,
"learning_rate": 4.1813312930374905e-06,
"loss": 0.492,
"step": 720
},
{
"epoch": 1.2564543889845095,
"grad_norm": 0.04548948258161545,
"learning_rate": 4.162203519510329e-06,
"loss": 0.3914,
"step": 730
},
{
"epoch": 1.2736660929432013,
"grad_norm": 0.03825736418366432,
"learning_rate": 4.143075745983168e-06,
"loss": 0.3921,
"step": 740
},
{
"epoch": 1.2908777969018934,
"grad_norm": 0.046227287501096725,
"learning_rate": 4.1239479724560065e-06,
"loss": 0.4632,
"step": 750
},
{
"epoch": 1.3080895008605853,
"grad_norm": 0.04002716392278671,
"learning_rate": 4.104820198928845e-06,
"loss": 0.7436,
"step": 760
},
{
"epoch": 1.3253012048192772,
"grad_norm": 0.04381329566240311,
"learning_rate": 4.085692425401683e-06,
"loss": 0.5388,
"step": 770
},
{
"epoch": 1.342512908777969,
"grad_norm": 0.09227538853883743,
"learning_rate": 4.0665646518745225e-06,
"loss": 0.7008,
"step": 780
},
{
"epoch": 1.359724612736661,
"grad_norm": 0.0453125424683094,
"learning_rate": 4.04743687834736e-06,
"loss": 0.4813,
"step": 790
},
{
"epoch": 1.3769363166953528,
"grad_norm": 0.20484060049057007,
"learning_rate": 4.0283091048202e-06,
"loss": 0.6594,
"step": 800
},
{
"epoch": 1.3941480206540446,
"grad_norm": 0.05485668033361435,
"learning_rate": 4.009181331293038e-06,
"loss": 0.6538,
"step": 810
},
{
"epoch": 1.4113597246127367,
"grad_norm": 0.04452645406126976,
"learning_rate": 3.990053557765876e-06,
"loss": 0.3713,
"step": 820
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.03632510080933571,
"learning_rate": 3.970925784238715e-06,
"loss": 0.3395,
"step": 830
},
{
"epoch": 1.4457831325301205,
"grad_norm": 0.0884113535284996,
"learning_rate": 3.951798010711554e-06,
"loss": 0.3602,
"step": 840
},
{
"epoch": 1.4629948364888123,
"grad_norm": 0.1275469958782196,
"learning_rate": 3.932670237184392e-06,
"loss": 0.4533,
"step": 850
},
{
"epoch": 1.4802065404475044,
"grad_norm": 0.03843805938959122,
"learning_rate": 3.913542463657231e-06,
"loss": 0.7519,
"step": 860
},
{
"epoch": 1.4974182444061963,
"grad_norm": 0.03635178506374359,
"learning_rate": 3.89441469013007e-06,
"loss": 0.388,
"step": 870
},
{
"epoch": 1.5146299483648882,
"grad_norm": 0.039031002670526505,
"learning_rate": 3.875286916602907e-06,
"loss": 0.4425,
"step": 880
},
{
"epoch": 1.53184165232358,
"grad_norm": 0.04110798239707947,
"learning_rate": 3.856159143075746e-06,
"loss": 0.4095,
"step": 890
},
{
"epoch": 1.549053356282272,
"grad_norm": 0.04002736508846283,
"learning_rate": 3.837031369548585e-06,
"loss": 0.6104,
"step": 900
},
{
"epoch": 1.5662650602409638,
"grad_norm": 0.03314425051212311,
"learning_rate": 3.817903596021423e-06,
"loss": 0.5594,
"step": 910
},
{
"epoch": 1.5834767641996557,
"grad_norm": 0.03947990760207176,
"learning_rate": 3.798775822494262e-06,
"loss": 0.4931,
"step": 920
},
{
"epoch": 1.6006884681583475,
"grad_norm": 0.05939627066254616,
"learning_rate": 3.7796480489671007e-06,
"loss": 0.5127,
"step": 930
},
{
"epoch": 1.6179001721170396,
"grad_norm": 0.03439631685614586,
"learning_rate": 3.760520275439939e-06,
"loss": 0.4139,
"step": 940
},
{
"epoch": 1.6351118760757315,
"grad_norm": 0.06566853076219559,
"learning_rate": 3.7413925019127776e-06,
"loss": 0.6641,
"step": 950
},
{
"epoch": 1.6523235800344234,
"grad_norm": 0.06731946766376495,
"learning_rate": 3.7222647283856163e-06,
"loss": 0.6865,
"step": 960
},
{
"epoch": 1.6695352839931155,
"grad_norm": 0.03529343381524086,
"learning_rate": 3.703136954858455e-06,
"loss": 0.6395,
"step": 970
},
{
"epoch": 1.6867469879518073,
"grad_norm": 0.09028229117393494,
"learning_rate": 3.684009181331293e-06,
"loss": 0.774,
"step": 980
},
{
"epoch": 1.7039586919104992,
"grad_norm": 0.04828124865889549,
"learning_rate": 3.664881407804132e-06,
"loss": 0.4953,
"step": 990
},
{
"epoch": 1.721170395869191,
"grad_norm": 0.050330750644207,
"learning_rate": 3.6457536342769705e-06,
"loss": 0.6435,
"step": 1000
},
{
"epoch": 1.738382099827883,
"grad_norm": 0.03781217709183693,
"learning_rate": 3.6266258607498087e-06,
"loss": 0.4538,
"step": 1010
},
{
"epoch": 1.7555938037865748,
"grad_norm": 0.053586967289447784,
"learning_rate": 3.607498087222648e-06,
"loss": 0.384,
"step": 1020
},
{
"epoch": 1.7728055077452667,
"grad_norm": 0.04280597344040871,
"learning_rate": 3.588370313695486e-06,
"loss": 0.385,
"step": 1030
},
{
"epoch": 1.7900172117039586,
"grad_norm": 0.05530484393239021,
"learning_rate": 3.5692425401683243e-06,
"loss": 0.732,
"step": 1040
},
{
"epoch": 1.8072289156626506,
"grad_norm": 0.05707624554634094,
"learning_rate": 3.5501147666411634e-06,
"loss": 0.4075,
"step": 1050
},
{
"epoch": 1.8244406196213425,
"grad_norm": 0.07795403897762299,
"learning_rate": 3.5309869931140016e-06,
"loss": 1.0486,
"step": 1060
},
{
"epoch": 1.8416523235800344,
"grad_norm": 0.08253274112939835,
"learning_rate": 3.5118592195868407e-06,
"loss": 0.7014,
"step": 1070
},
{
"epoch": 1.8588640275387265,
"grad_norm": 0.037665221840143204,
"learning_rate": 3.492731446059679e-06,
"loss": 0.5129,
"step": 1080
},
{
"epoch": 1.8760757314974184,
"grad_norm": 0.08074070513248444,
"learning_rate": 3.473603672532517e-06,
"loss": 0.6965,
"step": 1090
},
{
"epoch": 1.8932874354561102,
"grad_norm": 0.053863946348428726,
"learning_rate": 3.4544758990053563e-06,
"loss": 0.3608,
"step": 1100
},
{
"epoch": 1.910499139414802,
"grad_norm": 0.03980562463402748,
"learning_rate": 3.4353481254781945e-06,
"loss": 0.3408,
"step": 1110
},
{
"epoch": 1.927710843373494,
"grad_norm": 0.03091476857662201,
"learning_rate": 3.4162203519510336e-06,
"loss": 0.4147,
"step": 1120
},
{
"epoch": 1.9449225473321858,
"grad_norm": 0.05423520505428314,
"learning_rate": 3.399005355776588e-06,
"loss": 0.501,
"step": 1130
},
{
"epoch": 1.9621342512908777,
"grad_norm": 0.056222882121801376,
"learning_rate": 3.379877582249426e-06,
"loss": 0.6646,
"step": 1140
},
{
"epoch": 1.9793459552495696,
"grad_norm": 0.04780727997422218,
"learning_rate": 3.360749808722265e-06,
"loss": 0.4433,
"step": 1150
},
{
"epoch": 1.9965576592082617,
"grad_norm": 0.0465485118329525,
"learning_rate": 3.3416220351951034e-06,
"loss": 0.4117,
"step": 1160
},
{
"epoch": 2.0137693631669533,
"grad_norm": 0.038410015404224396,
"learning_rate": 3.3224942616679424e-06,
"loss": 0.9719,
"step": 1170
},
{
"epoch": 2.0309810671256456,
"grad_norm": 0.03839205205440521,
"learning_rate": 3.3033664881407807e-06,
"loss": 0.5383,
"step": 1180
},
{
"epoch": 2.0481927710843375,
"grad_norm": 0.05250284820795059,
"learning_rate": 3.284238714613619e-06,
"loss": 0.5573,
"step": 1190
},
{
"epoch": 2.0654044750430294,
"grad_norm": 0.05850391089916229,
"learning_rate": 3.265110941086458e-06,
"loss": 0.3652,
"step": 1200
},
{
"epoch": 2.0826161790017212,
"grad_norm": 0.03551226481795311,
"learning_rate": 3.2459831675592962e-06,
"loss": 1.1687,
"step": 1210
},
{
"epoch": 2.099827882960413,
"grad_norm": 0.035683631896972656,
"learning_rate": 3.226855394032135e-06,
"loss": 0.3377,
"step": 1220
},
{
"epoch": 2.117039586919105,
"grad_norm": 0.05406322330236435,
"learning_rate": 3.2077276205049736e-06,
"loss": 0.4614,
"step": 1230
},
{
"epoch": 2.134251290877797,
"grad_norm": 0.030787965282797813,
"learning_rate": 3.188599846977812e-06,
"loss": 0.3771,
"step": 1240
},
{
"epoch": 2.1514629948364887,
"grad_norm": 0.04496818408370018,
"learning_rate": 3.169472073450651e-06,
"loss": 0.4846,
"step": 1250
},
{
"epoch": 2.1686746987951806,
"grad_norm": 0.03633632883429527,
"learning_rate": 3.150344299923489e-06,
"loss": 0.3549,
"step": 1260
},
{
"epoch": 2.1858864027538725,
"grad_norm": 0.033117033541202545,
"learning_rate": 3.1312165263963278e-06,
"loss": 0.4224,
"step": 1270
},
{
"epoch": 2.2030981067125643,
"grad_norm": 0.04940853640437126,
"learning_rate": 3.1120887528691664e-06,
"loss": 0.6976,
"step": 1280
},
{
"epoch": 2.2203098106712567,
"grad_norm": 0.03474991396069527,
"learning_rate": 3.092960979342005e-06,
"loss": 0.5837,
"step": 1290
},
{
"epoch": 2.2375215146299485,
"grad_norm": 0.08616980165243149,
"learning_rate": 3.0738332058148433e-06,
"loss": 0.5885,
"step": 1300
},
{
"epoch": 2.2547332185886404,
"grad_norm": 0.04921899363398552,
"learning_rate": 3.054705432287682e-06,
"loss": 0.4007,
"step": 1310
},
{
"epoch": 2.2719449225473323,
"grad_norm": 0.033128101378679276,
"learning_rate": 3.0355776587605207e-06,
"loss": 0.3948,
"step": 1320
},
{
"epoch": 2.289156626506024,
"grad_norm": 0.0420563630759716,
"learning_rate": 3.016449885233359e-06,
"loss": 0.6675,
"step": 1330
},
{
"epoch": 2.306368330464716,
"grad_norm": 0.04620426893234253,
"learning_rate": 2.997322111706198e-06,
"loss": 0.3454,
"step": 1340
},
{
"epoch": 2.323580034423408,
"grad_norm": 0.031115278601646423,
"learning_rate": 2.9781943381790362e-06,
"loss": 0.4697,
"step": 1350
},
{
"epoch": 2.3407917383820998,
"grad_norm": 0.03716883435845375,
"learning_rate": 2.9590665646518745e-06,
"loss": 0.7016,
"step": 1360
},
{
"epoch": 2.3580034423407916,
"grad_norm": 0.2217116802930832,
"learning_rate": 2.9399387911247135e-06,
"loss": 0.6504,
"step": 1370
},
{
"epoch": 2.3752151462994835,
"grad_norm": 0.08799983561038971,
"learning_rate": 2.9208110175975518e-06,
"loss": 0.3518,
"step": 1380
},
{
"epoch": 2.3924268502581754,
"grad_norm": 0.03414052352309227,
"learning_rate": 2.901683244070391e-06,
"loss": 0.5522,
"step": 1390
},
{
"epoch": 2.4096385542168672,
"grad_norm": 0.14305748045444489,
"learning_rate": 2.882555470543229e-06,
"loss": 0.7692,
"step": 1400
},
{
"epoch": 2.4268502581755595,
"grad_norm": 0.04776856303215027,
"learning_rate": 2.8634276970160673e-06,
"loss": 0.4163,
"step": 1410
},
{
"epoch": 2.4440619621342514,
"grad_norm": 0.06117096543312073,
"learning_rate": 2.8442999234889064e-06,
"loss": 0.3797,
"step": 1420
},
{
"epoch": 2.4612736660929433,
"grad_norm": 0.1437849998474121,
"learning_rate": 2.8251721499617447e-06,
"loss": 0.3978,
"step": 1430
},
{
"epoch": 2.478485370051635,
"grad_norm": 0.03535407409071922,
"learning_rate": 2.8060443764345833e-06,
"loss": 0.7543,
"step": 1440
},
{
"epoch": 2.495697074010327,
"grad_norm": 0.034573543816804886,
"learning_rate": 2.786916602907422e-06,
"loss": 0.4385,
"step": 1450
},
{
"epoch": 2.512908777969019,
"grad_norm": 0.05264075845479965,
"learning_rate": 2.7677888293802602e-06,
"loss": 0.5788,
"step": 1460
},
{
"epoch": 2.5301204819277108,
"grad_norm": 0.047263339161872864,
"learning_rate": 2.748661055853099e-06,
"loss": 0.5397,
"step": 1470
},
{
"epoch": 2.5473321858864026,
"grad_norm": 0.03852943331003189,
"learning_rate": 2.7295332823259375e-06,
"loss": 0.3995,
"step": 1480
},
{
"epoch": 2.5645438898450945,
"grad_norm": 0.04756772890686989,
"learning_rate": 2.710405508798776e-06,
"loss": 0.5136,
"step": 1490
},
{
"epoch": 2.581755593803787,
"grad_norm": 0.07750029861927032,
"learning_rate": 2.6912777352716144e-06,
"loss": 0.8293,
"step": 1500
},
{
"epoch": 2.5989672977624787,
"grad_norm": 0.047012392431497574,
"learning_rate": 2.672149961744453e-06,
"loss": 0.5485,
"step": 1510
},
{
"epoch": 2.6161790017211706,
"grad_norm": 0.04318179562687874,
"learning_rate": 2.6530221882172918e-06,
"loss": 0.4112,
"step": 1520
},
{
"epoch": 2.6333907056798624,
"grad_norm": 0.06012555584311485,
"learning_rate": 2.63389441469013e-06,
"loss": 0.7031,
"step": 1530
},
{
"epoch": 2.6506024096385543,
"grad_norm": 0.03384987264871597,
"learning_rate": 2.614766641162969e-06,
"loss": 0.439,
"step": 1540
},
{
"epoch": 2.667814113597246,
"grad_norm": 0.05770883336663246,
"learning_rate": 2.5956388676358073e-06,
"loss": 0.3991,
"step": 1550
},
{
"epoch": 2.685025817555938,
"grad_norm": 0.05510050430893898,
"learning_rate": 2.5765110941086456e-06,
"loss": 0.9784,
"step": 1560
},
{
"epoch": 2.70223752151463,
"grad_norm": 0.055017050355672836,
"learning_rate": 2.5573833205814846e-06,
"loss": 0.3796,
"step": 1570
},
{
"epoch": 2.719449225473322,
"grad_norm": 0.04332127049565315,
"learning_rate": 2.538255547054323e-06,
"loss": 0.433,
"step": 1580
},
{
"epoch": 2.7366609294320137,
"grad_norm": 0.060054711997509,
"learning_rate": 2.519127773527162e-06,
"loss": 0.2799,
"step": 1590
},
{
"epoch": 2.7538726333907055,
"grad_norm": 0.0340825691819191,
"learning_rate": 2.5e-06,
"loss": 0.6797,
"step": 1600
},
{
"epoch": 2.7710843373493974,
"grad_norm": 0.22405555844306946,
"learning_rate": 2.480872226472839e-06,
"loss": 0.6071,
"step": 1610
},
{
"epoch": 2.7882960413080893,
"grad_norm": 0.04493927210569382,
"learning_rate": 2.4617444529456775e-06,
"loss": 0.4004,
"step": 1620
},
{
"epoch": 2.805507745266781,
"grad_norm": 0.06454917788505554,
"learning_rate": 2.4426166794185158e-06,
"loss": 0.3903,
"step": 1630
},
{
"epoch": 2.8227194492254735,
"grad_norm": 0.07336492091417313,
"learning_rate": 2.4234889058913544e-06,
"loss": 0.9157,
"step": 1640
},
{
"epoch": 2.8399311531841653,
"grad_norm": 0.08775831758975983,
"learning_rate": 2.404361132364193e-06,
"loss": 0.4865,
"step": 1650
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.03372660651803017,
"learning_rate": 2.3852333588370317e-06,
"loss": 0.3975,
"step": 1660
},
{
"epoch": 2.874354561101549,
"grad_norm": 0.034449730068445206,
"learning_rate": 2.3661055853098704e-06,
"loss": 0.3927,
"step": 1670
},
{
"epoch": 2.891566265060241,
"grad_norm": 0.02975647896528244,
"learning_rate": 2.3469778117827086e-06,
"loss": 0.3664,
"step": 1680
},
{
"epoch": 2.908777969018933,
"grad_norm": 0.037901297211647034,
"learning_rate": 2.3278500382555473e-06,
"loss": 0.3973,
"step": 1690
},
{
"epoch": 2.9259896729776247,
"grad_norm": 0.05662724748253822,
"learning_rate": 2.308722264728386e-06,
"loss": 0.4422,
"step": 1700
},
{
"epoch": 2.9432013769363166,
"grad_norm": 0.044157788157463074,
"learning_rate": 2.289594491201224e-06,
"loss": 0.4324,
"step": 1710
},
{
"epoch": 2.960413080895009,
"grad_norm": 0.04280713573098183,
"learning_rate": 2.270466717674063e-06,
"loss": 0.5674,
"step": 1720
},
{
"epoch": 2.9776247848537007,
"grad_norm": 0.04871043935418129,
"learning_rate": 2.2513389441469015e-06,
"loss": 0.3223,
"step": 1730
},
{
"epoch": 2.9948364888123926,
"grad_norm": 0.036149609833955765,
"learning_rate": 2.2322111706197398e-06,
"loss": 0.6471,
"step": 1740
},
{
"epoch": 3.0120481927710845,
"grad_norm": 0.02951321005821228,
"learning_rate": 2.2130833970925784e-06,
"loss": 0.3926,
"step": 1750
},
{
"epoch": 3.0292598967297764,
"grad_norm": 0.04006199911236763,
"learning_rate": 2.193955623565417e-06,
"loss": 0.6222,
"step": 1760
},
{
"epoch": 3.0464716006884682,
"grad_norm": 0.03238508850336075,
"learning_rate": 2.1748278500382557e-06,
"loss": 0.4144,
"step": 1770
},
{
"epoch": 3.06368330464716,
"grad_norm": 0.035425204783678055,
"learning_rate": 2.1557000765110944e-06,
"loss": 0.3745,
"step": 1780
},
{
"epoch": 3.080895008605852,
"grad_norm": 0.08181657642126083,
"learning_rate": 2.1365723029839326e-06,
"loss": 0.4049,
"step": 1790
},
{
"epoch": 3.098106712564544,
"grad_norm": 0.03448079526424408,
"learning_rate": 2.1174445294567713e-06,
"loss": 0.5435,
"step": 1800
}
],
"logging_steps": 10,
"max_steps": 2905,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
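
The dump above follows the layout the Hugging Face Trainer writes for each checkpoint: top-level run metadata (global_step, max_steps, logging_steps, save_steps, num_train_epochs) plus a log_history list with one entry per logging event (every 10 steps here), each carrying epoch, grad_norm, learning_rate, loss, and step. A minimal sketch for inspecting such a file, assuming only the Python standard library and that the file sits next to the script as trainer_state.json (the path is illustrative, not part of this repo):

import json

# Load the checkpoint's trainer state (the path is an assumption; point it at
# the actual checkpoint directory if it lives elsewhere).
with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Keep only log entries that carry a training loss; evaluation entries, if any
# were logged, would use different keys.
train_logs = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
lrs = [e["learning_rate"] for e in train_logs]

print(f"logged points : {len(train_logs)}")
print(f"progress      : step {state['global_step']} / {state['max_steps']} "
      f"(epoch {state['epoch']:.3f} of {state['num_train_epochs']})")
print(f"latest loss   : {losses[-1]:.4f} at lr {lrs[-1]:.3e}")

# Crude moving average to read the trend through the noisy per-log losses.
window = 10
smoothed = [
    sum(losses[max(0, i - window + 1):i + 1]) / len(losses[max(0, i - window + 1):i + 1])
    for i in range(len(losses))
]
print(f"smoothed loss : {smoothed[-1]:.4f} (last {window}-point average)")

On this particular state, the script would report step 1800 of 2905 at roughly epoch 3.098, matching the top-level fields of the JSON above.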