{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.448542237727386,
  "eval_steps": 500,
  "global_step": 900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.00049838026414154, "grad_norm": 0.8175273537635803, "learning_rate": 1e-05, "loss": 1.8901, "step": 1},
    {"epoch": 0.00099676052828308, "grad_norm": 0.5205090641975403, "learning_rate": 2e-05, "loss": 1.8661, "step": 2},
    {"epoch": 0.00149514079242462, "grad_norm": 0.7050982713699341, "learning_rate": 3e-05, "loss": 1.884, "step": 3},
    {"epoch": 0.00199352105656616, "grad_norm": 0.3958536684513092, "learning_rate": 4e-05, "loss": 1.848, "step": 4},
    {"epoch": 0.0024919013207077, "grad_norm": 0.2910257577896118, "learning_rate": 5e-05, "loss": 1.8363, "step": 5},
    {"epoch": 0.00299028158484924, "grad_norm": 1.1061186790466309, "learning_rate": 6e-05, "loss": 2.1065, "step": 6},
    {"epoch": 0.00348866184899078, "grad_norm": 0.35989394783973694, "learning_rate": 7e-05, "loss": 1.8461, "step": 7},
    {"epoch": 0.00398704211313232, "grad_norm": 0.3001234233379364, "learning_rate": 8e-05, "loss": 1.8691, "step": 8},
    {"epoch": 0.00448542237727386, "grad_norm": 0.3210326135158539, "learning_rate": 9e-05, "loss": 1.8006, "step": 9},
    {"epoch": 0.0049838026414154, "grad_norm": 0.24240201711654663, "learning_rate": 0.0001, "loss": 1.8136, "step": 10},
    {"epoch": 0.00548218290555694, "grad_norm": 0.2921009957790375, "learning_rate": 0.00011000000000000002, "loss": 1.7785, "step": 11},
    {"epoch": 0.00598056316969848, "grad_norm": 0.2199179232120514, "learning_rate": 0.00012, "loss": 1.8334, "step": 12},
    {"epoch": 0.00647894343384002, "grad_norm": 0.18247301876544952, "learning_rate": 0.00013000000000000002, "loss": 1.8171, "step": 13},
    {"epoch": 0.00697732369798156, "grad_norm": 0.16971151530742645, "learning_rate": 0.00014, "loss": 1.8838, "step": 14},
    {"epoch": 0.0074757039621231, "grad_norm": 0.19395150244235992, "learning_rate": 0.00015000000000000001, "loss": 1.8121, "step": 15},
    {"epoch": 0.00797408422626464, "grad_norm": 0.18596555292606354, "learning_rate": 0.00016, "loss": 1.7756, "step": 16},
    {"epoch": 0.00847246449040618, "grad_norm": 0.23639832437038422, "learning_rate": 0.00017, "loss": 1.8293, "step": 17},
    {"epoch": 0.00897084475454772, "grad_norm": 0.5992503762245178, "learning_rate": 0.00018, "loss": 1.8285, "step": 18},
    {"epoch": 0.00946922501868926, "grad_norm": 0.24062925577163696, "learning_rate": 0.00019, "loss": 1.8139, "step": 19},
    {"epoch": 0.0099676052828308, "grad_norm": 0.1615862101316452, "learning_rate": 0.0002, "loss": 1.7916, "step": 20},
    {"epoch": 0.01046598554697234, "grad_norm": 0.1461448222398758, "learning_rate": 0.0002, "loss": 1.756, "step": 21},
    {"epoch": 0.01096436581111388, "grad_norm": 0.16745099425315857, "learning_rate": 0.0002, "loss": 1.7139, "step": 22},
    {"epoch": 0.01146274607525542, "grad_norm": 0.13099125027656555, "learning_rate": 0.0002, "loss": 1.7764, "step": 23},
    {"epoch": 0.01196112633939696, "grad_norm": 0.11523797363042831, "learning_rate": 0.0002, "loss": 1.6983, "step": 24},
    {"epoch": 0.012459506603538499, "grad_norm": 0.4995543956756592, "learning_rate": 0.0002, "loss": 1.7629, "step": 25},
    {"epoch": 0.01295788686768004, "grad_norm": 0.1197713166475296, "learning_rate": 0.0002, "loss": 1.6818, "step": 26},
    {"epoch": 0.01345626713182158, "grad_norm": 0.12242875248193741, "learning_rate": 0.0002, "loss": 1.7446, "step": 27},
    {"epoch": 0.01395464739596312, "grad_norm": 0.11533704400062561, "learning_rate": 0.0002, "loss": 1.7924, "step": 28},
    {"epoch": 0.01445302766010466, "grad_norm": 0.11372833698987961, "learning_rate": 0.0002, "loss": 1.8541, "step": 29},
    {"epoch": 0.0149514079242462, "grad_norm": 0.10559230297803879, "learning_rate": 0.0002, "loss": 1.727, "step": 30},
    {"epoch": 0.01544978818838774, "grad_norm": 0.1040055975317955, "learning_rate": 0.0002, "loss": 1.6867, "step": 31},
    {"epoch": 0.01594816845252928, "grad_norm": 0.09699314832687378, "learning_rate": 0.0002, "loss": 1.7119, "step": 32},
    {"epoch": 0.016446548716670818, "grad_norm": 0.09951823949813843, "learning_rate": 0.0002, "loss": 1.6883, "step": 33},
    {"epoch": 0.01694492898081236, "grad_norm": 0.09926764667034149, "learning_rate": 0.0002, "loss": 1.6828, "step": 34},
    {"epoch": 0.0174433092449539, "grad_norm": 0.11137701570987701, "learning_rate": 0.0002, "loss": 1.8129, "step": 35},
    {"epoch": 0.01794168950909544, "grad_norm": 0.09449079632759094, "learning_rate": 0.0002, "loss": 1.7351, "step": 36},
    {"epoch": 0.01844006977323698, "grad_norm": 0.10035137832164764, "learning_rate": 0.0002, "loss": 1.7835, "step": 37},
    {"epoch": 0.01893845003737852, "grad_norm": 0.0987599715590477, "learning_rate": 0.0002, "loss": 1.6905, "step": 38},
    {"epoch": 0.01943683030152006, "grad_norm": 0.1124144196510315, "learning_rate": 0.0002, "loss": 1.7833, "step": 39},
    {"epoch": 0.0199352105656616, "grad_norm": 0.10424085706472397, "learning_rate": 0.0002, "loss": 1.7308, "step": 40},
    {"epoch": 0.02043359082980314, "grad_norm": 0.10069456696510315, "learning_rate": 0.0002, "loss": 1.7756, "step": 41},
    {"epoch": 0.02093197109394468, "grad_norm": 0.096500463783741, "learning_rate": 0.0002, "loss": 1.6723, "step": 42},
    {"epoch": 0.02143035135808622, "grad_norm": 0.10054206848144531, "learning_rate": 0.0002, "loss": 1.7609, "step": 43},
    {"epoch": 0.02192873162222776, "grad_norm": 0.6995068192481995, "learning_rate": 0.0002, "loss": 1.8469, "step": 44},
    {"epoch": 0.0224271118863693, "grad_norm": 0.10629299283027649, "learning_rate": 0.0002, "loss": 1.7838, "step": 45},
    {"epoch": 0.02292549215051084, "grad_norm": 0.7601500749588013, "learning_rate": 0.0002, "loss": 1.9191, "step": 46},
    {"epoch": 0.02342387241465238, "grad_norm": 0.15130610764026642, "learning_rate": 0.0002, "loss": 1.7054, "step": 47},
    {"epoch": 0.02392225267879392, "grad_norm": 0.13523732125759125, "learning_rate": 0.0002, "loss": 1.8099, "step": 48},
    {"epoch": 0.02442063294293546, "grad_norm": 0.13607007265090942, "learning_rate": 0.0002, "loss": 1.7106, "step": 49},
    {"epoch": 0.024919013207076998, "grad_norm": 0.12477318197488785, "learning_rate": 0.0002, "loss": 1.6664, "step": 50},
    {"epoch": 0.02541739347121854, "grad_norm": 0.6004332304000854, "learning_rate": 0.0002, "loss": 1.8337, "step": 51},
    {"epoch": 0.02591577373536008, "grad_norm": 0.11952889710664749, "learning_rate": 0.0002, "loss": 1.8014, "step": 52},
    {"epoch": 0.02641415399950162, "grad_norm": 0.12411167472600937, "learning_rate": 0.0002, "loss": 1.716, "step": 53},
    {"epoch": 0.02691253426364316, "grad_norm": 0.13071775436401367, "learning_rate": 0.0002, "loss": 1.8158, "step": 54},
    {"epoch": 0.0274109145277847, "grad_norm": 0.10316825658082962, "learning_rate": 0.0002, "loss": 1.7051, "step": 55},
    {"epoch": 0.02790929479192624, "grad_norm": 0.12366951256990433, "learning_rate": 0.0002, "loss": 1.7233, "step": 56},
    {"epoch": 0.02840767505606778, "grad_norm": 0.11353752017021179, "learning_rate": 0.0002, "loss": 1.7875, "step": 57},
    {"epoch": 0.02890605532020932, "grad_norm": 0.10084105283021927, "learning_rate": 0.0002, "loss": 1.8455, "step": 58},
    {"epoch": 0.02940443558435086, "grad_norm": 0.09446979314088821, "learning_rate": 0.0002, "loss": 1.6738, "step": 59},
    {"epoch": 0.0299028158484924, "grad_norm": 0.10983336716890335, "learning_rate": 0.0002, "loss": 1.7517, "step": 60},
    {"epoch": 0.03040119611263394, "grad_norm": 0.09697376936674118, "learning_rate": 0.0002, "loss": 1.7885, "step": 61},
    {"epoch": 0.03089957637677548, "grad_norm": 0.10111090540885925, "learning_rate": 0.0002, "loss": 1.7711, "step": 62},
    {"epoch": 0.03139795664091702, "grad_norm": 0.09077231585979462, "learning_rate": 0.0002, "loss": 1.6886, "step": 63},
    {"epoch": 0.03189633690505856, "grad_norm": 0.09181386977434158, "learning_rate": 0.0002, "loss": 1.7101, "step": 64},
    {"epoch": 0.0323947171692001, "grad_norm": 0.09549912065267563, "learning_rate": 0.0002, "loss": 1.727, "step": 65},
    {"epoch": 0.032893097433341636, "grad_norm": 0.09550771117210388, "learning_rate": 0.0002, "loss": 1.7627, "step": 66},
    {"epoch": 0.03339147769748318, "grad_norm": 0.09617152065038681, "learning_rate": 0.0002, "loss": 1.7195, "step": 67},
    {"epoch": 0.03388985796162472, "grad_norm": 0.08987727761268616, "learning_rate": 0.0002, "loss": 1.6672, "step": 68},
    {"epoch": 0.03438823822576626, "grad_norm": 0.1968306601047516, "learning_rate": 0.0002, "loss": 1.7743, "step": 69},
    {"epoch": 0.0348866184899078, "grad_norm": 0.11987251788377762, "learning_rate": 0.0002, "loss": 1.7883, "step": 70},
    {"epoch": 0.03538499875404934, "grad_norm": 0.09412620961666107, "learning_rate": 0.0002, "loss": 1.7965, "step": 71},
    {"epoch": 0.03588337901819088, "grad_norm": 0.09160133451223373, "learning_rate": 0.0002, "loss": 1.7451, "step": 72},
    {"epoch": 0.03638175928233242, "grad_norm": 0.08958347886800766, "learning_rate": 0.0002, "loss": 1.6991, "step": 73},
    {"epoch": 0.03688013954647396, "grad_norm": 0.08735426515340805, "learning_rate": 0.0002, "loss": 1.7267, "step": 74},
    {"epoch": 0.0373785198106155, "grad_norm": 0.09234903752803802, "learning_rate": 0.0002, "loss": 1.7363, "step": 75},
    {"epoch": 0.03787690007475704, "grad_norm": 0.3366870582103729, "learning_rate": 0.0002, "loss": 1.7519, "step": 76},
    {"epoch": 0.03837528033889858, "grad_norm": 0.11989757418632507, "learning_rate": 0.0002, "loss": 1.7388, "step": 77},
    {"epoch": 0.03887366060304012, "grad_norm": 0.09671110659837723, "learning_rate": 0.0002, "loss": 1.6955, "step": 78},
    {"epoch": 0.03937204086718166, "grad_norm": 0.3544454276561737, "learning_rate": 0.0002, "loss": 1.7123, "step": 79},
    {"epoch": 0.0398704211313232, "grad_norm": 0.36497563123703003, "learning_rate": 0.0002, "loss": 1.8832, "step": 80},
    {"epoch": 0.04036880139546474, "grad_norm": 0.1029423251748085, "learning_rate": 0.0002, "loss": 1.6739, "step": 81},
    {"epoch": 0.04086718165960628, "grad_norm": 0.13265877962112427, "learning_rate": 0.0002, "loss": 1.6735, "step": 82},
    {"epoch": 0.041365561923747816, "grad_norm": 0.10281170904636383, "learning_rate": 0.0002, "loss": 1.7079, "step": 83},
    {"epoch": 0.04186394218788936, "grad_norm": 0.9060964584350586, "learning_rate": 0.0002, "loss": 2.0666, "step": 84},
    {"epoch": 0.0423623224520309, "grad_norm": 0.6496222615242004, "learning_rate": 0.0002, "loss": 1.7719, "step": 85},
    {"epoch": 0.04286070271617244, "grad_norm": 0.20052167773246765, "learning_rate": 0.0002, "loss": 1.7717, "step": 86},
    {"epoch": 0.04335908298031398, "grad_norm": 0.20841394364833832, "learning_rate": 0.0002, "loss": 1.7548, "step": 87},
    {"epoch": 0.04385746324445552, "grad_norm": 0.14324237406253815, "learning_rate": 0.0002, "loss": 1.6689, "step": 88},
    {"epoch": 0.04435584350859706, "grad_norm": 0.1330689936876297, "learning_rate": 0.0002, "loss": 1.741, "step": 89},
    {"epoch": 0.0448542237727386, "grad_norm": 0.13436254858970642, "learning_rate": 0.0002, "loss": 1.8316, "step": 90},
    {"epoch": 0.04535260403688014, "grad_norm": 0.11558011174201965, "learning_rate": 0.0002, "loss": 1.7094, "step": 91},
    {"epoch": 0.04585098430102168, "grad_norm": 0.13997307419776917, "learning_rate": 0.0002, "loss": 1.7487, "step": 92},
    {"epoch": 0.04634936456516322, "grad_norm": 0.11401030421257019, "learning_rate": 0.0002, "loss": 1.6971, "step": 93},
    {"epoch": 0.04684774482930476, "grad_norm": 0.1490752398967743, "learning_rate": 0.0002, "loss": 1.7318, "step": 94},
    {"epoch": 0.0473461250934463, "grad_norm": 0.10417014360427856, "learning_rate": 0.0002, "loss": 1.6225, "step": 95},
    {"epoch": 0.04784450535758784, "grad_norm": 0.11896169185638428, "learning_rate": 0.0002, "loss": 1.6585, "step": 96},
    {"epoch": 0.04834288562172938, "grad_norm": 0.1187196597456932, "learning_rate": 0.0002, "loss": 1.6665, "step": 97},
    {"epoch": 0.04884126588587092, "grad_norm": 0.10665114969015121, "learning_rate": 0.0002, "loss": 1.7154, "step": 98},
    {"epoch": 0.04933964615001246, "grad_norm": 0.11822202056646347, "learning_rate": 0.0002, "loss": 1.7159, "step": 99},
    {"epoch": 0.049838026414153996, "grad_norm": 0.10062436759471893, "learning_rate": 0.0002, "loss": 1.6696, "step": 100},
    {"epoch": 0.05033640667829554, "grad_norm": 0.10343766212463379, "learning_rate": 0.0002, "loss": 1.69, "step": 101},
    {"epoch": 0.05083478694243708, "grad_norm": 0.09872441738843918, "learning_rate": 0.0002, "loss": 1.7566, "step": 102},
    {"epoch": 0.05133316720657862, "grad_norm": 0.08979122340679169, "learning_rate": 0.0002, "loss": 1.6714, "step": 103},
    {"epoch": 0.05183154747072016, "grad_norm": 0.10805679857730865, "learning_rate": 0.0002, "loss": 1.7127, "step": 104},
    {"epoch": 0.0523299277348617, "grad_norm": 0.0966518372297287, "learning_rate": 0.0002, "loss": 1.6586, "step": 105},
    {"epoch": 0.05282830799900324, "grad_norm": 0.6643556952476501, "learning_rate": 0.0002, "loss": 1.906, "step": 106},
    {"epoch": 0.05332668826314478, "grad_norm": 0.14238013327121735, "learning_rate": 0.0002, "loss": 1.7367, "step": 107},
    {"epoch": 0.05382506852728632, "grad_norm": 0.2091197371482849, "learning_rate": 0.0002, "loss": 1.7879, "step": 108},
    {"epoch": 0.05432344879142786, "grad_norm": 0.11703892797231674, "learning_rate": 0.0002, "loss": 1.7743, "step": 109},
    {"epoch": 0.0548218290555694, "grad_norm": 0.15277640521526337, "learning_rate": 0.0002, "loss": 1.6906, "step": 110},
    {"epoch": 0.05532020931971094, "grad_norm": 0.11744142323732376, "learning_rate": 0.0002, "loss": 1.6935, "step": 111},
    {"epoch": 0.05581858958385248, "grad_norm": 0.10640200227499008, "learning_rate": 0.0002, "loss": 1.6654, "step": 112},
    {"epoch": 0.05631696984799402, "grad_norm": 0.10955353826284409, "learning_rate": 0.0002, "loss": 1.7095, "step": 113},
    {"epoch": 0.05681535011213556, "grad_norm": 0.3743372857570648, "learning_rate": 0.0002, "loss": 1.8212, "step": 114},
    {"epoch": 0.0573137303762771, "grad_norm": 0.11817771941423416, "learning_rate": 0.0002, "loss": 1.7246, "step": 115},
    {"epoch": 0.05781211064041864, "grad_norm": 0.10563557595014572, "learning_rate": 0.0002, "loss": 1.6554, "step": 116},
    {"epoch": 0.058310490904560176, "grad_norm": 0.11494623869657516, "learning_rate": 0.0002, "loss": 1.7563, "step": 117},
    {"epoch": 0.05880887116870172, "grad_norm": 0.12262585759162903, "learning_rate": 0.0002, "loss": 1.7416, "step": 118},
    {"epoch": 0.05930725143284326, "grad_norm": 0.09501025080680847, "learning_rate": 0.0002, "loss": 1.7068, "step": 119},
    {"epoch": 0.0598056316969848, "grad_norm": 0.15478286147117615, "learning_rate": 0.0002, "loss": 1.8005, "step": 120},
    {"epoch": 0.06030401196112634, "grad_norm": 0.5174306631088257, "learning_rate": 0.0002, "loss": 1.7736, "step": 121},
    {"epoch": 0.06080239222526788, "grad_norm": 0.37489035725593567, "learning_rate": 0.0002, "loss": 1.7367, "step": 122},
    {"epoch": 0.06130077248940942, "grad_norm": 0.10632194578647614, "learning_rate": 0.0002, "loss": 1.6754, "step": 123},
    {"epoch": 0.06179915275355096, "grad_norm": 0.5897635817527771, "learning_rate": 0.0002, "loss": 1.8483, "step": 124},
    {"epoch": 0.0622975330176925, "grad_norm": 0.1104891449213028, "learning_rate": 0.0002, "loss": 1.6705, "step": 125},
    {"epoch": 0.06279591328183404, "grad_norm": 0.171495720744133, "learning_rate": 0.0002, "loss": 1.8345, "step": 126},
    {"epoch": 0.06329429354597559, "grad_norm": 0.2864750921726227, "learning_rate": 0.0002, "loss": 1.6944, "step": 127},
    {"epoch": 0.06379267381011712, "grad_norm": 0.1258823126554489, "learning_rate": 0.0002, "loss": 1.6922, "step": 128},
    {"epoch": 0.06429105407425866, "grad_norm": 0.10813643783330917, "learning_rate": 0.0002, "loss": 1.6886, "step": 129},
    {"epoch": 0.0647894343384002, "grad_norm": 0.12285427749156952, "learning_rate": 0.0002, "loss": 1.712, "step": 130},
    {"epoch": 0.06528781460254174, "grad_norm": 0.11049698293209076, "learning_rate": 0.0002, "loss": 1.7107, "step": 131},
    {"epoch": 0.06578619486668327, "grad_norm": 0.4740373492240906, "learning_rate": 0.0002, "loss": 1.8128, "step": 132},
    {"epoch": 0.06628457513082482, "grad_norm": 0.11663281917572021, "learning_rate": 0.0002, "loss": 1.7054, "step": 133},
    {"epoch": 0.06678295539496636, "grad_norm": 0.1274426281452179, "learning_rate": 0.0002, "loss": 1.7461, "step": 134},
    {"epoch": 0.0672813356591079, "grad_norm": 0.11273318529129028, "learning_rate": 0.0002, "loss": 1.6195, "step": 135},
    {"epoch": 0.06777971592324944, "grad_norm": 0.12240920960903168, "learning_rate": 0.0002, "loss": 1.7528, "step": 136},
    {"epoch": 0.06827809618739097, "grad_norm": 0.1003924235701561, "learning_rate": 0.0002, "loss": 1.5651, "step": 137},
    {"epoch": 0.06877647645153252, "grad_norm": 0.12279325723648071, "learning_rate": 0.0002, "loss": 1.7905, "step": 138},
    {"epoch": 0.06927485671567406, "grad_norm": 0.10567662119865417, "learning_rate": 0.0002, "loss": 1.7437, "step": 139},
    {"epoch": 0.0697732369798156, "grad_norm": 0.0949968695640564, "learning_rate": 0.0002, "loss": 1.7375, "step": 140},
    {"epoch": 0.07027161724395714, "grad_norm": 0.10375083237886429, "learning_rate": 0.0002, "loss": 1.713, "step": 141},
    {"epoch": 0.07076999750809868, "grad_norm": 0.0937686413526535, "learning_rate": 0.0002, "loss": 1.7152, "step": 142},
    {"epoch": 0.07126837777224022, "grad_norm": 0.0981929674744606, "learning_rate": 0.0002, "loss": 1.7116, "step": 143},
    {"epoch": 0.07176675803638176, "grad_norm": 1.1460381746292114, "learning_rate": 0.0002, "loss": 1.9091, "step": 144},
    {"epoch": 0.0722651383005233, "grad_norm": 0.1193133145570755, "learning_rate": 0.0002, "loss": 1.7387, "step": 145},
    {"epoch": 0.07276351856466484, "grad_norm": 0.13854117691516876, "learning_rate": 0.0002, "loss": 1.656, "step": 146},
    {"epoch": 0.07326189882880638, "grad_norm": 0.6005303263664246, "learning_rate": 0.0002, "loss": 1.9014, "step": 147},
    {"epoch": 0.07376027909294793, "grad_norm": 0.13879133760929108, "learning_rate": 0.0002, "loss": 1.7158, "step": 148},
    {"epoch": 0.07425865935708946, "grad_norm": 0.13073574006557465, "learning_rate": 0.0002, "loss": 1.7355, "step": 149},
    {"epoch": 0.074757039621231, "grad_norm": 0.12578125298023224, "learning_rate": 0.0002, "loss": 1.7376, "step": 150},
    {"epoch": 0.07525541988537254, "grad_norm": 0.13024558126926422, "learning_rate": 0.0002, "loss": 1.7675, "step": 151},
    {"epoch": 0.07575380014951408, "grad_norm": 0.12630225718021393, "learning_rate": 0.0002, "loss": 1.6509, "step": 152},
    {"epoch": 0.07625218041365561, "grad_norm": 0.13081084191799164, "learning_rate": 0.0002, "loss": 1.7393, "step": 153},
    {"epoch": 0.07675056067779716, "grad_norm": 0.11292438209056854, "learning_rate": 0.0002, "loss": 1.6533, "step": 154},
    {"epoch": 0.0772489409419387, "grad_norm": 0.10187578946352005, "learning_rate": 0.0002, "loss": 1.6915, "step": 155},
    {"epoch": 0.07774732120608024, "grad_norm": 0.10563293844461441, "learning_rate": 0.0002, "loss": 1.7378, "step": 156},
    {"epoch": 0.07824570147022178, "grad_norm": 0.10501443594694138, "learning_rate": 0.0002, "loss": 1.6498, "step": 157},
    {"epoch": 0.07874408173436331, "grad_norm": 0.11756912618875504, "learning_rate": 0.0002, "loss": 1.7963, "step": 158},
    {"epoch": 0.07924246199850486, "grad_norm": 0.1010415181517601, "learning_rate": 0.0002, "loss": 1.6637, "step": 159},
    {"epoch": 0.0797408422626464, "grad_norm": 0.09472226351499557, "learning_rate": 0.0002, "loss": 1.6057, "step": 160},
    {"epoch": 0.08023922252678795, "grad_norm": 0.10156677663326263, "learning_rate": 0.0002, "loss": 1.7573, "step": 161},
    {"epoch": 0.08073760279092948, "grad_norm": 0.09345332533121109, "learning_rate": 0.0002, "loss": 1.6327, "step": 162},
    {"epoch": 0.08123598305507101, "grad_norm": 0.09440191835165024, "learning_rate": 0.0002, "loss": 1.6753, "step": 163},
    {"epoch": 0.08173436331921256, "grad_norm": 0.0925949364900589, "learning_rate": 0.0002, "loss": 1.6786, "step": 164},
    {"epoch": 0.0822327435833541, "grad_norm": 0.09808436781167984, "learning_rate": 0.0002, "loss": 1.75, "step": 165},
    {"epoch": 0.08273112384749563, "grad_norm": 0.10032784938812256, "learning_rate": 0.0002, "loss": 1.6463, "step": 166},
    {"epoch": 0.08322950411163718, "grad_norm": 0.769005298614502, "learning_rate": 0.0002, "loss": 1.8314, "step": 167},
    {"epoch": 0.08372788437577872, "grad_norm": 1.013753890991211, "learning_rate": 0.0002, "loss": 1.9179, "step": 168},
    {"epoch": 0.08422626463992026, "grad_norm": 0.11522974818944931, "learning_rate": 0.0002, "loss": 1.8271, "step": 169},
    {"epoch": 0.0847246449040618, "grad_norm": 0.1381683349609375, "learning_rate": 0.0002, "loss": 1.7015, "step": 170},
    {"epoch": 0.08522302516820333, "grad_norm": 0.13124744594097137, "learning_rate": 0.0002, "loss": 1.7213, "step": 171},
    {"epoch": 0.08572140543234488, "grad_norm": 0.1552695333957672, "learning_rate": 0.0002, "loss": 1.6868, "step": 172},
    {"epoch": 0.08621978569648642, "grad_norm": 0.11559716612100601, "learning_rate": 0.0002, "loss": 1.7474, "step": 173},
    {"epoch": 0.08671816596062797, "grad_norm": 0.11131990700960159, "learning_rate": 0.0002, "loss": 1.6365, "step": 174},
    {"epoch": 0.0872165462247695, "grad_norm": 0.11412417143583298, "learning_rate": 0.0002, "loss": 1.6205, "step": 175},
    {"epoch": 0.08771492648891104, "grad_norm": 0.11382830142974854, "learning_rate": 0.0002, "loss": 1.7673, "step": 176},
    {"epoch": 0.08821330675305258, "grad_norm": 0.7038962244987488, "learning_rate": 0.0002, "loss": 1.8568, "step": 177},
    {"epoch": 0.08871168701719412, "grad_norm": 0.11253572255373001, "learning_rate": 0.0002, "loss": 1.7263, "step": 178},
    {"epoch": 0.08921006728133565, "grad_norm": 0.12908123433589935, "learning_rate": 0.0002, "loss": 1.7021, "step": 179},
    {"epoch": 0.0897084475454772, "grad_norm": 0.12027324736118317, "learning_rate": 0.0002, "loss": 1.7542, "step": 180},
    {"epoch": 0.09020682780961874, "grad_norm": 0.13822880387306213, "learning_rate": 0.0002, "loss": 1.7947, "step": 181},
    {"epoch": 0.09070520807376029, "grad_norm": 0.11809349060058594, "learning_rate": 0.0002, "loss": 1.7438, "step": 182},
    {"epoch": 0.09120358833790182, "grad_norm": 0.11567198485136032, "learning_rate": 0.0002, "loss": 1.7006, "step": 183},
    {"epoch": 0.09170196860204335, "grad_norm": 0.11884818226099014, "learning_rate": 0.0002, "loss": 1.7481, "step": 184},
    {"epoch": 0.0922003488661849, "grad_norm": 0.13118627667427063, "learning_rate": 0.0002, "loss": 1.7579, "step": 185},
    {"epoch": 0.09269872913032644, "grad_norm": 0.10780288279056549, "learning_rate": 0.0002, "loss": 1.7563, "step": 186},
    {"epoch": 0.09319710939446797, "grad_norm": 0.1052689403295517, "learning_rate": 0.0002, "loss": 1.7176, "step": 187},
    {"epoch": 0.09369548965860952, "grad_norm": 0.11142247915267944, "learning_rate": 0.0002, "loss": 1.6998, "step": 188},
    {"epoch": 0.09419386992275106, "grad_norm": 0.11082904785871506, "learning_rate": 0.0002, "loss": 1.7492, "step": 189},
    {"epoch": 0.0946922501868926, "grad_norm": 0.09668837487697601, "learning_rate": 0.0002, "loss": 1.6655, "step": 190},
    {"epoch": 0.09519063045103414, "grad_norm": 0.09926537424325943, "learning_rate": 0.0002, "loss": 1.7393, "step": 191},
    {"epoch": 0.09568901071517567, "grad_norm": 0.09865368157625198, "learning_rate": 0.0002, "loss": 1.7538, "step": 192},
    {"epoch": 0.09618739097931722, "grad_norm": 0.10074108839035034, "learning_rate": 0.0002, "loss": 1.7556, "step": 193},
    {"epoch": 0.09668577124345876, "grad_norm": 0.11467942595481873, "learning_rate": 0.0002, "loss": 1.7414, "step": 194},
    {"epoch": 0.0971841515076003, "grad_norm": 0.09638036042451859, "learning_rate": 0.0002, "loss": 1.7296, "step": 195},
    {"epoch": 0.09768253177174184, "grad_norm": 0.09951262921094894, "learning_rate": 0.0002, "loss": 1.6691, "step": 196},
    {"epoch": 0.09818091203588337, "grad_norm": 0.09425103664398193, "learning_rate": 0.0002, "loss": 1.6563, "step": 197},
    {"epoch": 0.09867929230002492, "grad_norm": 0.09163974225521088, "learning_rate": 0.0002, "loss": 1.6591, "step": 198},
    {"epoch": 0.09917767256416646, "grad_norm": 0.10825615376234055, "learning_rate": 0.0002, "loss": 1.6748, "step": 199},
    {"epoch": 0.09967605282830799, "grad_norm": 0.08873865008354187, "learning_rate": 0.0002, "loss": 1.7027, "step": 200},
    {"epoch": 0.10017443309244954, "grad_norm": 0.09379550069570541, "learning_rate": 0.0002, "loss": 1.7475, "step": 201},
    {"epoch": 0.10067281335659108, "grad_norm": 0.09395930916070938, "learning_rate": 0.0002, "loss": 1.7183, "step": 202},
    {"epoch": 0.10117119362073262, "grad_norm": 0.09373954683542252, "learning_rate": 0.0002, "loss": 1.7413, "step": 203},
    {"epoch": 0.10166957388487416, "grad_norm": 0.0926884338259697, "learning_rate": 0.0002, "loss": 1.7284, "step": 204},
    {"epoch": 0.1021679541490157, "grad_norm": 0.09394028782844543, "learning_rate": 0.0002, "loss": 1.6777, "step": 205},
    {"epoch": 0.10266633441315724, "grad_norm": 0.0934232845902443, "learning_rate": 0.0002, "loss": 1.6389, "step": 206},
    {"epoch": 0.10316471467729878, "grad_norm": 0.08943123370409012, "learning_rate": 0.0002, "loss": 1.7382, "step": 207},
    {"epoch": 0.10366309494144033, "grad_norm": 0.09671316295862198, "learning_rate": 0.0002, "loss": 1.7017, "step": 208},
    {"epoch": 0.10416147520558186, "grad_norm": 0.12016978114843369, "learning_rate": 0.0002, "loss": 1.7993, "step": 209},
    {"epoch": 0.1046598554697234, "grad_norm": 0.5822897553443909, "learning_rate": 0.0002, "loss": 1.6948, "step": 210},
    {"epoch": 0.10515823573386494, "grad_norm": 0.10984666645526886, "learning_rate": 0.0002, "loss": 1.703, "step": 211},
    {"epoch": 0.10565661599800648, "grad_norm": 0.661040186882019, "learning_rate": 0.0002, "loss": 1.7008, "step": 212},
    {"epoch": 0.10615499626214801, "grad_norm": 0.1641639620065689, "learning_rate": 0.0002, "loss": 1.8105, "step": 213},
    {"epoch": 0.10665337652628956, "grad_norm": 0.34271761775016785, "learning_rate": 0.0002, "loss": 1.7768, "step": 214},
    {"epoch": 0.1071517567904311, "grad_norm": 0.11224206537008286, "learning_rate": 0.0002, "loss": 1.7126, "step": 215},
    {"epoch": 0.10765013705457264, "grad_norm": 0.11788146197795868, "learning_rate": 0.0002, "loss": 1.7617, "step": 216},
    {"epoch": 0.10814851731871418, "grad_norm": 0.10918893665075302, "learning_rate": 0.0002, "loss": 1.6258, "step": 217},
    {"epoch": 0.10864689758285571, "grad_norm": 0.12023265659809113, "learning_rate": 0.0002, "loss": 1.7459, "step": 218},
    {"epoch": 0.10914527784699726, "grad_norm": 0.11474837362766266, "learning_rate": 0.0002, "loss": 1.749, "step": 219},
    {"epoch": 0.1096436581111388, "grad_norm": 0.10222747921943665, "learning_rate": 0.0002, "loss": 1.696, "step": 220},
    {"epoch": 0.11014203837528033, "grad_norm": 0.1074354350566864, "learning_rate": 0.0002, "loss": 1.708, "step": 221},
    {"epoch": 0.11064041863942188, "grad_norm": 0.5447832345962524, "learning_rate": 0.0002, "loss": 1.8402, "step": 222},
    {"epoch": 0.11113879890356342, "grad_norm": 0.12009864300489426, "learning_rate": 0.0002, "loss": 1.7412, "step": 223},
    {"epoch": 0.11163717916770496, "grad_norm": 0.11686031520366669, "learning_rate": 0.0002, "loss": 1.7185, "step": 224},
    {"epoch": 0.1121355594318465, "grad_norm": 0.12914586067199707, "learning_rate": 0.0002, "loss": 1.6867, "step": 225},
    {"epoch": 0.11263393969598803, "grad_norm": 0.10797183215618134, "learning_rate": 0.0002, "loss": 1.706, "step": 226},
    {"epoch": 0.11313231996012958, "grad_norm": 0.1088324561715126, "learning_rate": 0.0002, "loss": 1.6257, "step": 227},
    {"epoch": 0.11363070022427112, "grad_norm": 0.10438574105501175, "learning_rate": 0.0002, "loss": 1.6798, "step": 228},
    {"epoch": 0.11412908048841267, "grad_norm": 0.14163640141487122, "learning_rate": 0.0002, "loss": 1.785, "step": 229},
    {"epoch": 0.1146274607525542, "grad_norm": 0.10191742330789566, "learning_rate": 0.0002, "loss": 1.6979, "step": 230},
    {"epoch": 0.11512584101669573, "grad_norm": 0.11547041684389114, "learning_rate": 0.0002, "loss": 1.7793, "step": 231},
    {"epoch": 0.11562422128083728, "grad_norm": 0.10447453707456589, "learning_rate": 0.0002, "loss": 1.7791, "step": 232},
    {"epoch": 0.11612260154497882, "grad_norm": 0.10447558760643005, "learning_rate": 0.0002, "loss": 1.6799, "step": 233},
    {"epoch": 0.11662098180912035, "grad_norm": 0.10260461270809174, "learning_rate": 0.0002, "loss": 1.6561, "step": 234},
    {"epoch": 0.1171193620732619, "grad_norm": 0.10199354588985443, "learning_rate": 0.0002, "loss": 1.6476, "step": 235},
    {"epoch": 0.11761774233740344, "grad_norm": 0.09869713336229324, "learning_rate": 0.0002, "loss": 1.6183, "step": 236},
    {"epoch": 0.11811612260154498, "grad_norm": 0.9354596138000488, "learning_rate": 0.0002, "loss": 1.9584, "step": 237},
    {"epoch": 0.11861450286568652, "grad_norm": 0.15785987675189972, "learning_rate": 0.0002, "loss": 1.718, "step": 238},
    {"epoch": 0.11911288312982805, "grad_norm": 0.16236662864685059, "learning_rate": 0.0002, "loss": 1.7275, "step": 239},
    {"epoch": 0.1196112633939696, "grad_norm": 0.1407175064086914, "learning_rate": 0.0002, "loss": 1.6987, "step": 240},
    {"epoch": 0.12010964365811114, "grad_norm": 0.13428977131843567, "learning_rate": 0.0002, "loss": 1.6998, "step": 241},
    {"epoch": 0.12060802392225269, "grad_norm": 0.5954437255859375, "learning_rate": 0.0002, "loss": 1.7536, "step": 242},
    {"epoch": 0.12110640418639422, "grad_norm": 0.12084382027387619, "learning_rate": 0.0002, "loss": 1.6446, "step": 243},
    {"epoch": 0.12160478445053576, "grad_norm": 0.12887060642242432, "learning_rate": 0.0002, "loss": 1.6994, "step": 244},
    {"epoch": 0.1221031647146773, "grad_norm": 0.12585604190826416, "learning_rate": 0.0002, "loss": 1.6705, "step": 245},
    {"epoch": 0.12260154497881884, "grad_norm": 0.11495430767536163, "learning_rate": 0.0002, "loss": 1.6833, "step": 246},
    {"epoch": 0.12309992524296037, "grad_norm": 0.36918768286705017, "learning_rate": 0.0002, "loss": 1.8354, "step": 247},
    {"epoch": 0.12359830550710192, "grad_norm": 0.1330924779176712, "learning_rate": 0.0002, "loss": 1.6915, "step": 248},
    {"epoch": 0.12409668577124346, "grad_norm": 0.6573293805122375, "learning_rate": 0.0002, "loss": 1.7672, "step": 249},
    {"epoch": 0.124595066035385, "grad_norm": 0.13000234961509705, "learning_rate": 0.0002, "loss": 1.6639, "step": 250},
    {"epoch": 0.12509344629952654, "grad_norm": 0.14653077721595764, "learning_rate": 0.0002, "loss": 1.7126, "step": 251},
    {"epoch": 0.12559182656366807, "grad_norm": 0.13498292863368988, "learning_rate": 0.0002, "loss": 1.6848, "step": 252},
    {"epoch": 0.1260902068278096, "grad_norm": 0.13268351554870605, "learning_rate": 0.0002, "loss": 1.7338, "step": 253},
    {"epoch": 0.12658858709195117, "grad_norm": 0.1395343542098999, "learning_rate": 0.0002, "loss": 1.7099, "step": 254},
    {"epoch": 0.1270869673560927, "grad_norm": 0.1279151439666748, "learning_rate": 0.0002, "loss": 1.7156, "step": 255},
    {"epoch": 0.12758534762023424, "grad_norm": 0.112457275390625, "learning_rate": 0.0002, "loss": 1.7054, "step": 256},
    {"epoch": 0.12808372788437578, "grad_norm": 0.11672843992710114, "learning_rate": 0.0002, "loss": 1.6895, "step": 257},
    {"epoch": 0.1285821081485173, "grad_norm": 0.1295323520898819, "learning_rate": 0.0002, "loss": 1.6738, "step": 258},
    {"epoch": 0.12908048841265887, "grad_norm": 0.10538823157548904, "learning_rate": 0.0002, "loss": 1.626, "step": 259},
    {"epoch": 0.1295788686768004, "grad_norm": 0.1093951016664505, "learning_rate": 0.0002, "loss": 1.6494, "step": 260},
    {"epoch": 0.13007724894094194, "grad_norm": 0.10753627866506577, "learning_rate": 0.0002, "loss": 1.7058, "step": 261},
    {"epoch": 0.13057562920508348, "grad_norm": 0.11015735566616058, "learning_rate": 0.0002, "loss": 1.7519, "step": 262},
    {"epoch": 0.131074009469225, "grad_norm": 0.10606027394533157, "learning_rate": 0.0002, "loss": 1.6725, "step": 263},
    {"epoch": 0.13157238973336655, "grad_norm": 0.09919940680265427, "learning_rate": 0.0002, "loss": 1.6522, "step": 264},
    {"epoch": 0.1320707699975081, "grad_norm": 0.1004357561469078, "learning_rate": 0.0002, "loss": 1.7, "step": 265},
    {"epoch": 0.13256915026164964, "grad_norm": 0.1044403687119484, "learning_rate": 0.0002, "loss": 1.7131, "step": 266},
    {"epoch": 0.13306753052579118, "grad_norm": 0.09830351173877716, "learning_rate": 0.0002, "loss": 1.7057, "step": 267},
    {"epoch": 0.1335659107899327, "grad_norm": 0.09731124341487885, "learning_rate": 0.0002, "loss": 1.6696, "step": 268},
    {"epoch": 0.13406429105407425, "grad_norm": 0.09874913096427917, "learning_rate": 0.0002, "loss": 1.6704, "step": 269},
    {"epoch": 0.1345626713182158, "grad_norm": 1.0015792846679688, "learning_rate": 0.0002, "loss": 1.828, "step": 270},
    {"epoch": 0.13506105158235734, "grad_norm": 0.15942072868347168, "learning_rate": 0.0002, "loss": 1.6851, "step": 271},
    {"epoch": 0.13555943184649888, "grad_norm": 0.1272728443145752, "learning_rate": 0.0002, "loss": 1.6946, "step": 272},
    {"epoch": 0.1360578121106404, "grad_norm": 0.13415473699569702, "learning_rate": 0.0002, "loss": 1.6865, "step": 273},
    {"epoch": 0.13655619237478195, "grad_norm": 0.6600972414016724, "learning_rate": 0.0002, "loss": 1.845, "step": 274},
    {"epoch": 0.1370545726389235, "grad_norm": 0.16784119606018066, "learning_rate": 0.0002, "loss": 1.8104, "step": 275},
    {"epoch": 0.13755295290306505, "grad_norm": 0.14813649654388428, "learning_rate": 0.0002, "loss": 1.7188, "step": 276},
    {"epoch": 0.13805133316720658, "grad_norm": 0.14158020913600922, "learning_rate": 0.0002, "loss": 1.7002, "step": 277},
    {"epoch": 0.13854971343134811, "grad_norm": 0.48206424713134766, "learning_rate": 0.0002, "loss": 1.8617, "step": 278},
    {"epoch": 0.13904809369548965, "grad_norm": 0.18177767097949982, "learning_rate": 0.0002, "loss": 1.7111, "step": 279},
    {"epoch": 0.1395464739596312, "grad_norm": 0.12430819869041443, "learning_rate": 0.0002, "loss": 1.6939, "step": 280},
    {"epoch": 0.14004485422377275, "grad_norm": 0.44922658801078796, "learning_rate": 0.0002, "loss": 1.7779, "step": 281},
    {"epoch": 0.14054323448791428, "grad_norm": 0.14023765921592712, "learning_rate": 0.0002, "loss": 1.6521, "step": 282},
    {"epoch": 0.14104161475205582, "grad_norm": 0.15241369605064392, "learning_rate": 0.0002, "loss": 1.6819, "step": 283},
    {"epoch": 0.14153999501619735, "grad_norm": 0.12531667947769165, "learning_rate": 0.0002, "loss": 1.7014, "step": 284},
    {"epoch": 0.14203837528033889, "grad_norm": 0.13596689701080322, "learning_rate": 0.0002, "loss": 1.6841, "step": 285},
    {"epoch": 0.14253675554448045, "grad_norm": 0.1316744089126587, "learning_rate": 0.0002, "loss": 1.7503, "step": 286},
    {"epoch": 0.14303513580862198, "grad_norm": 0.11584890633821487, "learning_rate": 0.0002, "loss": 1.6776, "step": 287},
    {"epoch": 0.14353351607276352, "grad_norm": 0.37444308400154114, "learning_rate": 0.0002, "loss": 1.7808, "step": 288},
    {"epoch": 0.14403189633690505, "grad_norm": 0.3217577338218689, "learning_rate": 0.0002, "loss": 1.6491, "step": 289},
    {"epoch": 0.1445302766010466, "grad_norm": 0.12234029918909073, "learning_rate": 0.0002, "loss": 1.7131, "step": 290},
    {"epoch": 0.14502865686518815, "grad_norm": 0.13871504366397858, "learning_rate": 0.0002, "loss": 1.7737, "step": 291},
    {"epoch": 0.14552703712932968, "grad_norm": 0.10792572051286697, "learning_rate": 0.0002, "loss": 1.7162, "step": 292},
    {"epoch": 0.14602541739347122, "grad_norm": 0.11277946084737778, "learning_rate": 0.0002, "loss": 1.666, "step": 293},
    {"epoch": 0.14652379765761275, "grad_norm": 0.11250103265047073, "learning_rate": 0.0002, "loss": 1.7334, "step": 294},
    {"epoch": 0.1470221779217543, "grad_norm": 0.10644537955522537, "learning_rate": 0.0002, "loss": 1.6836, "step": 295},
    {"epoch": 0.14752055818589585, "grad_norm": 0.12423089891672134, "learning_rate": 0.0002, "loss": 1.7349, "step": 296},
    {"epoch": 0.14801893845003739, "grad_norm": 0.10547474026679993, "learning_rate": 0.0002, "loss": 1.6783, "step": 297},
    {"epoch": 0.14851731871417892, "grad_norm": 0.10867539793252945, "learning_rate": 0.0002, "loss": 1.6709, "step": 298},
    {"epoch": 0.14901569897832045, "grad_norm": 0.21218198537826538, "learning_rate": 0.0002, "loss": 1.6717, "step": 299},
    {"epoch": 0.149514079242462, "grad_norm": 0.11373799294233322, "learning_rate": 0.0002, "loss": 1.7398, "step": 300},
    {"epoch": 0.15001245950660355, "grad_norm": 0.12452666461467743, "learning_rate": 0.0002, "loss": 1.7625, "step": 301},
    {"epoch": 0.1505108397707451, "grad_norm": 0.4068242609500885, "learning_rate": 0.0002, "loss": 1.7357, "step": 302},
    {"epoch": 0.15100922003488662, "grad_norm": 0.15395419299602509, "learning_rate": 0.0002, "loss": 1.6878, "step": 303},
    {"epoch": 0.15150760029902816, "grad_norm": 0.11441215127706528, "learning_rate": 0.0002, "loss": 1.7055, "step": 304},
    {"epoch": 0.1520059805631697, "grad_norm": 0.13675518333911896, "learning_rate": 0.0002, "loss": 1.7005, "step": 305},
    {"epoch": 0.15250436082731123, "grad_norm": 0.11606375873088837, "learning_rate": 0.0002, "loss": 1.6453, "step": 306},
    {"epoch": 0.1530027410914528, "grad_norm": 0.4435337483882904, "learning_rate": 0.0002, "loss": 1.7435, "step": 307},
    {"epoch": 0.15350112135559432, "grad_norm": 0.12212298810482025, "learning_rate": 0.0002, "loss": 1.705, "step": 308},
    {"epoch": 0.15399950161973586, "grad_norm": 0.14606495201587677, "learning_rate": 0.0002, "loss": 1.6517, "step": 309},
    {"epoch": 0.1544978818838774, "grad_norm": 0.11753024160861969, "learning_rate": 0.0002, "loss": 1.7427, "step": 310},
    {"epoch": 0.15499626214801893, "grad_norm": 0.13007789850234985, "learning_rate": 0.0002, "loss": 1.7462, "step": 311},
    {"epoch": 0.1554946424121605, "grad_norm": 0.11651528626680374, "learning_rate": 0.0002, "loss": 1.7128, "step": 312},
    {"epoch": 0.15599302267630202, "grad_norm": 0.1128389984369278, "learning_rate": 0.0002, "loss": 1.6977, "step": 313},
    {"epoch": 0.15649140294044356, "grad_norm": 0.10965872555971146, "learning_rate": 0.0002, "loss": 1.6578, "step": 314},
    {"epoch": 0.1569897832045851, "grad_norm": 0.10751237720251083, "learning_rate": 0.0002, "loss": 1.6346, "step": 315},
    {"epoch": 0.15748816346872663, "grad_norm": 0.09646358340978622, "learning_rate": 0.0002, "loss": 1.6873, "step": 316},
    {"epoch": 0.1579865437328682, "grad_norm": 0.09908836334943771, "learning_rate": 0.0002, "loss": 1.6934, "step": 317},
    {"epoch": 0.15848492399700972, "grad_norm": 0.09631779044866562, "learning_rate": 0.0002, "loss": 1.6703, "step": 318},
    {"epoch": 0.15898330426115126, "grad_norm": 0.5702200531959534, "learning_rate": 0.0002, "loss": 1.7651, "step": 319},
    {"epoch": 0.1594816845252928, "grad_norm": 0.1274351179599762, "learning_rate": 0.0002, "loss": 1.6632, "step": 320},
    {"epoch": 0.15998006478943433, "grad_norm": 0.10685572028160095, "learning_rate": 0.0002, "loss": 1.6691, "step": 321},
    {"epoch": 0.1604784450535759, "grad_norm": 0.12333345413208008, "learning_rate": 0.0002, "loss": 1.6811, "step": 322},
    {"epoch": 0.16097682531771743, "grad_norm": 0.10747205466032028, "learning_rate": 0.0002, "loss": 1.6292, "step": 323},
    {"epoch": 0.16147520558185896, "grad_norm": 0.10506169497966766, "learning_rate": 0.0002, "loss": 1.7463, "step": 324},
    {"epoch": 0.1619735858460005, "grad_norm": 0.11267457902431488, "learning_rate": 0.0002, "loss": 1.7192, "step": 325},
    {"epoch": 0.16247196611014203, "grad_norm": 0.10924848914146423, "learning_rate": 0.0002, "loss": 1.7146, "step": 326},
    {"epoch": 0.16297034637428356, "grad_norm": 0.11103785783052444, "learning_rate": 0.0002, "loss": 1.6215, "step": 327},
    {"epoch": 0.16346872663842513, "grad_norm": 0.3997076451778412, "learning_rate": 0.0002, "loss": 1.8753, "step": 328},
    {"epoch": 0.16396710690256666, "grad_norm": 0.10188498347997665, "learning_rate": 0.0002, "loss": 1.7483, "step": 329},
    {"epoch": 0.1644654871667082, "grad_norm": 0.10824645310640335, "learning_rate": 0.0002, "loss": 1.6828, "step": 330},
    {"epoch": 0.16496386743084973, "grad_norm": 0.09962976723909378, "learning_rate": 0.0002, "loss": 1.7127, "step": 331},
    {"epoch": 0.16546224769499127, "grad_norm": 0.10796276479959488, "learning_rate": 0.0002, "loss": 1.6799, "step": 332},
    {"epoch": 0.16596062795913283, "grad_norm": 0.09546298533678055, "learning_rate": 0.0002, "loss": 1.736, "step": 333},
    {"epoch": 0.16645900822327436, "grad_norm": 0.3045598864555359, "learning_rate": 0.0002, "loss": 1.6192, "step": 334},
    {"epoch": 0.1669573884874159, "grad_norm": 0.10275569558143616, "learning_rate": 0.0002, "loss": 1.7551, "step": 335},
    {"epoch": 0.16745576875155743, "grad_norm": 0.14451362192630768, "learning_rate": 0.0002, "loss": 1.7094, "step": 336},
    {"epoch": 0.16795414901569897, "grad_norm": 0.0982123464345932, "learning_rate": 0.0002, "loss": 1.6996, "step": 337},
    {"epoch": 0.16845252927984053, "grad_norm": 0.11521178483963013, "learning_rate": 0.0002, "loss": 1.6409, "step": 338},
    {"epoch": 0.16895090954398206, "grad_norm": 0.2746621072292328, "learning_rate": 0.0002, "loss": 1.7035, "step": 339},
    {"epoch": 0.1694492898081236, "grad_norm": 0.0955624207854271, "learning_rate": 0.0002, "loss": 1.6689, "step": 340},
    {"epoch": 0.16994767007226513, "grad_norm": 0.10157962888479233, "learning_rate": 0.0002, "loss": 1.6561, "step": 341},
    {"epoch": 0.17044605033640667, "grad_norm": 0.0971306711435318, "learning_rate": 0.0002, "loss": 1.7626, "step": 342},
    {"epoch": 0.17094443060054823, "grad_norm": 0.10407841205596924, "learning_rate": 0.0002, "loss": 1.681, "step": 343},
    {"epoch": 0.17144281086468977, "grad_norm": 0.09228493273258209, "learning_rate": 0.0002, "loss": 1.6196, "step": 344},
    {"epoch": 0.1719411911288313, "grad_norm": 0.10309567302465439, "learning_rate": 0.0002, "loss": 1.6534, "step": 345},
    {"epoch": 0.17243957139297283, "grad_norm": 0.10019028931856155, "learning_rate": 0.0002, "loss": 1.7315, "step": 346},
    {"epoch": 0.17293795165711437, "grad_norm": 0.09051994234323502, "learning_rate": 0.0002, "loss": 1.6537, "step": 347},
    {"epoch": 0.17343633192125593, "grad_norm": 0.09501929581165314, "learning_rate": 0.0002, "loss": 1.681, "step": 348},
    {"epoch": 0.17393471218539747, "grad_norm": 0.09314325451850891, "learning_rate": 0.0002, "loss": 1.6141, "step": 349},
    {"epoch": 0.174433092449539, "grad_norm": 0.09021347016096115, "learning_rate": 0.0002, "loss": 1.6864, "step": 350},
    {"epoch": 0.17493147271368054, "grad_norm": 0.27376627922058105, "learning_rate": 0.0002, "loss": 1.7223, "step": 351},
    {"epoch": 0.17542985297782207, "grad_norm": 0.11608853936195374, "learning_rate": 0.0002, "loss": 1.6974, "step": 352},
    {"epoch": 0.1759282332419636, "grad_norm": 0.09565002471208572, "learning_rate": 0.0002, "loss": 1.6925, "step": 353},
    {"epoch": 0.17642661350610517, "grad_norm": 0.10814974457025528, "learning_rate": 0.0002, "loss": 1.6349, "step": 354},
    {"epoch": 0.1769249937702467, "grad_norm": 0.09551705420017242, "learning_rate": 0.0002, "loss": 1.6715, "step": 355},
    {"epoch": 0.17742337403438824, "grad_norm": 0.10541266202926636, "learning_rate": 0.0002, "loss": 1.6592, "step": 356},
    {"epoch": 0.17792175429852977, "grad_norm": 0.09884203970432281, "learning_rate": 0.0002, "loss": 1.638, "step": 357},
    {"epoch": 0.1784201345626713, "grad_norm": 0.19244062900543213, "learning_rate": 0.0002, "loss": 1.6823, "step": 358},
    {"epoch": 0.17891851482681287, "grad_norm": 0.1312815397977829, "learning_rate": 0.0002, "loss": 1.747, "step": 359},
    {"epoch": 0.1794168950909544, "grad_norm": 0.10575084388256073, "learning_rate": 0.0002, "loss": 1.6958, "step": 360},
    {"epoch": 0.17991527535509594, "grad_norm": 0.1993856579065323, "learning_rate": 0.0002, "loss": 1.5862, "step": 361},
    {"epoch": 0.18041365561923747, "grad_norm": 0.1053745448589325, "learning_rate": 0.0002, "loss": 1.705, "step": 362},
    {"epoch": 0.180912035883379, "grad_norm": 0.10017159581184387, "learning_rate": 0.0002, "loss": 1.6565, "step": 363},
    {"epoch": 0.18141041614752057, "grad_norm": 0.12066628038883209, "learning_rate": 0.0002, "loss": 1.639, "step": 364},
    {"epoch": 0.1819087964116621, "grad_norm": 0.12606841325759888, "learning_rate": 0.0002, "loss": 1.8435, "step": 365},
    {"epoch": 0.18240717667580364, "grad_norm": 0.10491355508565903, "learning_rate": 0.0002, "loss": 1.5846, "step": 366},
    {"epoch": 0.18290555693994517, "grad_norm": 0.10337149351835251, "learning_rate": 0.0002, "loss": 1.6903, "step": 367},
    {"epoch": 0.1834039372040867, "grad_norm": 0.09452168643474579, "learning_rate": 0.0002, "loss": 1.6865, "step": 368},
    {"epoch": 0.18390231746822827, "grad_norm": 0.09799271076917648, "learning_rate": 0.0002, "loss": 1.6343, "step": 369},
    {"epoch": 0.1844006977323698, "grad_norm": 0.09442919492721558, "learning_rate": 0.0002, "loss": 1.6266, "step": 370},
    {"epoch": 0.18489907799651134, "grad_norm": 0.09542658925056458, "learning_rate": 0.0002, "loss": 1.612, "step": 371},
    {"epoch": 0.18539745826065288, "grad_norm": 0.0989847183227539, "learning_rate": 0.0002, "loss": 1.6957, "step": 372},
    {"epoch": 0.1858958385247944, "grad_norm": 0.09289655089378357, "learning_rate": 0.0002, "loss": 1.6501, "step": 373},
    {"epoch": 0.18639421878893594, "grad_norm": 0.10097731649875641, "learning_rate": 0.0002, "loss": 1.7114, "step": 374},
    {"epoch": 0.1868925990530775, "grad_norm": 0.09352610260248184, "learning_rate": 0.0002, "loss": 1.7375, "step": 375},
    {"epoch": 0.18739097931721904, "grad_norm": 0.0907459631562233, "learning_rate": 0.0002, "loss": 1.651, "step": 376},
    {"epoch": 0.18788935958136058, "grad_norm": 0.0915813073515892, "learning_rate": 0.0002, "loss": 1.6289, "step": 377},
    {"epoch": 0.1883877398455021, "grad_norm": 0.09011110663414001, "learning_rate": 0.0002, "loss": 1.7024, "step": 378},
    {"epoch": 0.18888612010964365, "grad_norm": 0.4069153964519501, "learning_rate": 0.0002, "loss": 1.6647, "step": 379},
    {"epoch": 0.1893845003737852, "grad_norm": 0.1351984292268753, "learning_rate": 0.0002, "loss": 1.7911, "step": 380},
    {"epoch": 0.18988288063792674, "grad_norm": 0.537133514881134, "learning_rate": 0.0002, "loss": 1.75, "step": 381},
    {"epoch": 0.19038126090206828, "grad_norm": 0.10901357978582382, "learning_rate": 0.0002, "loss": 1.6767, "step": 382},
    {"epoch": 0.1908796411662098, "grad_norm": 0.19000430405139923, "learning_rate": 0.0002, "loss": 1.6682, "step": 383},
    {"epoch": 0.19137802143035135, "grad_norm": 0.12100650370121002, "learning_rate": 0.0002, "loss": 1.6844, "step": 384},
    {"epoch": 0.1918764016944929, "grad_norm": 0.12487197667360306, "learning_rate": 0.0002, "loss": 1.7239, "step": 385},
    {"epoch": 0.19237478195863444, "grad_norm": 0.12008525431156158, "learning_rate": 0.0002, "loss": 1.6443, "step": 386},
    {"epoch": 0.19287316222277598, "grad_norm": 0.119840107858181, "learning_rate": 0.0002, "loss": 1.6271, "step": 387},
    {"epoch": 0.1933715424869175, "grad_norm": 0.1126130223274231, "learning_rate": 0.0002, "loss": 1.681, "step": 388},
    {"epoch": 0.19386992275105905, "grad_norm": 0.11164896190166473, "learning_rate": 0.0002, "loss": 1.6586, "step": 389},
    {"epoch": 0.1943683030152006, "grad_norm": 0.1496819108724594, "learning_rate": 0.0002, "loss": 1.6856, "step": 390},
    {"epoch": 0.19486668327934215, "grad_norm": 0.09984704852104187, "learning_rate": 0.0002, "loss": 1.6656, "step": 391},
    {"epoch": 0.19536506354348368, "grad_norm": 0.10864219069480896, "learning_rate": 0.0002, "loss": 1.659, "step": 392},
    {"epoch": 0.19586344380762521, "grad_norm": 0.09744228422641754, "learning_rate": 0.0002, "loss": 1.6162, "step": 393},
    {"epoch": 0.19636182407176675, "grad_norm": 0.11409466713666916, "learning_rate": 0.0002, "loss": 1.6646, "step": 394},
    {"epoch": 0.19686020433590828, "grad_norm": 0.096027672290802, "learning_rate": 0.0002, "loss": 1.6464, "step": 395},
    {"epoch": 0.19735858460004985, "grad_norm": 0.48993775248527527, "learning_rate": 0.0002, "loss": 1.7454, "step": 396},
    {"epoch": 0.19785696486419138, "grad_norm": 0.11972647160291672, "learning_rate": 0.0002, "loss": 1.6958, "step": 397},
    {"epoch": 0.19835534512833292, "grad_norm": 0.49595576524734497, "learning_rate": 0.0002, "loss": 1.6128, "step": 398},
    {"epoch": 0.19885372539247445, "grad_norm": 0.11590411514043808, "learning_rate": 0.0002, "loss": 1.7173, "step": 399},
    {"epoch": 0.19935210565661599, "grad_norm": 0.11584487557411194, "learning_rate": 0.0002, "loss": 1.6773, "step": 400},
    {"epoch": 0.19985048592075755, "grad_norm": 0.1017480343580246, "learning_rate": 0.0002, "loss": 1.6388, "step": 401},
    {"epoch": 0.20034886618489908, "grad_norm": 0.12011077255010605, "learning_rate": 0.0002, "loss": 1.707, "step": 402},
    {"epoch": 0.20084724644904062, "grad_norm": 0.36016201972961426, "learning_rate": 0.0002, "loss": 1.8179, "step": 403},
    {"epoch": 0.20134562671318215, "grad_norm": 0.11278028786182404,
|
"learning_rate": 0.0002, |
|
"loss": 1.6733, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.2018440069773237, |
|
"grad_norm": 0.10928738862276077, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6858, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.20234238724146525, |
|
"grad_norm": 0.10860306769609451, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6975, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.20284076750560678, |
|
"grad_norm": 0.11352024972438812, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7504, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.20333914776974832, |
|
"grad_norm": 0.10320567339658737, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6715, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.20383752803388985, |
|
"grad_norm": 0.12056868523359299, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7571, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.2043359082980314, |
|
"grad_norm": 0.11091714352369308, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6391, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.20483428856217295, |
|
"grad_norm": 0.10888761281967163, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6763, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.20533266882631449, |
|
"grad_norm": 0.2625375986099243, |
|
"learning_rate": 0.0002, |
|
"loss": 1.58, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.20583104909045602, |
|
"grad_norm": 0.12070990353822708, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7437, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.20632942935459755, |
|
"grad_norm": 0.09670402854681015, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6502, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.2068278096187391, |
|
"grad_norm": 0.10343360900878906, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7273, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.20732618988288065, |
|
"grad_norm": 0.10445055365562439, |
|
"learning_rate": 0.0002, |
|
"loss": 1.674, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.2078245701470222, |
|
"grad_norm": 0.24325382709503174, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7492, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.20832295041116372, |
|
"grad_norm": 0.10541153699159622, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6389, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.20882133067530526, |
|
"grad_norm": 0.09688902646303177, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7145, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.2093197109394468, |
|
"grad_norm": 0.10568691790103912, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6699, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.20981809120358832, |
|
"grad_norm": 0.09683585166931152, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6411, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.2103164714677299, |
|
"grad_norm": 0.10286644101142883, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6951, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.21081485173187142, |
|
"grad_norm": 0.09786178171634674, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6316, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.21131323199601296, |
|
"grad_norm": 0.10202211886644363, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6702, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.2118116122601545, |
|
"grad_norm": 0.10444546490907669, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6371, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.21230999252429603, |
|
"grad_norm": 0.09346964955329895, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6638, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.2128083727884376, |
|
"grad_norm": 0.09578395634889603, |
|
"learning_rate": 0.0002, |
|
"loss": 1.622, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.21330675305257912, |
|
"grad_norm": 0.09412133693695068, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6292, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.21380513331672066, |
|
"grad_norm": 0.49985215067863464, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7932, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.2143035135808622, |
|
"grad_norm": 0.58636075258255, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7671, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.21480189384500373, |
|
"grad_norm": 0.12334456294775009, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6392, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.2153002741091453, |
|
"grad_norm": 0.13144731521606445, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6686, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.21579865437328682, |
|
"grad_norm": 0.14804112911224365, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7357, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.21629703463742836, |
|
"grad_norm": 0.7628450393676758, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8465, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.2167954149015699, |
|
"grad_norm": 0.18024517595767975, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6732, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.21729379516571143, |
|
"grad_norm": 0.195417121052742, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7811, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.217792175429853, |
|
"grad_norm": 0.28199324011802673, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6088, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.21829055569399453, |
|
"grad_norm": 0.15422897040843964, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7555, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.21878893595813606, |
|
"grad_norm": 0.13214194774627686, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6575, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.2192873162222776, |
|
"grad_norm": 0.14797765016555786, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7903, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.21978569648641913, |
|
"grad_norm": 0.12424055486917496, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7089, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.22028407675056066, |
|
"grad_norm": 0.5921161770820618, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7352, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.22078245701470223, |
|
"grad_norm": 0.1724957525730133, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7427, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.22128083727884376, |
|
"grad_norm": 0.1341264247894287, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6738, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.2217792175429853, |
|
"grad_norm": 0.43373820185661316, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7591, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.22227759780712683, |
|
"grad_norm": 0.15030571818351746, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7306, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.22277597807126837, |
|
"grad_norm": 0.15096893906593323, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7637, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.22327435833540993, |
|
"grad_norm": 0.1577889323234558, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6704, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.22377273859955146, |
|
"grad_norm": 0.11596284061670303, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5843, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.224271118863693, |
|
"grad_norm": 0.14083531498908997, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6502, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.22476949912783453, |
|
"grad_norm": 0.11369968950748444, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7063, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.22526787939197607, |
|
"grad_norm": 0.12249240279197693, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6041, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.22576625965611763, |
|
"grad_norm": 0.13246704638004303, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7227, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.22626463992025916, |
|
"grad_norm": 0.15372870862483978, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7364, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.2267630201844007, |
|
"grad_norm": 0.10773339122533798, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6797, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.22726140044854223, |
|
"grad_norm": 0.10603539645671844, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6608, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.22775978071268377, |
|
"grad_norm": 0.11118324100971222, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6659, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.22825816097682533, |
|
"grad_norm": 0.10193316638469696, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7149, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.22875654124096687, |
|
"grad_norm": 0.118270143866539, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6581, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.2292549215051084, |
|
"grad_norm": 0.09839551895856857, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6906, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.22975330176924993, |
|
"grad_norm": 0.10430920869112015, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6367, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.23025168203339147, |
|
"grad_norm": 0.7883297204971313, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8726, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.230750062297533, |
|
"grad_norm": 0.14015096426010132, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6885, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.23124844256167457, |
|
"grad_norm": 0.6940969824790955, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8366, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.2317468228258161, |
|
"grad_norm": 0.16839167475700378, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6627, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.23224520308995764, |
|
"grad_norm": 0.14831361174583435, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6192, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.23274358335409917, |
|
"grad_norm": 0.6374949216842651, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8086, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.2332419636182407, |
|
"grad_norm": 0.1442909985780716, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6875, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.23374034388238227, |
|
"grad_norm": 0.15487882494926453, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6939, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.2342387241465238, |
|
"grad_norm": 0.133474662899971, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6011, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.23473710441066534, |
|
"grad_norm": 0.15738508105278015, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6801, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.23523548467480687, |
|
"grad_norm": 0.13371291756629944, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6454, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.2357338649389484, |
|
"grad_norm": 0.12480079382658005, |
|
"learning_rate": 0.0002, |
|
"loss": 1.613, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.23623224520308997, |
|
"grad_norm": 0.138162761926651, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6844, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.2367306254672315, |
|
"grad_norm": 0.13453134894371033, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7113, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.23722900573137304, |
|
"grad_norm": 0.11864453554153442, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7311, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.23772738599551457, |
|
"grad_norm": 0.3905930817127228, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7638, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.2382257662596561, |
|
"grad_norm": 0.1613403707742691, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6413, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.23872414652379767, |
|
"grad_norm": 0.13828811049461365, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7163, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.2392225267879392, |
|
"grad_norm": 0.13535858690738678, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6059, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.23972090705208074, |
|
"grad_norm": 0.15594834089279175, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7161, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.24021928731622227, |
|
"grad_norm": 0.11990589648485184, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7051, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.2407176675803638, |
|
"grad_norm": 0.11655411124229431, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6711, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.24121604784450537, |
|
"grad_norm": 0.11754405498504639, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7237, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.2417144281086469, |
|
"grad_norm": 0.1332051157951355, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7598, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.24221280837278844, |
|
"grad_norm": 0.10240749269723892, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6356, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.24271118863692998, |
|
"grad_norm": 0.1425447165966034, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7993, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.2432095689010715, |
|
"grad_norm": 0.10178319364786148, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6705, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.24370794916521304, |
|
"grad_norm": 0.354878306388855, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7251, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.2442063294293546, |
|
"grad_norm": 0.10244394838809967, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5874, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.24470470969349614, |
|
"grad_norm": 0.10944903641939163, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5817, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.24520308995763768, |
|
"grad_norm": 0.11182764172554016, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6859, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.2457014702217792, |
|
"grad_norm": 0.11066277325153351, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6275, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.24619985048592075, |
|
"grad_norm": 0.6789163947105408, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8408, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.2466982307500623, |
|
"grad_norm": 0.15237462520599365, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5969, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.24719661101420384, |
|
"grad_norm": 0.14016127586364746, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6325, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.24769499127834538, |
|
"grad_norm": 0.12557458877563477, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6745, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.2481933715424869, |
|
"grad_norm": 0.12593714892864227, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7337, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.24869175180662845, |
|
"grad_norm": 0.12869895994663239, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6982, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.24919013207077, |
|
"grad_norm": 0.6727408766746521, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7735, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24968851233491154, |
|
"grad_norm": 0.18164046108722687, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7327, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.2501868925990531, |
|
"grad_norm": 0.12988890707492828, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6335, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.25068527286319464, |
|
"grad_norm": 0.14229950308799744, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6705, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.25118365312733615, |
|
"grad_norm": 0.12232649326324463, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5992, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.2516820333914777, |
|
"grad_norm": 0.12053592503070831, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5962, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.2521804136556192, |
|
"grad_norm": 0.12370762974023819, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6675, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.2526787939197608, |
|
"grad_norm": 0.11628440022468567, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6743, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.25317717418390234, |
|
"grad_norm": 0.1284741759300232, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6903, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.25367555444804385, |
|
"grad_norm": 0.133184552192688, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6735, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.2541739347121854, |
|
"grad_norm": 0.11966334283351898, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6323, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2546723149763269, |
|
"grad_norm": 0.12117716670036316, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6458, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.2551706952404685, |
|
"grad_norm": 0.11778345704078674, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6272, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.25566907550461004, |
|
"grad_norm": 0.11609595268964767, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6588, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.25616745576875155, |
|
"grad_norm": 0.11605001240968704, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6666, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.2566658360328931, |
|
"grad_norm": 0.10593124479055405, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6628, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.2571642162970346, |
|
"grad_norm": 0.11132659763097763, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7112, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.2576625965611762, |
|
"grad_norm": 0.09980247169733047, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6759, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.25816097682531775, |
|
"grad_norm": 0.6143377423286438, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6616, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.25865935708945925, |
|
"grad_norm": 0.11244726181030273, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7124, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.2591577373536008, |
|
"grad_norm": 0.6190444827079773, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7698, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2596561176177423, |
|
"grad_norm": 0.7441633939743042, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8182, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.2601544978818839, |
|
"grad_norm": 0.13578347861766815, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6609, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.2606528781460254, |
|
"grad_norm": 0.1662416160106659, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7167, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.26115125841016695, |
|
"grad_norm": 0.16020916402339935, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6636, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.2616496386743085, |
|
"grad_norm": 0.12748084962368011, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6832, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.26214801893845, |
|
"grad_norm": 0.13277047872543335, |
|
"learning_rate": 0.0002, |
|
"loss": 1.682, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.2626463992025916, |
|
"grad_norm": 0.11746570467948914, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6567, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.2631447794667331, |
|
"grad_norm": 0.1124933585524559, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6462, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.26364315973087465, |
|
"grad_norm": 0.13045774400234222, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7247, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.2641415399950162, |
|
"grad_norm": 0.11953026801347733, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6896, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2646399202591577, |
|
"grad_norm": 0.3236943185329437, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6562, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.2651383005232993, |
|
"grad_norm": 0.13000494241714478, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6329, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.2656366807874408, |
|
"grad_norm": 0.13072949647903442, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6584, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.26613506105158236, |
|
"grad_norm": 0.30452999472618103, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6066, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.2666334413157239, |
|
"grad_norm": 0.11118455231189728, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6874, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.2671318215798654, |
|
"grad_norm": 0.12459013611078262, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6959, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.267630201844007, |
|
"grad_norm": 0.10970738530158997, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6167, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.2681285821081485, |
|
"grad_norm": 0.1440659761428833, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7254, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.26862696237229006, |
|
"grad_norm": 0.11448108404874802, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6896, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.2691253426364316, |
|
"grad_norm": 0.11026275157928467, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6675, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2696237229005731, |
|
"grad_norm": 0.10443202406167984, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7035, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.2701221031647147, |
|
"grad_norm": 0.11404629796743393, |
|
"learning_rate": 0.0002, |
|
"loss": 1.727, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.2706204834288562, |
|
"grad_norm": 0.12783807516098022, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7468, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.27111886369299776, |
|
"grad_norm": 0.1040879487991333, |
|
"learning_rate": 0.0002, |
|
"loss": 1.642, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.2716172439571393, |
|
"grad_norm": 0.10120297223329544, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6792, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.2721156242212808, |
|
"grad_norm": 0.11116039007902145, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6685, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.2726140044854224, |
|
"grad_norm": 0.353816956281662, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7458, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.2731123847495639, |
|
"grad_norm": 0.10361409932374954, |
|
"learning_rate": 0.0002, |
|
"loss": 1.583, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.27361076501370546, |
|
"grad_norm": 0.10164079070091248, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7219, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.274109145277847, |
|
"grad_norm": 0.3576943278312683, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7155, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.27460752554198853, |
|
"grad_norm": 0.1307370960712433, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6491, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.2751059058061301, |
|
"grad_norm": 0.11267419159412384, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6299, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.2756042860702716, |
|
"grad_norm": 0.10955934971570969, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6972, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.27610266633441316, |
|
"grad_norm": 0.3629993796348572, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6558, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.2766010465985547, |
|
"grad_norm": 0.10678595304489136, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7133, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.27709942686269623, |
|
"grad_norm": 0.3551732301712036, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7884, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.2775978071268378, |
|
"grad_norm": 0.1157960370182991, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6664, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.2780961873909793, |
|
"grad_norm": 0.4219015836715698, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6258, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.27859456765512086, |
|
"grad_norm": 0.1442400962114334, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7081, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.2790929479192624, |
|
"grad_norm": 0.12307796627283096, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5812, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.27959132818340393, |
|
"grad_norm": 0.13523195683956146, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6644, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.2800897084475455, |
|
"grad_norm": 0.14576253294944763, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6724, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.280588088711687, |
|
"grad_norm": 0.1239597350358963, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6501, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.28108646897582856, |
|
"grad_norm": 0.11444118618965149, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6218, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.28158484923997007, |
|
"grad_norm": 0.11568321287631989, |
|
"learning_rate": 0.0002, |
|
"loss": 1.622, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.28208322950411163, |
|
"grad_norm": 0.1155436560511589, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6856, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.2825816097682532, |
|
"grad_norm": 0.10945037007331848, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5764, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.2830799900323947, |
|
"grad_norm": 0.5043824315071106, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7022, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.28357837029653626, |
|
"grad_norm": 0.7879558801651001, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8313, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.28407675056067777, |
|
"grad_norm": 0.13888636231422424, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6418, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.28457513082481933, |
|
"grad_norm": 0.16137146949768066, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6884, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.2850735110889609, |
|
"grad_norm": 0.2237291783094406, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7934, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.2855718913531024, |
|
"grad_norm": 0.14624369144439697, |
|
"learning_rate": 0.0002, |
|
"loss": 1.676, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.28607027161724397, |
|
"grad_norm": 0.1463831216096878, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5869, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.28656865188138547, |
|
"grad_norm": 0.14725126326084137, |
|
"learning_rate": 0.0002, |
|
"loss": 1.632, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.28706703214552703, |
|
"grad_norm": 0.13732214272022247, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7513, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.2875654124096686, |
|
"grad_norm": 0.14334504306316376, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6318, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.2880637926738101, |
|
"grad_norm": 0.8194677829742432, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8945, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.28856217293795167, |
|
"grad_norm": 0.1749170422554016, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6608, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.2890605532020932, |
|
"grad_norm": 0.12977321445941925, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6363, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.28955893346623474, |
|
"grad_norm": 0.2908933162689209, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8448, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.2900573137303763, |
|
"grad_norm": 0.17108629643917084, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6822, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.2905556939945178, |
|
"grad_norm": 0.14702463150024414, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7491, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.29105407425865937, |
|
"grad_norm": 0.12582743167877197, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6245, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.2915524545228009, |
|
"grad_norm": 0.14732137322425842, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6916, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.29205083478694244, |
|
"grad_norm": 0.12849657237529755, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6583, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.292549215051084, |
|
"grad_norm": 0.11466097086668015, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6306, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.2930475953152255, |
|
"grad_norm": 0.12361207604408264, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6765, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.29354597557936707, |
|
"grad_norm": 0.1265360414981842, |
|
"learning_rate": 0.0002, |
|
"loss": 1.667, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.2940443558435086, |
|
"grad_norm": 0.11903838813304901, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6567, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.29454273610765014, |
|
"grad_norm": 0.8345243334770203, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6467, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.2950411163717917, |
|
"grad_norm": 0.1365821361541748, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7028, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.2955394966359332, |
|
"grad_norm": 0.13564884662628174, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6129, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.29603787690007477, |
|
"grad_norm": 0.13604499399662018, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7387, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.2965362571642163, |
|
"grad_norm": 0.12102136015892029, |
|
"learning_rate": 0.0002, |
|
"loss": 1.632, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.29703463742835784, |
|
"grad_norm": 0.11927222460508347, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7149, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.2975330176924994, |
|
"grad_norm": 0.10716401040554047, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6268, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.2980313979566409, |
|
"grad_norm": 0.12001641094684601, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6879, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.29852977822078247, |
|
"grad_norm": 0.11045756936073303, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6871, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.299028158484924, |
|
"grad_norm": 0.7450900077819824, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8146, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.29952653874906554, |
|
"grad_norm": 0.16306158900260925, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7092, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.3000249190132071, |
|
"grad_norm": 0.43425318598747253, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7405, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.3005232992773486, |
|
"grad_norm": 0.16279961168766022, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.3010216795414902, |
|
"grad_norm": 0.1403011977672577, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5979, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.3015200598056317, |
|
"grad_norm": 0.13146822154521942, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5689, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.30201844006977324, |
|
"grad_norm": 0.15902653336524963, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6664, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.3025168203339148, |
|
"grad_norm": 0.12351160496473312, |
|
"learning_rate": 0.0002, |
|
"loss": 1.714, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.3030152005980563, |
|
"grad_norm": 0.1543518602848053, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6432, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.3035135808621979, |
|
"grad_norm": 0.11827117949724197, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6325, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.3040119611263394, |
|
"grad_norm": 0.5559304356575012, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6789, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.30451034139048094, |
|
"grad_norm": 0.13763754069805145, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6715, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.30500872165462245, |
|
"grad_norm": 0.12646999955177307, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7162, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.305507101918764, |
|
"grad_norm": 0.34849414229393005, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6708, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.3060054821829056, |
|
"grad_norm": 0.11648757755756378, |
|
"learning_rate": 0.0002, |
|
"loss": 1.646, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.3065038624470471, |
|
"grad_norm": 0.13477148115634918, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6502, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.30700224271118864, |
|
"grad_norm": 0.1102217361330986, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6729, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.30750062297533015, |
|
"grad_norm": 0.5752671957015991, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6233, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.3079990032394717, |
|
"grad_norm": 0.13107599318027496, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6636, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.3084973835036133, |
|
"grad_norm": 0.11860768496990204, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7313, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.3089957637677548, |
|
"grad_norm": 0.1229948177933693, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6327, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.30949414403189635, |
|
"grad_norm": 0.30836552381515503, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6969, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.30999252429603785, |
|
"grad_norm": 0.11798208951950073, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7364, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.3104909045601794, |
|
"grad_norm": 0.4807080030441284, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6899, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.310989284824321, |
|
"grad_norm": 0.1726754605770111, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8045, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.3114876650884625, |
|
"grad_norm": 0.13296914100646973, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6966, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.31198604535260405, |
|
"grad_norm": 0.14966656267642975, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6685, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.31248442561674555, |
|
"grad_norm": 0.3757789731025696, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7225, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.3129828058808871, |
|
"grad_norm": 0.1234004870057106, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6204, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.3134811861450287, |
|
"grad_norm": 0.12280552089214325, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6913, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.3139795664091702, |
|
"grad_norm": 0.12360548228025436, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6808, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.31447794667331175, |
|
"grad_norm": 0.1292014867067337, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6697, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.31497632693745325, |
|
"grad_norm": 0.11038494855165482, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6103, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.3154747072015948, |
|
"grad_norm": 0.11607655137777328, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6241, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.3159730874657364, |
|
"grad_norm": 0.10514742881059647, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6922, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.3164714677298779, |
|
"grad_norm": 0.107606902718544, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6975, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.31696984799401945, |
|
"grad_norm": 0.20367765426635742, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5704, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.31746822825816096, |
|
"grad_norm": 0.10455407947301865, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7109, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.3179666085223025, |
|
"grad_norm": 0.48424893617630005, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5871, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.3184649887864441, |
|
"grad_norm": 0.16340336203575134, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6856, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.3189633690505856, |
|
"grad_norm": 0.1317445933818817, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6904, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.31946174931472715, |
|
"grad_norm": 0.12784677743911743, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6983, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.31996012957886866, |
|
"grad_norm": 0.10745134204626083, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6353, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.3204585098430102, |
|
"grad_norm": 0.1444125920534134, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7109, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.3209568901071518, |
|
"grad_norm": 0.3750239908695221, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6571, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.3214552703712933, |
|
"grad_norm": 0.11034873872995377, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6547, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.32195365063543485, |
|
"grad_norm": 0.10759663581848145, |
|
"learning_rate": 0.0002, |
|
"loss": 1.628, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.32245203089957636, |
|
"grad_norm": 0.11017131060361862, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6877, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.3229504111637179, |
|
"grad_norm": 0.1253817230463028, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7226, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.3234487914278595, |
|
"grad_norm": 0.5153695344924927, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7687, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.323947171692001, |
|
"grad_norm": 0.11948184669017792, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7044, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.32444555195614255, |
|
"grad_norm": 0.11249465495347977, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6282, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.32494393222028406, |
|
"grad_norm": 0.11555810272693634, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7295, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.3254423124844256, |
|
"grad_norm": 0.11882718652486801, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6531, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.32594069274856713, |
|
"grad_norm": 0.10453632473945618, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6342, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.3264390730127087, |
|
"grad_norm": 0.11219029873609543, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6902, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.32693745327685025, |
|
"grad_norm": 0.10499835759401321, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5583, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.32743583354099176, |
|
"grad_norm": 0.10964427143335342, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5675, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.3279342138051333, |
|
"grad_norm": 0.18510489165782928, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6178, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.32843259406927483, |
|
"grad_norm": 0.11548275500535965, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6699, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.3289309743334164, |
|
"grad_norm": 0.11357063800096512, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6008, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.32942935459755796, |
|
"grad_norm": 0.10668730735778809, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6433, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.32992773486169946, |
|
"grad_norm": 0.11750250309705734, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6813, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.330426115125841, |
|
"grad_norm": 0.8277010321617126, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7333, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.33092449538998253, |
|
"grad_norm": 0.165303573012352, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6812, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.3314228756541241, |
|
"grad_norm": 0.12780268490314484, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7106, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.33192125591826566, |
|
"grad_norm": 0.13066166639328003, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6846, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.33241963618240716, |
|
"grad_norm": 0.12650184333324432, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6144, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.3329180164465487, |
|
"grad_norm": 0.12420842051506042, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7015, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.33341639671069023, |
|
"grad_norm": 0.1261165291070938, |
|
"learning_rate": 0.0002, |
|
"loss": 1.67, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.3339147769748318, |
|
"grad_norm": 0.11121337115764618, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6772, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.33441315723897336, |
|
"grad_norm": 0.10835525393486023, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6681, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.33491153750311486, |
|
"grad_norm": 0.10837749391794205, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6268, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.3354099177672564, |
|
"grad_norm": 0.10254842787981033, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5997, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.33590829803139793, |
|
"grad_norm": 0.5288554430007935, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7397, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.3364066782955395, |
|
"grad_norm": 0.10820039361715317, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6962, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.33690505855968106, |
|
"grad_norm": 0.11754646897315979, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6059, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.33740343882382257, |
|
"grad_norm": 0.9506744742393494, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8916, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.33790181908796413, |
|
"grad_norm": 0.1273750215768814, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6896, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.33840019935210564, |
|
"grad_norm": 0.14315767586231232, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6903, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.3388985796162472, |
|
"grad_norm": 0.15645241737365723, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6823, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.33939695988038876, |
|
"grad_norm": 0.5159462690353394, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6947, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.33989534014453027, |
|
"grad_norm": 0.13883577287197113, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7448, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.34039372040867183, |
|
"grad_norm": 0.39283788204193115, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6181, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.34089210067281334, |
|
"grad_norm": 0.20534516870975494, |
|
"learning_rate": 0.0002, |
|
"loss": 1.721, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.3413904809369549, |
|
"grad_norm": 0.14379210770130157, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6955, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.34188886120109646, |
|
"grad_norm": 0.1505320966243744, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7168, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.34238724146523797, |
|
"grad_norm": 0.1377919316291809, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7001, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.34288562172937953, |
|
"grad_norm": 0.1268286257982254, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6405, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.34338400199352104, |
|
"grad_norm": 0.11991781741380692, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6862, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.3438823822576626, |
|
"grad_norm": 0.12283925712108612, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7222, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.34438076252180416, |
|
"grad_norm": 0.11207298189401627, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6477, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.34487914278594567, |
|
"grad_norm": 0.11342150717973709, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6907, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.34537752305008723, |
|
"grad_norm": 0.1479737013578415, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6982, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.34587590331422874, |
|
"grad_norm": 0.11498729884624481, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6604, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.3463742835783703, |
|
"grad_norm": 0.12394261360168457, |
|
"learning_rate": 0.0002, |
|
"loss": 1.699, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.34687266384251186, |
|
"grad_norm": 0.12563689053058624, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6637, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.34737104410665337, |
|
"grad_norm": 0.10661863535642624, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6921, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.34786942437079493, |
|
"grad_norm": 0.10778840631246567, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6719, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.34836780463493644, |
|
"grad_norm": 0.10504487156867981, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6616, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.348866184899078, |
|
"grad_norm": 0.10722413659095764, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6452, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3493645651632195, |
|
"grad_norm": 0.10450419783592224, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6342, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.34986294542736107, |
|
"grad_norm": 0.10961712151765823, |
|
"learning_rate": 0.0002, |
|
"loss": 1.68, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.35036132569150263, |
|
"grad_norm": 0.10789170861244202, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6662, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.35085970595564414, |
|
"grad_norm": 0.10823702067136765, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6733, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.3513580862197857, |
|
"grad_norm": 0.11080746352672577, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6332, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.3518564664839272, |
|
"grad_norm": 0.10004162788391113, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5841, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.3523548467480688, |
|
"grad_norm": 0.10398257523775101, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6735, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.35285322701221034, |
|
"grad_norm": 0.10170764476060867, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6584, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.35335160727635184, |
|
"grad_norm": 0.8194452524185181, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8272, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.3538499875404934, |
|
"grad_norm": 0.15103065967559814, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6954, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3543483678046349, |
|
"grad_norm": 0.12205032259225845, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6823, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.3548467480687765, |
|
"grad_norm": 0.1272657811641693, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5557, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.35534512833291804, |
|
"grad_norm": 0.503338634967804, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7847, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.35584350859705954, |
|
"grad_norm": 0.11442038416862488, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6633, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.3563418888612011, |
|
"grad_norm": 0.1573084145784378, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7377, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.3568402691253426, |
|
"grad_norm": 0.11450973153114319, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5862, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.3573386493894842, |
|
"grad_norm": 0.1249619573354721, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5954, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.35783702965362574, |
|
"grad_norm": 0.11494952440261841, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6432, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.35833540991776724, |
|
"grad_norm": 0.13213759660720825, |
|
"learning_rate": 0.0002, |
|
"loss": 1.803, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.3588337901819088, |
|
"grad_norm": 1.1261271238327026, |
|
"learning_rate": 0.0002, |
|
"loss": 1.818, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3593321704460503, |
|
"grad_norm": 1.338255524635315, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7306, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.3598305507101919, |
|
"grad_norm": 0.21815264225006104, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7224, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.36032893097433344, |
|
"grad_norm": 0.5178132653236389, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7097, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.36082731123847495, |
|
"grad_norm": 0.241803839802742, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7047, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.3613256915026165, |
|
"grad_norm": 0.20727293193340302, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7278, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.361824071766758, |
|
"grad_norm": 0.16459515690803528, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7204, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.3623224520308996, |
|
"grad_norm": 0.16415144503116608, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6764, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.36282083229504114, |
|
"grad_norm": 0.16096027195453644, |
|
"learning_rate": 0.0002, |
|
"loss": 1.665, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.36331921255918265, |
|
"grad_norm": 0.17240643501281738, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6761, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.3638175928233242, |
|
"grad_norm": 0.19763271510601044, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7402, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3643159730874657, |
|
"grad_norm": 0.15238463878631592, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6884, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.3648143533516073, |
|
"grad_norm": 0.27482038736343384, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7064, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.36531273361574884, |
|
"grad_norm": 0.5192012786865234, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8117, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.36581111387989035, |
|
"grad_norm": 0.1510191708803177, |
|
"learning_rate": 0.0002, |
|
"loss": 1.667, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.3663094941440319, |
|
"grad_norm": 0.14513470232486725, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6431, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.3668078744081734, |
|
"grad_norm": 0.7901990413665771, |
|
"learning_rate": 0.0002, |
|
"loss": 1.764, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.367306254672315, |
|
"grad_norm": 0.17642100155353546, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7096, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.36780463493645654, |
|
"grad_norm": 0.14719779789447784, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6343, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.36830301520059805, |
|
"grad_norm": 0.16173601150512695, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6937, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.3688013954647396, |
|
"grad_norm": 0.32359546422958374, |
|
"learning_rate": 0.0002, |
|
"loss": 1.681, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3692997757288811, |
|
"grad_norm": 0.14779435098171234, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6745, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.3697981559930227, |
|
"grad_norm": 0.19540923833847046, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5529, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.37029653625716424, |
|
"grad_norm": 0.13870155811309814, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6497, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.37079491652130575, |
|
"grad_norm": 0.13447612524032593, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7275, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.3712932967854473, |
|
"grad_norm": 0.13197576999664307, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6776, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.3717916770495888, |
|
"grad_norm": 0.13072870671749115, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6227, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.3722900573137304, |
|
"grad_norm": 0.13418208062648773, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6998, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.3727884375778719, |
|
"grad_norm": 0.11689562350511551, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6863, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.37328681784201345, |
|
"grad_norm": 0.1243453249335289, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6456, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.373785198106155, |
|
"grad_norm": 0.11520450562238693, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6815, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3742835783702965, |
|
"grad_norm": 0.13939018547534943, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6556, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.3747819586344381, |
|
"grad_norm": 0.11021385341882706, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6923, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.3752803388985796, |
|
"grad_norm": 0.11470180004835129, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6402, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.37577871916272115, |
|
"grad_norm": 0.12256886065006256, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7271, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.3762770994268627, |
|
"grad_norm": 0.11696486920118332, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7069, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.3767754796910042, |
|
"grad_norm": 0.11340934783220291, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6261, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.3772738599551458, |
|
"grad_norm": 0.10606078803539276, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6425, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.3777722402192873, |
|
"grad_norm": 0.12084966152906418, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6273, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.37827062048342885, |
|
"grad_norm": 0.1084008663892746, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6471, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.3787690007475704, |
|
"grad_norm": 0.11194922029972076, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6478, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3792673810117119, |
|
"grad_norm": 0.48235663771629333, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5982, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.3797657612758535, |
|
"grad_norm": 0.586637556552887, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7294, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.380264141539995, |
|
"grad_norm": 0.14328181743621826, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7112, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.38076252180413656, |
|
"grad_norm": 0.13296020030975342, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7044, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.3812609020682781, |
|
"grad_norm": 0.44004350900650024, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6377, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.3817592823324196, |
|
"grad_norm": 0.12628889083862305, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6192, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.3822576625965612, |
|
"grad_norm": 0.1330346316099167, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6461, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.3827560428607027, |
|
"grad_norm": 0.11893340200185776, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6299, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.38325442312484426, |
|
"grad_norm": 0.15412816405296326, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7436, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.3837528033889858, |
|
"grad_norm": 0.12351204454898834, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6844, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3842511836531273, |
|
"grad_norm": 0.11671744287014008, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6748, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.3847495639172689, |
|
"grad_norm": 0.12512736022472382, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6362, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.3852479441814104, |
|
"grad_norm": 0.12629447877407074, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6033, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.38574632444555196, |
|
"grad_norm": 0.11553051322698593, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6639, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.3862447047096935, |
|
"grad_norm": 0.12756189703941345, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6397, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.386743084973835, |
|
"grad_norm": 0.11309953778982162, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6098, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.3872414652379766, |
|
"grad_norm": 0.164617121219635, |
|
"learning_rate": 0.0002, |
|
"loss": 1.54, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.3877398455021181, |
|
"grad_norm": 0.45813101530075073, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7208, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.38823822576625966, |
|
"grad_norm": 0.7587694525718689, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6195, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.3887366060304012, |
|
"grad_norm": 0.12699078023433685, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6596, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.38923498629454273, |
|
"grad_norm": 0.139120951294899, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6511, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.3897333665586843, |
|
"grad_norm": 0.13968676328659058, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7033, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.3902317468228258, |
|
"grad_norm": 0.28061848878860474, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6016, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.39073012708696736, |
|
"grad_norm": 0.11748450994491577, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5984, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.3912285073511089, |
|
"grad_norm": 0.7288643717765808, |
|
"learning_rate": 0.0002, |
|
"loss": 1.769, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.39172688761525043, |
|
"grad_norm": 0.12540021538734436, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6622, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.392225267879392, |
|
"grad_norm": 0.13594292104244232, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6626, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.3927236481435335, |
|
"grad_norm": 0.12894773483276367, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5733, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.39322202840767506, |
|
"grad_norm": 0.6577300429344177, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8085, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.39372040867181657, |
|
"grad_norm": 0.12034627795219421, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5798, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.39421878893595813, |
|
"grad_norm": 0.1254388988018036, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6677, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.3947171692000997, |
|
"grad_norm": 0.136959508061409, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6108, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.3952155494642412, |
|
"grad_norm": 0.37221673130989075, |
|
"learning_rate": 0.0002, |
|
"loss": 1.826, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.39571392972838276, |
|
"grad_norm": 0.14947831630706787, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6967, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.39621230999252427, |
|
"grad_norm": 0.1409454494714737, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7217, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.39671069025666583, |
|
"grad_norm": 0.1448691040277481, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7872, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.3972090705208074, |
|
"grad_norm": 0.12816311419010162, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6976, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.3977074507849489, |
|
"grad_norm": 0.12581898272037506, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7111, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.39820583104909046, |
|
"grad_norm": 0.1256158947944641, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6778, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.39870421131323197, |
|
"grad_norm": 0.12009266763925552, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6336, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.39920259157737353, |
|
"grad_norm": 0.14727051556110382, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7165, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.3997009718415151, |
|
"grad_norm": 1.98500394821167, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9632, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.4001993521056566, |
|
"grad_norm": 0.12300129979848862, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6003, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.40069773236979817, |
|
"grad_norm": 0.13758836686611176, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6486, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.40119611263393967, |
|
"grad_norm": 0.13127754628658295, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6673, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.40169449289808123, |
|
"grad_norm": 0.13612794876098633, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7149, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.4021928731622228, |
|
"grad_norm": 0.3637385964393616, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6486, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.4026912534263643, |
|
"grad_norm": 0.19778436422348022, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5517, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.40318963369050587, |
|
"grad_norm": 0.1478605717420578, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7642, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.4036880139546474, |
|
"grad_norm": 0.3014202415943146, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6141, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.40418639421878894, |
|
"grad_norm": 0.13049842417240143, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6579, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.4046847744829305, |
|
"grad_norm": 0.932788610458374, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7722, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.405183154747072, |
|
"grad_norm": 0.1687835305929184, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6492, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.40568153501121357, |
|
"grad_norm": 0.2024388164281845, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5523, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.4061799152753551, |
|
"grad_norm": 0.20838886499404907, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6884, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.40667829553949664, |
|
"grad_norm": 0.1490757167339325, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6936, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.4071766758036382, |
|
"grad_norm": 1.1997255086898804, |
|
"learning_rate": 0.0002, |
|
"loss": 1.873, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.4076750560677797, |
|
"grad_norm": 0.139000803232193, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7303, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.40817343633192127, |
|
"grad_norm": 0.14747615158557892, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6558, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.4086718165960628, |
|
"grad_norm": 0.15866988897323608, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6991, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.40917019686020434, |
|
"grad_norm": 0.14660963416099548, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7233, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.4096685771243459, |
|
"grad_norm": 0.14071424305438995, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6434, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.4101669573884874, |
|
"grad_norm": 0.1368856132030487, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6415, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.41066533765262897, |
|
"grad_norm": 0.14662376046180725, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7111, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.4111637179167705, |
|
"grad_norm": 0.14027300477027893, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6698, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.41166209818091204, |
|
"grad_norm": 0.5542290210723877, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6551, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.4121604784450536, |
|
"grad_norm": 0.15360352396965027, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7313, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.4126588587091951, |
|
"grad_norm": 0.14451801776885986, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6481, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.41315723897333667, |
|
"grad_norm": 0.1393883228302002, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5922, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.4136556192374782, |
|
"grad_norm": 0.13610626757144928, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6347, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.41415399950161974, |
|
"grad_norm": 0.12424327433109283, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6563, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.4146523797657613, |
|
"grad_norm": 0.127548947930336, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6609, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.4151507600299028, |
|
"grad_norm": 0.1881740391254425, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7251, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.4156491402940444, |
|
"grad_norm": 0.12144262343645096, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6922, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.4161475205581859, |
|
"grad_norm": 0.11799559742212296, |
|
"learning_rate": 0.0002, |
|
"loss": 1.672, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.41664590082232744, |
|
"grad_norm": 0.12129071354866028, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6189, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.41714428108646895, |
|
"grad_norm": 0.11648084223270416, |
|
"learning_rate": 0.0002, |
|
"loss": 1.636, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.4176426613506105, |
|
"grad_norm": 0.11401843279600143, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6266, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.4181410416147521, |
|
"grad_norm": 0.11244560778141022, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6338, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.4186394218788936, |
|
"grad_norm": 0.11274567991495132, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5518, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.41913780214303514, |
|
"grad_norm": 0.11203539371490479, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6372, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.41963618240717665, |
|
"grad_norm": 0.11548861116170883, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5787, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.4201345626713182, |
|
"grad_norm": 0.10921257734298706, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6457, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.4206329429354598, |
|
"grad_norm": 0.10832211375236511, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6613, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.4211313231996013, |
|
"grad_norm": 0.11785157024860382, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6687, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.42162970346374284, |
|
"grad_norm": 0.1575067639350891, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7148, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.42212808372788435, |
|
"grad_norm": 0.5687432885169983, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8016, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.4226264639920259, |
|
"grad_norm": 0.887058675289154, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7988, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.4231248442561675, |
|
"grad_norm": 0.12778295576572418, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6586, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.423623224520309, |
|
"grad_norm": 0.13481804728507996, |
|
"learning_rate": 0.0002, |
|
"loss": 1.696, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.42412160478445055, |
|
"grad_norm": 0.1478685438632965, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6758, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.42461998504859205, |
|
"grad_norm": 0.13414372503757477, |
|
"learning_rate": 0.0002, |
|
"loss": 1.657, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.4251183653127336, |
|
"grad_norm": 0.13211821019649506, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6403, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.4256167455768752, |
|
"grad_norm": 0.13594435155391693, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6363, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.4261151258410167, |
|
"grad_norm": 0.13266883790493011, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6632, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.42661350610515825, |
|
"grad_norm": 0.12024448066949844, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6745, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.42711188636929975, |
|
"grad_norm": 0.12828536331653595, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6493, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.4276102666334413, |
|
"grad_norm": 0.12315808236598969, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6803, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.4281086468975829, |
|
"grad_norm": 0.13026510179042816, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6536, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.4286070271617244, |
|
"grad_norm": 0.45274946093559265, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7579, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.42910540742586595, |
|
"grad_norm": 0.12899275124073029, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6603, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.42960378769000745, |
|
"grad_norm": 0.12414630502462387, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6933, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.430102167954149, |
|
"grad_norm": 0.146366149187088, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6799, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.4306005482182906, |
|
"grad_norm": 0.11743781715631485, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6395, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.4310989284824321, |
|
"grad_norm": 0.15248535573482513, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7598, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.43159730874657365, |
|
"grad_norm": 0.11914569139480591, |
|
"learning_rate": 0.0002, |
|
"loss": 1.663, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.43209568901071516, |
|
"grad_norm": 0.11982624977827072, |
|
"learning_rate": 0.0002, |
|
"loss": 1.651, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.4325940692748567, |
|
"grad_norm": 0.12126267701387405, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7153, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.4330924495389983, |
|
"grad_norm": 0.3660570979118347, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6142, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.4335908298031398, |
|
"grad_norm": 0.11174522340297699, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6199, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.43408921006728135, |
|
"grad_norm": 0.12089698761701584, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7026, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.43458759033142286, |
|
"grad_norm": 0.11779413372278214, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6757, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.4350859705955644, |
|
"grad_norm": 0.11461353302001953, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6943, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.435584350859706, |
|
"grad_norm": 0.1294202357530594, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7078, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.4360827311238475, |
|
"grad_norm": 0.1081145629286766, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6078, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.43658111138798905, |
|
"grad_norm": 0.11721238493919373, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6056, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.43707949165213056, |
|
"grad_norm": 0.11436528712511063, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6806, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.4375778719162721, |
|
"grad_norm": 0.11401306092739105, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7225, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.4380762521804137, |
|
"grad_norm": 0.11282623559236526, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6614, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.4385746324445552, |
|
"grad_norm": 0.11592991650104523, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5984, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.43907301270869675, |
|
"grad_norm": 0.10579363256692886, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6349, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.43957139297283826, |
|
"grad_norm": 0.1032218486070633, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6017, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.4400697732369798, |
|
"grad_norm": 0.10277747362852097, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6396, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.44056815350112133, |
|
"grad_norm": 0.12377838790416718, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6298, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.4410665337652629, |
|
"grad_norm": 0.10326054692268372, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6335, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.44156491402940445, |
|
"grad_norm": 0.10518341511487961, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6343, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.44206329429354596, |
|
"grad_norm": 0.10297736525535583, |
|
"learning_rate": 0.0002, |
|
"loss": 1.622, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.4425616745576875, |
|
"grad_norm": 0.10891593992710114, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6928, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.44306005482182903, |
|
"grad_norm": 0.10570312291383743, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5769, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.4435584350859706, |
|
"grad_norm": 0.10274644941091537, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7139, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.44405681535011216, |
|
"grad_norm": 0.11095419526100159, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6141, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.44455519561425366, |
|
"grad_norm": 0.14802560210227966, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6019, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.4450535758783952, |
|
"grad_norm": 0.10468854010105133, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5875, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.44555195614253673, |
|
"grad_norm": 0.10267975926399231, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6071, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.4460503364066783, |
|
"grad_norm": 0.10226966440677643, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6654, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.44654871667081986, |
|
"grad_norm": 0.1046745628118515, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6244, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.44704709693496136, |
|
"grad_norm": 0.5514235496520996, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6949, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.4475454771991029, |
|
"grad_norm": 0.10770034044981003, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6388, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.44804385746324443, |
|
"grad_norm": 0.1274634599685669, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7169, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.448542237727386, |
|
"grad_norm": 0.11944198608398438, |
|
"learning_rate": 0.0002, |
|
"loss": 1.635, |
|
"step": 900 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 4012, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.024817808581591e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|