|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.907258064516129, |
|
"eval_steps": 500, |
|
"global_step": 900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010080645161290322, |
|
"grad_norm": 0.9473515748977661, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.9769, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0020161290322580645, |
|
"grad_norm": 0.9036028981208801, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.9331, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0030241935483870967, |
|
"grad_norm": 0.9499556422233582, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.9852, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.004032258064516129, |
|
"grad_norm": 0.903069019317627, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.9668, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.005040322580645161, |
|
"grad_norm": 0.5635794997215271, |
|
"learning_rate": 2e-05, |
|
"loss": 1.9327, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.006048387096774193, |
|
"grad_norm": 0.9521661996841431, |
|
"learning_rate": 2.4e-05, |
|
"loss": 2.0026, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.007056451612903226, |
|
"grad_norm": 0.4393383860588074, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 1.8885, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.008064516129032258, |
|
"grad_norm": 0.36857879161834717, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 1.8537, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.009072580645161291, |
|
"grad_norm": 0.3844268321990967, |
|
"learning_rate": 3.6e-05, |
|
"loss": 1.8874, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.010080645161290322, |
|
"grad_norm": 0.41415101289749146, |
|
"learning_rate": 4e-05, |
|
"loss": 1.9386, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.011088709677419355, |
|
"grad_norm": 0.3869949281215668, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 1.9359, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.012096774193548387, |
|
"grad_norm": 0.3345952033996582, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.903, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01310483870967742, |
|
"grad_norm": 0.3590312600135803, |
|
"learning_rate": 5.2000000000000004e-05, |
|
"loss": 1.9024, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.014112903225806451, |
|
"grad_norm": 0.2288215309381485, |
|
"learning_rate": 5.6000000000000006e-05, |
|
"loss": 1.8431, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.015120967741935484, |
|
"grad_norm": 0.20984530448913574, |
|
"learning_rate": 6e-05, |
|
"loss": 1.8522, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.016129032258064516, |
|
"grad_norm": 0.2080329954624176, |
|
"learning_rate": 6.400000000000001e-05, |
|
"loss": 1.9895, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.017137096774193547, |
|
"grad_norm": 0.20060451328754425, |
|
"learning_rate": 6.800000000000001e-05, |
|
"loss": 1.8289, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.018145161290322582, |
|
"grad_norm": 0.16062042117118835, |
|
"learning_rate": 7.2e-05, |
|
"loss": 1.8823, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.019153225806451613, |
|
"grad_norm": 0.15423905849456787, |
|
"learning_rate": 7.6e-05, |
|
"loss": 1.7997, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.020161290322580645, |
|
"grad_norm": 0.15496863424777985, |
|
"learning_rate": 8e-05, |
|
"loss": 1.8237, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021169354838709676, |
|
"grad_norm": 0.16305851936340332, |
|
"learning_rate": 8.4e-05, |
|
"loss": 1.7973, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02217741935483871, |
|
"grad_norm": 0.1680663675069809, |
|
"learning_rate": 8.800000000000001e-05, |
|
"loss": 1.82, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.023185483870967742, |
|
"grad_norm": 0.16471807658672333, |
|
"learning_rate": 9.200000000000001e-05, |
|
"loss": 1.8314, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.024193548387096774, |
|
"grad_norm": 0.13601982593536377, |
|
"learning_rate": 9.6e-05, |
|
"loss": 1.8488, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.025201612903225805, |
|
"grad_norm": 0.12553684413433075, |
|
"learning_rate": 0.0001, |
|
"loss": 1.839, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02620967741935484, |
|
"grad_norm": 0.12679991126060486, |
|
"learning_rate": 0.00010400000000000001, |
|
"loss": 1.8615, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.02721774193548387, |
|
"grad_norm": 0.1284348964691162, |
|
"learning_rate": 0.00010800000000000001, |
|
"loss": 1.8215, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.028225806451612902, |
|
"grad_norm": 0.11629381030797958, |
|
"learning_rate": 0.00011200000000000001, |
|
"loss": 1.8536, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.029233870967741934, |
|
"grad_norm": 0.10016848891973495, |
|
"learning_rate": 0.000116, |
|
"loss": 1.8095, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.03024193548387097, |
|
"grad_norm": 0.10154619067907333, |
|
"learning_rate": 0.00012, |
|
"loss": 1.8355, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03125, |
|
"grad_norm": 0.11825895309448242, |
|
"learning_rate": 0.000124, |
|
"loss": 1.7984, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.03225806451612903, |
|
"grad_norm": 0.104405976831913, |
|
"learning_rate": 0.00012800000000000002, |
|
"loss": 1.7673, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.03326612903225806, |
|
"grad_norm": 0.09943860024213791, |
|
"learning_rate": 0.000132, |
|
"loss": 1.813, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.034274193548387094, |
|
"grad_norm": 0.10970743000507355, |
|
"learning_rate": 0.00013600000000000003, |
|
"loss": 1.9213, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03528225806451613, |
|
"grad_norm": 0.1049584224820137, |
|
"learning_rate": 0.00014, |
|
"loss": 1.7818, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.036290322580645164, |
|
"grad_norm": 0.08986247330904007, |
|
"learning_rate": 0.000144, |
|
"loss": 1.7944, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.037298387096774195, |
|
"grad_norm": 0.09243710339069366, |
|
"learning_rate": 0.000148, |
|
"loss": 1.7158, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.038306451612903226, |
|
"grad_norm": 0.10768643021583557, |
|
"learning_rate": 0.000152, |
|
"loss": 1.8295, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03931451612903226, |
|
"grad_norm": 0.07883578538894653, |
|
"learning_rate": 0.00015600000000000002, |
|
"loss": 1.757, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.04032258064516129, |
|
"grad_norm": 0.10219922661781311, |
|
"learning_rate": 0.00016, |
|
"loss": 1.7423, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04133064516129032, |
|
"grad_norm": 0.08045803755521774, |
|
"learning_rate": 0.000164, |
|
"loss": 1.7649, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.04233870967741935, |
|
"grad_norm": 0.07191110402345657, |
|
"learning_rate": 0.000168, |
|
"loss": 1.7441, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.04334677419354839, |
|
"grad_norm": 0.08571028709411621, |
|
"learning_rate": 0.000172, |
|
"loss": 1.8094, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04435483870967742, |
|
"grad_norm": 0.08775891363620758, |
|
"learning_rate": 0.00017600000000000002, |
|
"loss": 1.817, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.04536290322580645, |
|
"grad_norm": 0.08328275382518768, |
|
"learning_rate": 0.00018, |
|
"loss": 1.7753, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.046370967741935484, |
|
"grad_norm": 0.08221882581710815, |
|
"learning_rate": 0.00018400000000000003, |
|
"loss": 1.7824, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.047379032258064516, |
|
"grad_norm": 0.0885847732424736, |
|
"learning_rate": 0.000188, |
|
"loss": 1.7423, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.04838709677419355, |
|
"grad_norm": 0.08126149326562881, |
|
"learning_rate": 0.000192, |
|
"loss": 1.7495, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04939516129032258, |
|
"grad_norm": 0.08296285569667816, |
|
"learning_rate": 0.000196, |
|
"loss": 1.6909, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.05040322580645161, |
|
"grad_norm": 0.09005258232355118, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8159, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05141129032258065, |
|
"grad_norm": 0.08956532180309296, |
|
"learning_rate": 0.00019999986806600454, |
|
"loss": 1.6662, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.05241935483870968, |
|
"grad_norm": 0.08471240848302841, |
|
"learning_rate": 0.00019999947226436628, |
|
"loss": 1.8274, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.05342741935483871, |
|
"grad_norm": 0.09117641299962997, |
|
"learning_rate": 0.00019999881259612963, |
|
"loss": 1.7027, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.05443548387096774, |
|
"grad_norm": 0.08552085608243942, |
|
"learning_rate": 0.00019999788906303518, |
|
"loss": 1.7738, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.055443548387096774, |
|
"grad_norm": 0.07708004862070084, |
|
"learning_rate": 0.00019999670166751993, |
|
"loss": 1.7821, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.056451612903225805, |
|
"grad_norm": 0.07826384156942368, |
|
"learning_rate": 0.000199995250412717, |
|
"loss": 1.7579, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.057459677419354836, |
|
"grad_norm": 0.0721641331911087, |
|
"learning_rate": 0.00019999353530245572, |
|
"loss": 1.7372, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.05846774193548387, |
|
"grad_norm": 0.07667742669582367, |
|
"learning_rate": 0.0001999915563412618, |
|
"loss": 1.7323, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.059475806451612906, |
|
"grad_norm": 0.10455285757780075, |
|
"learning_rate": 0.00019998931353435709, |
|
"loss": 1.8221, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.06048387096774194, |
|
"grad_norm": 0.07621350884437561, |
|
"learning_rate": 0.00019998680688765959, |
|
"loss": 1.7305, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06149193548387097, |
|
"grad_norm": 0.08454013615846634, |
|
"learning_rate": 0.00019998403640778358, |
|
"loss": 1.7558, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 0.08005455136299133, |
|
"learning_rate": 0.00019998100210203942, |
|
"loss": 1.6703, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.06350806451612903, |
|
"grad_norm": 0.09527427703142166, |
|
"learning_rate": 0.0001999777039784337, |
|
"loss": 1.7896, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.06451612903225806, |
|
"grad_norm": 0.10536834597587585, |
|
"learning_rate": 0.00019997414204566915, |
|
"loss": 1.7909, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0655241935483871, |
|
"grad_norm": 0.08326593041419983, |
|
"learning_rate": 0.0001999703163131445, |
|
"loss": 1.7501, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06653225806451613, |
|
"grad_norm": 0.0823182687163353, |
|
"learning_rate": 0.00019996622679095468, |
|
"loss": 1.7625, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.06754032258064516, |
|
"grad_norm": 0.07878896594047546, |
|
"learning_rate": 0.00019996187348989063, |
|
"loss": 1.7235, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.06854838709677419, |
|
"grad_norm": 0.0899212434887886, |
|
"learning_rate": 0.0001999572564214393, |
|
"loss": 1.7685, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.06955645161290322, |
|
"grad_norm": 0.07247278839349747, |
|
"learning_rate": 0.00019995237559778363, |
|
"loss": 1.6281, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.07056451612903226, |
|
"grad_norm": 0.08588135987520218, |
|
"learning_rate": 0.00019994723103180265, |
|
"loss": 1.7785, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0715725806451613, |
|
"grad_norm": 0.12004637718200684, |
|
"learning_rate": 0.00019994182273707107, |
|
"loss": 1.7552, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.07258064516129033, |
|
"grad_norm": 0.1002095490694046, |
|
"learning_rate": 0.00019993615072785978, |
|
"loss": 1.715, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.07358870967741936, |
|
"grad_norm": 0.07339724153280258, |
|
"learning_rate": 0.00019993021501913536, |
|
"loss": 1.7019, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.07459677419354839, |
|
"grad_norm": 0.1305348128080368, |
|
"learning_rate": 0.00019992401562656022, |
|
"loss": 1.8078, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.07560483870967742, |
|
"grad_norm": 0.09164395183324814, |
|
"learning_rate": 0.0001999175525664926, |
|
"loss": 1.6756, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07661290322580645, |
|
"grad_norm": 0.0749751552939415, |
|
"learning_rate": 0.0001999108258559864, |
|
"loss": 1.7616, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.07762096774193548, |
|
"grad_norm": 0.1132885217666626, |
|
"learning_rate": 0.00019990383551279136, |
|
"loss": 1.8232, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.07862903225806452, |
|
"grad_norm": 0.0832655057311058, |
|
"learning_rate": 0.00019989658155535262, |
|
"loss": 1.7371, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.07963709677419355, |
|
"grad_norm": 0.09641417115926743, |
|
"learning_rate": 0.00019988906400281116, |
|
"loss": 1.7989, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.08064516129032258, |
|
"grad_norm": 0.08800283074378967, |
|
"learning_rate": 0.00019988128287500335, |
|
"loss": 1.7235, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08165322580645161, |
|
"grad_norm": 0.0772438570857048, |
|
"learning_rate": 0.00019987323819246108, |
|
"loss": 1.7488, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.08266129032258064, |
|
"grad_norm": 0.09178374707698822, |
|
"learning_rate": 0.00019986492997641175, |
|
"loss": 1.7018, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.08366935483870967, |
|
"grad_norm": 0.09313932806253433, |
|
"learning_rate": 0.00019985635824877802, |
|
"loss": 1.7914, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0846774193548387, |
|
"grad_norm": 0.0906209945678711, |
|
"learning_rate": 0.00019984752303217797, |
|
"loss": 1.7197, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.08568548387096774, |
|
"grad_norm": 0.09081698209047318, |
|
"learning_rate": 0.0001998384243499249, |
|
"loss": 1.7666, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.08669354838709678, |
|
"grad_norm": 0.07680635154247284, |
|
"learning_rate": 0.0001998290622260273, |
|
"loss": 1.6946, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.08770161290322581, |
|
"grad_norm": 0.0743766576051712, |
|
"learning_rate": 0.00019981943668518888, |
|
"loss": 1.7588, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.08870967741935484, |
|
"grad_norm": 0.07674787193536758, |
|
"learning_rate": 0.00019980954775280832, |
|
"loss": 1.6896, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.08971774193548387, |
|
"grad_norm": 0.07708673924207687, |
|
"learning_rate": 0.00019979939545497933, |
|
"loss": 1.6944, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0907258064516129, |
|
"grad_norm": 0.07248947024345398, |
|
"learning_rate": 0.00019978897981849056, |
|
"loss": 1.7114, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09173387096774194, |
|
"grad_norm": 0.07939179986715317, |
|
"learning_rate": 0.0001997783008708256, |
|
"loss": 1.7552, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.09274193548387097, |
|
"grad_norm": 0.09288234263658524, |
|
"learning_rate": 0.00019976735864016276, |
|
"loss": 1.7554, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.09375, |
|
"grad_norm": 0.08074582368135452, |
|
"learning_rate": 0.00019975615315537506, |
|
"loss": 1.7209, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.09475806451612903, |
|
"grad_norm": 0.08087307959794998, |
|
"learning_rate": 0.0001997446844460302, |
|
"loss": 1.7118, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.09576612903225806, |
|
"grad_norm": 0.08976717293262482, |
|
"learning_rate": 0.00019973295254239044, |
|
"loss": 1.7384, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0967741935483871, |
|
"grad_norm": 0.08545631170272827, |
|
"learning_rate": 0.0001997209574754125, |
|
"loss": 1.7524, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.09778225806451613, |
|
"grad_norm": 0.07703512907028198, |
|
"learning_rate": 0.00019970869927674753, |
|
"loss": 1.6947, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.09879032258064516, |
|
"grad_norm": 0.07614375650882721, |
|
"learning_rate": 0.000199696177978741, |
|
"loss": 1.7135, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.09979838709677419, |
|
"grad_norm": 0.0809471607208252, |
|
"learning_rate": 0.0001996833936144326, |
|
"loss": 1.727, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.10080645161290322, |
|
"grad_norm": 0.1023879274725914, |
|
"learning_rate": 0.00019967034621755622, |
|
"loss": 1.7297, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10181451612903226, |
|
"grad_norm": 0.07705037295818329, |
|
"learning_rate": 0.00019965703582253965, |
|
"loss": 1.6571, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.1028225806451613, |
|
"grad_norm": 0.08601151406764984, |
|
"learning_rate": 0.00019964346246450487, |
|
"loss": 1.7404, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.10383064516129033, |
|
"grad_norm": 0.0756453350186348, |
|
"learning_rate": 0.00019962962617926756, |
|
"loss": 1.7311, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.10483870967741936, |
|
"grad_norm": 0.10456051677465439, |
|
"learning_rate": 0.00019961552700333734, |
|
"loss": 1.7517, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.10584677419354839, |
|
"grad_norm": 0.07731463760137558, |
|
"learning_rate": 0.00019960116497391733, |
|
"loss": 1.716, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.10685483870967742, |
|
"grad_norm": 0.0789295881986618, |
|
"learning_rate": 0.00019958654012890435, |
|
"loss": 1.7233, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.10786290322580645, |
|
"grad_norm": 0.08179011940956116, |
|
"learning_rate": 0.0001995716525068887, |
|
"loss": 1.6556, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.10887096774193548, |
|
"grad_norm": 0.08565866947174072, |
|
"learning_rate": 0.00019955650214715406, |
|
"loss": 1.7512, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.10987903225806452, |
|
"grad_norm": 0.08556907624006271, |
|
"learning_rate": 0.00019954108908967736, |
|
"loss": 1.7522, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.11088709677419355, |
|
"grad_norm": 0.08097026497125626, |
|
"learning_rate": 0.00019952541337512868, |
|
"loss": 1.6656, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.11189516129032258, |
|
"grad_norm": 0.07853402197360992, |
|
"learning_rate": 0.0001995094750448713, |
|
"loss": 1.7299, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.11290322580645161, |
|
"grad_norm": 0.07205012440681458, |
|
"learning_rate": 0.00019949327414096134, |
|
"loss": 1.7118, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.11391129032258064, |
|
"grad_norm": 0.0683959424495697, |
|
"learning_rate": 0.00019947681070614777, |
|
"loss": 1.6742, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.11491935483870967, |
|
"grad_norm": 0.07890711724758148, |
|
"learning_rate": 0.00019946008478387238, |
|
"loss": 1.6962, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.1159274193548387, |
|
"grad_norm": 0.08321288973093033, |
|
"learning_rate": 0.00019944309641826947, |
|
"loss": 1.7552, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.11693548387096774, |
|
"grad_norm": 0.0974084734916687, |
|
"learning_rate": 0.0001994258456541659, |
|
"loss": 1.7971, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.11794354838709678, |
|
"grad_norm": 0.08591660857200623, |
|
"learning_rate": 0.00019940833253708097, |
|
"loss": 1.7644, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.11895161290322581, |
|
"grad_norm": 0.07388189435005188, |
|
"learning_rate": 0.00019939055711322616, |
|
"loss": 1.6513, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.11995967741935484, |
|
"grad_norm": 0.07635471969842911, |
|
"learning_rate": 0.00019937251942950512, |
|
"loss": 1.7005, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.12096774193548387, |
|
"grad_norm": 0.08252502232789993, |
|
"learning_rate": 0.0001993542195335135, |
|
"loss": 1.7267, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1219758064516129, |
|
"grad_norm": 0.10845799744129181, |
|
"learning_rate": 0.0001993356574735389, |
|
"loss": 1.7756, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.12298387096774194, |
|
"grad_norm": 0.07942607253789902, |
|
"learning_rate": 0.00019931683329856066, |
|
"loss": 1.6849, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.12399193548387097, |
|
"grad_norm": 0.08841695636510849, |
|
"learning_rate": 0.00019929774705824973, |
|
"loss": 1.7343, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 0.09001098573207855, |
|
"learning_rate": 0.0001992783988029686, |
|
"loss": 1.7534, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.12600806451612903, |
|
"grad_norm": 0.07412228733301163, |
|
"learning_rate": 0.00019925878858377113, |
|
"loss": 1.7125, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12701612903225806, |
|
"grad_norm": 0.09205227345228195, |
|
"learning_rate": 0.00019923891645240238, |
|
"loss": 1.6712, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1280241935483871, |
|
"grad_norm": 0.07850176095962524, |
|
"learning_rate": 0.00019921878246129858, |
|
"loss": 1.6747, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"grad_norm": 0.07801543176174164, |
|
"learning_rate": 0.00019919838666358688, |
|
"loss": 1.6799, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.13004032258064516, |
|
"grad_norm": 0.08263793587684631, |
|
"learning_rate": 0.00019917772911308524, |
|
"loss": 1.7368, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.1310483870967742, |
|
"grad_norm": 0.10233369469642639, |
|
"learning_rate": 0.00019915680986430233, |
|
"loss": 1.7377, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.13205645161290322, |
|
"grad_norm": 0.08960834890604019, |
|
"learning_rate": 0.00019913562897243736, |
|
"loss": 1.7146, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.13306451612903225, |
|
"grad_norm": 0.07425748556852341, |
|
"learning_rate": 0.00019911418649337997, |
|
"loss": 1.6796, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.13407258064516128, |
|
"grad_norm": 0.11380482465028763, |
|
"learning_rate": 0.00019909248248370988, |
|
"loss": 1.7688, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.1350806451612903, |
|
"grad_norm": 0.09946684539318085, |
|
"learning_rate": 0.00019907051700069714, |
|
"loss": 1.7016, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.13608870967741934, |
|
"grad_norm": 0.07686997205018997, |
|
"learning_rate": 0.0001990482901023016, |
|
"loss": 1.7209, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.13709677419354838, |
|
"grad_norm": 0.08980387449264526, |
|
"learning_rate": 0.0001990258018471729, |
|
"loss": 1.6922, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.1381048387096774, |
|
"grad_norm": 0.08946418762207031, |
|
"learning_rate": 0.00019900305229465036, |
|
"loss": 1.7231, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.13911290322580644, |
|
"grad_norm": 0.07228976488113403, |
|
"learning_rate": 0.00019898004150476278, |
|
"loss": 1.6864, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.14012096774193547, |
|
"grad_norm": 0.09577012807130814, |
|
"learning_rate": 0.00019895676953822822, |
|
"loss": 1.6812, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.14112903225806453, |
|
"grad_norm": 0.08688167482614517, |
|
"learning_rate": 0.00019893323645645404, |
|
"loss": 1.738, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.14213709677419356, |
|
"grad_norm": 0.07488682866096497, |
|
"learning_rate": 0.00019890944232153643, |
|
"loss": 1.6202, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1431451612903226, |
|
"grad_norm": 0.09752912074327469, |
|
"learning_rate": 0.00019888538719626053, |
|
"loss": 1.7006, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.14415322580645162, |
|
"grad_norm": 0.08033961057662964, |
|
"learning_rate": 0.0001988610711441001, |
|
"loss": 1.7119, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.14516129032258066, |
|
"grad_norm": 0.07507845759391785, |
|
"learning_rate": 0.00019883649422921745, |
|
"loss": 1.6504, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.1461693548387097, |
|
"grad_norm": 0.07756344974040985, |
|
"learning_rate": 0.00019881165651646317, |
|
"loss": 1.7107, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.14717741935483872, |
|
"grad_norm": 0.07581036537885666, |
|
"learning_rate": 0.00019878655807137603, |
|
"loss": 1.6777, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.14818548387096775, |
|
"grad_norm": 0.06943333894014359, |
|
"learning_rate": 0.0001987611989601828, |
|
"loss": 1.6282, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.14919354838709678, |
|
"grad_norm": 0.07314992696046829, |
|
"learning_rate": 0.00019873557924979804, |
|
"loss": 1.6773, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.1502016129032258, |
|
"grad_norm": 0.08181635290384293, |
|
"learning_rate": 0.000198709699007824, |
|
"loss": 1.668, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.15120967741935484, |
|
"grad_norm": 0.07046262919902802, |
|
"learning_rate": 0.00019868355830255033, |
|
"loss": 1.6857, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.15221774193548387, |
|
"grad_norm": 0.07162804901599884, |
|
"learning_rate": 0.00019865715720295397, |
|
"loss": 1.6299, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.1532258064516129, |
|
"grad_norm": 0.0785004273056984, |
|
"learning_rate": 0.00019863049577869898, |
|
"loss": 1.6651, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.15423387096774194, |
|
"grad_norm": 0.06895990669727325, |
|
"learning_rate": 0.00019860357410013638, |
|
"loss": 1.636, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.15524193548387097, |
|
"grad_norm": 0.0736781507730484, |
|
"learning_rate": 0.00019857639223830377, |
|
"loss": 1.6859, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 0.07190602272748947, |
|
"learning_rate": 0.00019854895026492545, |
|
"loss": 1.706, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.15725806451612903, |
|
"grad_norm": 0.07781372219324112, |
|
"learning_rate": 0.00019852124825241201, |
|
"loss": 1.7015, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.15826612903225806, |
|
"grad_norm": 0.08466929197311401, |
|
"learning_rate": 0.0001984932862738601, |
|
"loss": 1.6684, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.1592741935483871, |
|
"grad_norm": 0.08189702033996582, |
|
"learning_rate": 0.00019846506440305257, |
|
"loss": 1.6914, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.16028225806451613, |
|
"grad_norm": 0.08032141625881195, |
|
"learning_rate": 0.00019843658271445776, |
|
"loss": 1.6574, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.16129032258064516, |
|
"grad_norm": 0.08438081294298172, |
|
"learning_rate": 0.00019840784128322985, |
|
"loss": 1.7503, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1622983870967742, |
|
"grad_norm": 0.10350456833839417, |
|
"learning_rate": 0.0001983788401852082, |
|
"loss": 1.697, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.16330645161290322, |
|
"grad_norm": 0.08714311569929123, |
|
"learning_rate": 0.00019834957949691747, |
|
"loss": 1.7595, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.16431451612903225, |
|
"grad_norm": 0.08562017232179642, |
|
"learning_rate": 0.00019832005929556722, |
|
"loss": 1.7502, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.16532258064516128, |
|
"grad_norm": 0.0961882621049881, |
|
"learning_rate": 0.00019829027965905186, |
|
"loss": 1.6875, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.1663306451612903, |
|
"grad_norm": 0.09505471587181091, |
|
"learning_rate": 0.00019826024066595027, |
|
"loss": 1.6958, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.16733870967741934, |
|
"grad_norm": 0.07493823021650314, |
|
"learning_rate": 0.00019822994239552573, |
|
"loss": 1.6677, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.16834677419354838, |
|
"grad_norm": 0.09159812331199646, |
|
"learning_rate": 0.00019819938492772568, |
|
"loss": 1.6994, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.1693548387096774, |
|
"grad_norm": 0.1118432804942131, |
|
"learning_rate": 0.00019816856834318155, |
|
"loss": 1.7143, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.17036290322580644, |
|
"grad_norm": 0.09199640899896622, |
|
"learning_rate": 0.0001981374927232084, |
|
"loss": 1.6896, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.17137096774193547, |
|
"grad_norm": 0.0801042765378952, |
|
"learning_rate": 0.00019810615814980483, |
|
"loss": 1.7292, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.17237903225806453, |
|
"grad_norm": 0.1115993857383728, |
|
"learning_rate": 0.00019807456470565283, |
|
"loss": 1.6995, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.17338709677419356, |
|
"grad_norm": 0.10155931115150452, |
|
"learning_rate": 0.00019804271247411727, |
|
"loss": 1.6984, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1743951612903226, |
|
"grad_norm": 0.07809167355298996, |
|
"learning_rate": 0.00019801060153924608, |
|
"loss": 1.7152, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.17540322580645162, |
|
"grad_norm": 0.08765136450529099, |
|
"learning_rate": 0.0001979782319857697, |
|
"loss": 1.6451, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.17641129032258066, |
|
"grad_norm": 0.07360592484474182, |
|
"learning_rate": 0.00019794560389910102, |
|
"loss": 1.6889, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1774193548387097, |
|
"grad_norm": 0.09308324754238129, |
|
"learning_rate": 0.00019791271736533512, |
|
"loss": 1.7225, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.17842741935483872, |
|
"grad_norm": 0.08810586482286453, |
|
"learning_rate": 0.00019787957247124907, |
|
"loss": 1.6808, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.17943548387096775, |
|
"grad_norm": 0.07750339061021805, |
|
"learning_rate": 0.00019784616930430157, |
|
"loss": 1.6324, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.18044354838709678, |
|
"grad_norm": 0.08474040031433105, |
|
"learning_rate": 0.00019781250795263295, |
|
"loss": 1.6858, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.1814516129032258, |
|
"grad_norm": 0.08277326822280884, |
|
"learning_rate": 0.0001977785885050647, |
|
"loss": 1.7043, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.18245967741935484, |
|
"grad_norm": 0.07668858766555786, |
|
"learning_rate": 0.00019774441105109943, |
|
"loss": 1.6599, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.18346774193548387, |
|
"grad_norm": 0.07402200996875763, |
|
"learning_rate": 0.00019770997568092046, |
|
"loss": 1.6524, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.1844758064516129, |
|
"grad_norm": 0.08267819881439209, |
|
"learning_rate": 0.0001976752824853917, |
|
"loss": 1.6973, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.18548387096774194, |
|
"grad_norm": 0.0688646137714386, |
|
"learning_rate": 0.00019764033155605747, |
|
"loss": 1.63, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.18649193548387097, |
|
"grad_norm": 0.0818399116396904, |
|
"learning_rate": 0.00019760512298514198, |
|
"loss": 1.6773, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 0.08086924254894257, |
|
"learning_rate": 0.0001975696568655494, |
|
"loss": 1.7037, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.18850806451612903, |
|
"grad_norm": 0.08136597275733948, |
|
"learning_rate": 0.00019753393329086354, |
|
"loss": 1.6634, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.18951612903225806, |
|
"grad_norm": 0.10008742660284042, |
|
"learning_rate": 0.00019749795235534737, |
|
"loss": 1.7139, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.1905241935483871, |
|
"grad_norm": 0.08657586574554443, |
|
"learning_rate": 0.0001974617141539432, |
|
"loss": 1.6877, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.19153225806451613, |
|
"grad_norm": 0.09825193136930466, |
|
"learning_rate": 0.0001974252187822719, |
|
"loss": 1.7274, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.19254032258064516, |
|
"grad_norm": 0.06964825093746185, |
|
"learning_rate": 0.00019738846633663318, |
|
"loss": 1.6431, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.1935483870967742, |
|
"grad_norm": 0.07197541743516922, |
|
"learning_rate": 0.0001973514569140049, |
|
"loss": 1.6532, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.19455645161290322, |
|
"grad_norm": 0.07691382616758347, |
|
"learning_rate": 0.00019731419061204316, |
|
"loss": 1.6816, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.19556451612903225, |
|
"grad_norm": 0.08229187875986099, |
|
"learning_rate": 0.00019727666752908173, |
|
"loss": 1.6471, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.19657258064516128, |
|
"grad_norm": 0.0788332000374794, |
|
"learning_rate": 0.00019723888776413206, |
|
"loss": 1.6745, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1975806451612903, |
|
"grad_norm": 0.08446817100048065, |
|
"learning_rate": 0.00019720085141688285, |
|
"loss": 1.6863, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.19858870967741934, |
|
"grad_norm": 0.0747678205370903, |
|
"learning_rate": 0.00019716255858769982, |
|
"loss": 1.6553, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.19959677419354838, |
|
"grad_norm": 0.08248293399810791, |
|
"learning_rate": 0.0001971240093776255, |
|
"loss": 1.7021, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.2006048387096774, |
|
"grad_norm": 0.0832241103053093, |
|
"learning_rate": 0.00019708520388837897, |
|
"loss": 1.6832, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.20161290322580644, |
|
"grad_norm": 0.10792431235313416, |
|
"learning_rate": 0.00019704614222235543, |
|
"loss": 1.7196, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20262096774193547, |
|
"grad_norm": 0.09173596650362015, |
|
"learning_rate": 0.0001970068244826261, |
|
"loss": 1.7039, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.20362903225806453, |
|
"grad_norm": 0.07657129317522049, |
|
"learning_rate": 0.00019696725077293796, |
|
"loss": 1.6614, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.20463709677419356, |
|
"grad_norm": 0.08881079405546188, |
|
"learning_rate": 0.00019692742119771338, |
|
"loss": 1.7062, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.2056451612903226, |
|
"grad_norm": 0.11070767790079117, |
|
"learning_rate": 0.00019688733586204976, |
|
"loss": 1.7759, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.20665322580645162, |
|
"grad_norm": 0.07556972652673721, |
|
"learning_rate": 0.00019684699487171957, |
|
"loss": 1.6664, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.20766129032258066, |
|
"grad_norm": 0.11293460428714752, |
|
"learning_rate": 0.00019680639833316975, |
|
"loss": 1.7476, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.2086693548387097, |
|
"grad_norm": 0.08948105573654175, |
|
"learning_rate": 0.00019676554635352154, |
|
"loss": 1.6933, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.20967741935483872, |
|
"grad_norm": 0.1004069596529007, |
|
"learning_rate": 0.00019672443904057024, |
|
"loss": 1.6909, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.21068548387096775, |
|
"grad_norm": 0.0815928652882576, |
|
"learning_rate": 0.00019668307650278492, |
|
"loss": 1.6881, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.21169354838709678, |
|
"grad_norm": 0.10198971629142761, |
|
"learning_rate": 0.00019664145884930808, |
|
"loss": 1.6653, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2127016129032258, |
|
"grad_norm": 0.07174786180257797, |
|
"learning_rate": 0.00019659958618995532, |
|
"loss": 1.6204, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.21370967741935484, |
|
"grad_norm": 0.09819284826517105, |
|
"learning_rate": 0.0001965574586352153, |
|
"loss": 1.6574, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.21471774193548387, |
|
"grad_norm": 0.07578348368406296, |
|
"learning_rate": 0.00019651507629624902, |
|
"loss": 1.7096, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.2157258064516129, |
|
"grad_norm": 0.09160558879375458, |
|
"learning_rate": 0.00019647243928489, |
|
"loss": 1.673, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.21673387096774194, |
|
"grad_norm": 0.07697172462940216, |
|
"learning_rate": 0.00019642954771364362, |
|
"loss": 1.7069, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.21774193548387097, |
|
"grad_norm": 0.0956280305981636, |
|
"learning_rate": 0.00019638640169568702, |
|
"loss": 1.6727, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.21875, |
|
"grad_norm": 0.0775306299328804, |
|
"learning_rate": 0.00019634300134486877, |
|
"loss": 1.6846, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.21975806451612903, |
|
"grad_norm": 0.11724736541509628, |
|
"learning_rate": 0.00019629934677570848, |
|
"loss": 1.6723, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.22076612903225806, |
|
"grad_norm": 0.08374209702014923, |
|
"learning_rate": 0.00019625543810339652, |
|
"loss": 1.6552, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.2217741935483871, |
|
"grad_norm": 0.09895430505275726, |
|
"learning_rate": 0.00019621127544379392, |
|
"loss": 1.6843, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.22278225806451613, |
|
"grad_norm": 0.07595435529947281, |
|
"learning_rate": 0.00019616685891343173, |
|
"loss": 1.6878, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.22379032258064516, |
|
"grad_norm": 0.10327397286891937, |
|
"learning_rate": 0.00019612218862951098, |
|
"loss": 1.641, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.2247983870967742, |
|
"grad_norm": 0.08979543298482895, |
|
"learning_rate": 0.00019607726470990229, |
|
"loss": 1.7116, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.22580645161290322, |
|
"grad_norm": 0.08411210030317307, |
|
"learning_rate": 0.00019603208727314543, |
|
"loss": 1.6503, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.22681451612903225, |
|
"grad_norm": 0.08849965780973434, |
|
"learning_rate": 0.00019598665643844924, |
|
"loss": 1.7119, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.22782258064516128, |
|
"grad_norm": 0.08358252048492432, |
|
"learning_rate": 0.00019594097232569118, |
|
"loss": 1.7034, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.2288306451612903, |
|
"grad_norm": 0.08862830698490143, |
|
"learning_rate": 0.0001958950350554169, |
|
"loss": 1.6937, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.22983870967741934, |
|
"grad_norm": 0.09029026329517365, |
|
"learning_rate": 0.00019584884474884025, |
|
"loss": 1.6537, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.23084677419354838, |
|
"grad_norm": 0.0766313225030899, |
|
"learning_rate": 0.00019580240152784265, |
|
"loss": 1.6399, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.2318548387096774, |
|
"grad_norm": 0.09331216663122177, |
|
"learning_rate": 0.00019575570551497287, |
|
"loss": 1.6876, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.23286290322580644, |
|
"grad_norm": 0.07506153732538223, |
|
"learning_rate": 0.00019570875683344672, |
|
"loss": 1.6339, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.23387096774193547, |
|
"grad_norm": 0.08822404593229294, |
|
"learning_rate": 0.0001956615556071468, |
|
"loss": 1.6883, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.23487903225806453, |
|
"grad_norm": 0.07617950439453125, |
|
"learning_rate": 0.000195614101960622, |
|
"loss": 1.6845, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.23588709677419356, |
|
"grad_norm": 0.0857347846031189, |
|
"learning_rate": 0.00019556639601908728, |
|
"loss": 1.6769, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.2368951612903226, |
|
"grad_norm": 0.08155297487974167, |
|
"learning_rate": 0.00019551843790842338, |
|
"loss": 1.7275, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.23790322580645162, |
|
"grad_norm": 0.08427773416042328, |
|
"learning_rate": 0.00019547022775517645, |
|
"loss": 1.627, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.23891129032258066, |
|
"grad_norm": 0.0765247493982315, |
|
"learning_rate": 0.00019542176568655757, |
|
"loss": 1.6719, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.2399193548387097, |
|
"grad_norm": 0.07752780616283417, |
|
"learning_rate": 0.00019537305183044268, |
|
"loss": 1.6307, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.24092741935483872, |
|
"grad_norm": 0.07956812530755997, |
|
"learning_rate": 0.00019532408631537203, |
|
"loss": 1.6466, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.24193548387096775, |
|
"grad_norm": 0.07456839084625244, |
|
"learning_rate": 0.00019527486927054994, |
|
"loss": 1.6692, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.24294354838709678, |
|
"grad_norm": 0.08381907641887665, |
|
"learning_rate": 0.00019522540082584443, |
|
"loss": 1.679, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.2439516129032258, |
|
"grad_norm": 0.07443513721227646, |
|
"learning_rate": 0.0001951756811117869, |
|
"loss": 1.6867, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.24495967741935484, |
|
"grad_norm": 0.08541234582662582, |
|
"learning_rate": 0.00019512571025957182, |
|
"loss": 1.6424, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.24596774193548387, |
|
"grad_norm": 0.07867056876420975, |
|
"learning_rate": 0.00019507548840105618, |
|
"loss": 1.6847, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.2469758064516129, |
|
"grad_norm": 0.11804165691137314, |
|
"learning_rate": 0.00019502501566875943, |
|
"loss": 1.783, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.24798387096774194, |
|
"grad_norm": 0.0737847164273262, |
|
"learning_rate": 0.00019497429219586296, |
|
"loss": 1.6644, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.24899193548387097, |
|
"grad_norm": 0.08608712255954742, |
|
"learning_rate": 0.00019492331811620976, |
|
"loss": 1.6763, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.09786904603242874, |
|
"learning_rate": 0.00019487209356430413, |
|
"loss": 1.7245, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.25100806451612906, |
|
"grad_norm": 0.10795535892248154, |
|
"learning_rate": 0.00019482061867531127, |
|
"loss": 1.7183, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.25201612903225806, |
|
"grad_norm": 0.0815276950597763, |
|
"learning_rate": 0.0001947688935850569, |
|
"loss": 1.7026, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2530241935483871, |
|
"grad_norm": 0.09202085435390472, |
|
"learning_rate": 0.00019471691843002701, |
|
"loss": 1.6327, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.2540322580645161, |
|
"grad_norm": 0.08682993054389954, |
|
"learning_rate": 0.00019466469334736739, |
|
"loss": 1.6532, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.2550403225806452, |
|
"grad_norm": 0.08007092773914337, |
|
"learning_rate": 0.00019461221847488333, |
|
"loss": 1.6587, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.2560483870967742, |
|
"grad_norm": 0.12094767391681671, |
|
"learning_rate": 0.0001945594939510392, |
|
"loss": 1.7491, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.25705645161290325, |
|
"grad_norm": 0.10074511170387268, |
|
"learning_rate": 0.00019450651991495812, |
|
"loss": 1.7363, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.25806451612903225, |
|
"grad_norm": 0.0891348272562027, |
|
"learning_rate": 0.00019445329650642163, |
|
"loss": 1.6925, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.2590725806451613, |
|
"grad_norm": 0.1022176444530487, |
|
"learning_rate": 0.00019439982386586932, |
|
"loss": 1.6419, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.2600806451612903, |
|
"grad_norm": 0.08925571292638779, |
|
"learning_rate": 0.00019434610213439832, |
|
"loss": 1.6575, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.2610887096774194, |
|
"grad_norm": 0.07562322169542313, |
|
"learning_rate": 0.0001942921314537631, |
|
"loss": 1.6187, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.2620967741935484, |
|
"grad_norm": 0.09982999414205551, |
|
"learning_rate": 0.000194237911966375, |
|
"loss": 1.6341, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.26310483870967744, |
|
"grad_norm": 0.08155392110347748, |
|
"learning_rate": 0.0001941834438153019, |
|
"loss": 1.7189, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.26411290322580644, |
|
"grad_norm": 0.08979921042919159, |
|
"learning_rate": 0.00019412872714426782, |
|
"loss": 1.6556, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.2651209677419355, |
|
"grad_norm": 0.08493686467409134, |
|
"learning_rate": 0.00019407376209765255, |
|
"loss": 1.6919, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.2661290322580645, |
|
"grad_norm": 0.0822565034031868, |
|
"learning_rate": 0.0001940185488204912, |
|
"loss": 1.6205, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.26713709677419356, |
|
"grad_norm": 0.08931294083595276, |
|
"learning_rate": 0.00019396308745847402, |
|
"loss": 1.6848, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.26814516129032256, |
|
"grad_norm": 0.08736932277679443, |
|
"learning_rate": 0.00019390737815794574, |
|
"loss": 1.6882, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.2691532258064516, |
|
"grad_norm": 0.09153414517641068, |
|
"learning_rate": 0.00019385142106590535, |
|
"loss": 1.7596, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.2701612903225806, |
|
"grad_norm": 0.07890645414590836, |
|
"learning_rate": 0.00019379521633000572, |
|
"loss": 1.6987, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.2711693548387097, |
|
"grad_norm": 0.08790858089923859, |
|
"learning_rate": 0.0001937387640985532, |
|
"loss": 1.6744, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.2721774193548387, |
|
"grad_norm": 0.0803663581609726, |
|
"learning_rate": 0.00019368206452050713, |
|
"loss": 1.6846, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.27318548387096775, |
|
"grad_norm": 0.09086322039365768, |
|
"learning_rate": 0.00019362511774547955, |
|
"loss": 1.6878, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.27419354838709675, |
|
"grad_norm": 0.07199586182832718, |
|
"learning_rate": 0.00019356792392373479, |
|
"loss": 1.6316, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.2752016129032258, |
|
"grad_norm": 0.08460623025894165, |
|
"learning_rate": 0.00019351048320618896, |
|
"loss": 1.6558, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.2762096774193548, |
|
"grad_norm": 0.0732608363032341, |
|
"learning_rate": 0.0001934527957444098, |
|
"loss": 1.6752, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.2772177419354839, |
|
"grad_norm": 0.0906132385134697, |
|
"learning_rate": 0.00019339486169061608, |
|
"loss": 1.7395, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2782258064516129, |
|
"grad_norm": 0.07827211916446686, |
|
"learning_rate": 0.00019333668119767716, |
|
"loss": 1.6681, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.27923387096774194, |
|
"grad_norm": 0.08276840299367905, |
|
"learning_rate": 0.00019327825441911275, |
|
"loss": 1.6645, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.28024193548387094, |
|
"grad_norm": 0.09114561229944229, |
|
"learning_rate": 0.00019321958150909243, |
|
"loss": 1.6857, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.28125, |
|
"grad_norm": 0.08729056268930435, |
|
"learning_rate": 0.00019316066262243525, |
|
"loss": 1.6483, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.28225806451612906, |
|
"grad_norm": 0.08572946488857269, |
|
"learning_rate": 0.00019310149791460925, |
|
"loss": 1.6872, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.28326612903225806, |
|
"grad_norm": 0.10044838488101959, |
|
"learning_rate": 0.00019304208754173117, |
|
"loss": 1.6935, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.2842741935483871, |
|
"grad_norm": 0.0785636454820633, |
|
"learning_rate": 0.000192982431660566, |
|
"loss": 1.6613, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.2852822580645161, |
|
"grad_norm": 0.08499724417924881, |
|
"learning_rate": 0.00019292253042852648, |
|
"loss": 1.6208, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.2862903225806452, |
|
"grad_norm": 0.09399082511663437, |
|
"learning_rate": 0.00019286238400367277, |
|
"loss": 1.619, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.2872983870967742, |
|
"grad_norm": 0.07334808260202408, |
|
"learning_rate": 0.0001928019925447121, |
|
"loss": 1.6813, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.28830645161290325, |
|
"grad_norm": 0.09035395085811615, |
|
"learning_rate": 0.00019274135621099813, |
|
"loss": 1.6265, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.28931451612903225, |
|
"grad_norm": 0.07861501723527908, |
|
"learning_rate": 0.00019268047516253077, |
|
"loss": 1.6808, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.2903225806451613, |
|
"grad_norm": 0.09788773208856583, |
|
"learning_rate": 0.00019261934955995563, |
|
"loss": 1.708, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.2913306451612903, |
|
"grad_norm": 0.07571721822023392, |
|
"learning_rate": 0.00019255797956456357, |
|
"loss": 1.6612, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.2923387096774194, |
|
"grad_norm": 0.0836874321103096, |
|
"learning_rate": 0.00019249636533829042, |
|
"loss": 1.6804, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2933467741935484, |
|
"grad_norm": 0.08373916894197464, |
|
"learning_rate": 0.00019243450704371632, |
|
"loss": 1.6317, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.29435483870967744, |
|
"grad_norm": 0.08029752969741821, |
|
"learning_rate": 0.00019237240484406561, |
|
"loss": 1.6782, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.29536290322580644, |
|
"grad_norm": 0.08353215456008911, |
|
"learning_rate": 0.00019231005890320602, |
|
"loss": 1.6517, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.2963709677419355, |
|
"grad_norm": 0.09467596560716629, |
|
"learning_rate": 0.00019224746938564859, |
|
"loss": 1.6862, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.2973790322580645, |
|
"grad_norm": 0.10909095406532288, |
|
"learning_rate": 0.000192184636456547, |
|
"loss": 1.6579, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.29838709677419356, |
|
"grad_norm": 0.08434964716434479, |
|
"learning_rate": 0.00019212156028169724, |
|
"loss": 1.6516, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.29939516129032256, |
|
"grad_norm": 0.09146866202354431, |
|
"learning_rate": 0.00019205824102753717, |
|
"loss": 1.6754, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.3004032258064516, |
|
"grad_norm": 0.10936370491981506, |
|
"learning_rate": 0.00019199467886114603, |
|
"loss": 1.6495, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.3014112903225806, |
|
"grad_norm": 0.08099015057086945, |
|
"learning_rate": 0.00019193087395024397, |
|
"loss": 1.6656, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.3024193548387097, |
|
"grad_norm": 0.09252738207578659, |
|
"learning_rate": 0.0001918668264631918, |
|
"loss": 1.6711, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3034274193548387, |
|
"grad_norm": 0.08917499333620071, |
|
"learning_rate": 0.0001918025365689903, |
|
"loss": 1.6356, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.30443548387096775, |
|
"grad_norm": 0.088597372174263, |
|
"learning_rate": 0.00019173800443727994, |
|
"loss": 1.6659, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.30544354838709675, |
|
"grad_norm": 0.09308971464633942, |
|
"learning_rate": 0.00019167323023834033, |
|
"loss": 1.7218, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.3064516129032258, |
|
"grad_norm": 0.07813969999551773, |
|
"learning_rate": 0.00019160821414308988, |
|
"loss": 1.6042, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.3074596774193548, |
|
"grad_norm": 0.08843039721250534, |
|
"learning_rate": 0.0001915429563230853, |
|
"loss": 1.6409, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.3084677419354839, |
|
"grad_norm": 0.09537311643362045, |
|
"learning_rate": 0.00019147745695052097, |
|
"loss": 1.6723, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.3094758064516129, |
|
"grad_norm": 0.08754942566156387, |
|
"learning_rate": 0.00019141171619822882, |
|
"loss": 1.643, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.31048387096774194, |
|
"grad_norm": 0.07768256217241287, |
|
"learning_rate": 0.0001913457342396777, |
|
"loss": 1.6109, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.31149193548387094, |
|
"grad_norm": 0.09593945741653442, |
|
"learning_rate": 0.00019127951124897283, |
|
"loss": 1.6756, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 0.07348258048295975, |
|
"learning_rate": 0.00019121304740085546, |
|
"loss": 1.623, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.31350806451612906, |
|
"grad_norm": 0.08579769730567932, |
|
"learning_rate": 0.0001911463428707025, |
|
"loss": 1.658, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.31451612903225806, |
|
"grad_norm": 0.08485422283411026, |
|
"learning_rate": 0.00019107939783452577, |
|
"loss": 1.655, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.3155241935483871, |
|
"grad_norm": 0.08101114630699158, |
|
"learning_rate": 0.00019101221246897184, |
|
"loss": 1.6391, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.3165322580645161, |
|
"grad_norm": 0.08206996321678162, |
|
"learning_rate": 0.00019094478695132138, |
|
"loss": 1.6131, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.3175403225806452, |
|
"grad_norm": 0.07818609476089478, |
|
"learning_rate": 0.00019087712145948868, |
|
"loss": 1.6632, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.3185483870967742, |
|
"grad_norm": 0.09414539486169815, |
|
"learning_rate": 0.0001908092161720214, |
|
"loss": 1.6717, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.31955645161290325, |
|
"grad_norm": 0.08382460474967957, |
|
"learning_rate": 0.00019074107126809984, |
|
"loss": 1.6867, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.32056451612903225, |
|
"grad_norm": 0.07750436663627625, |
|
"learning_rate": 0.00019067268692753655, |
|
"loss": 1.6311, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.3215725806451613, |
|
"grad_norm": 0.08067768812179565, |
|
"learning_rate": 0.00019060406333077596, |
|
"loss": 1.6681, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.3225806451612903, |
|
"grad_norm": 0.074059396982193, |
|
"learning_rate": 0.00019053520065889375, |
|
"loss": 1.6408, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3235887096774194, |
|
"grad_norm": 0.10559958219528198, |
|
"learning_rate": 0.00019046609909359648, |
|
"loss": 1.7342, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.3245967741935484, |
|
"grad_norm": 0.08121935278177261, |
|
"learning_rate": 0.00019039675881722104, |
|
"loss": 1.6808, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.32560483870967744, |
|
"grad_norm": 0.08211352676153183, |
|
"learning_rate": 0.00019032718001273427, |
|
"loss": 1.6127, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.32661290322580644, |
|
"grad_norm": 0.07450398057699203, |
|
"learning_rate": 0.0001902573628637323, |
|
"loss": 1.6555, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.3276209677419355, |
|
"grad_norm": 0.0976330116391182, |
|
"learning_rate": 0.0001901873075544403, |
|
"loss": 1.6775, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.3286290322580645, |
|
"grad_norm": 0.08012880384922028, |
|
"learning_rate": 0.00019011701426971178, |
|
"loss": 1.6213, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.32963709677419356, |
|
"grad_norm": 0.08508668839931488, |
|
"learning_rate": 0.00019004648319502824, |
|
"loss": 1.5809, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.33064516129032256, |
|
"grad_norm": 0.08622655272483826, |
|
"learning_rate": 0.00018997571451649856, |
|
"loss": 1.666, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.3316532258064516, |
|
"grad_norm": 0.09803669154644012, |
|
"learning_rate": 0.00018990470842085867, |
|
"loss": 1.6784, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.3326612903225806, |
|
"grad_norm": 0.08453961461782455, |
|
"learning_rate": 0.0001898334650954709, |
|
"loss": 1.6109, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3336693548387097, |
|
"grad_norm": 0.07246208935976028, |
|
"learning_rate": 0.00018976198472832364, |
|
"loss": 1.6117, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.3346774193548387, |
|
"grad_norm": 0.08284757286310196, |
|
"learning_rate": 0.00018969026750803063, |
|
"loss": 1.6094, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.33568548387096775, |
|
"grad_norm": 0.08026500046253204, |
|
"learning_rate": 0.00018961831362383067, |
|
"loss": 1.6555, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.33669354838709675, |
|
"grad_norm": 0.08912428468465805, |
|
"learning_rate": 0.00018954612326558707, |
|
"loss": 1.6602, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.3377016129032258, |
|
"grad_norm": 0.08738451451063156, |
|
"learning_rate": 0.00018947369662378704, |
|
"loss": 1.6125, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3387096774193548, |
|
"grad_norm": 0.07017836719751358, |
|
"learning_rate": 0.00018940103388954133, |
|
"loss": 1.6173, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.3397177419354839, |
|
"grad_norm": 0.08264176547527313, |
|
"learning_rate": 0.00018932813525458363, |
|
"loss": 1.6716, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.3407258064516129, |
|
"grad_norm": 0.08516332507133484, |
|
"learning_rate": 0.00018925500091127007, |
|
"loss": 1.6752, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.34173387096774194, |
|
"grad_norm": 0.07101423293352127, |
|
"learning_rate": 0.00018918163105257883, |
|
"loss": 1.6393, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.34274193548387094, |
|
"grad_norm": 0.07172892987728119, |
|
"learning_rate": 0.00018910802587210942, |
|
"loss": 1.6116, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.34375, |
|
"grad_norm": 0.07889813184738159, |
|
"learning_rate": 0.0001890341855640824, |
|
"loss": 1.6107, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.34475806451612906, |
|
"grad_norm": 0.07734905183315277, |
|
"learning_rate": 0.0001889601103233387, |
|
"loss": 1.6686, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.34576612903225806, |
|
"grad_norm": 0.09568161517381668, |
|
"learning_rate": 0.00018888580034533915, |
|
"loss": 1.6914, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.3467741935483871, |
|
"grad_norm": 0.0727929100394249, |
|
"learning_rate": 0.000188811255826164, |
|
"loss": 1.6271, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.3477822580645161, |
|
"grad_norm": 0.07241855561733246, |
|
"learning_rate": 0.0001887364769625124, |
|
"loss": 1.6514, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.3487903225806452, |
|
"grad_norm": 0.07215382158756256, |
|
"learning_rate": 0.00018866146395170178, |
|
"loss": 1.6578, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.3497983870967742, |
|
"grad_norm": 0.07429207116365433, |
|
"learning_rate": 0.00018858621699166755, |
|
"loss": 1.6176, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.35080645161290325, |
|
"grad_norm": 0.07516060024499893, |
|
"learning_rate": 0.00018851073628096225, |
|
"loss": 1.6735, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.35181451612903225, |
|
"grad_norm": 0.08864877372980118, |
|
"learning_rate": 0.0001884350220187554, |
|
"loss": 1.6044, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.3528225806451613, |
|
"grad_norm": 0.0749056488275528, |
|
"learning_rate": 0.00018835907440483267, |
|
"loss": 1.6316, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3538306451612903, |
|
"grad_norm": 0.09181974828243256, |
|
"learning_rate": 0.0001882828936395955, |
|
"loss": 1.6834, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.3548387096774194, |
|
"grad_norm": 0.08013599365949631, |
|
"learning_rate": 0.00018820647992406054, |
|
"loss": 1.6367, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.3558467741935484, |
|
"grad_norm": 0.0809824988245964, |
|
"learning_rate": 0.00018812983345985914, |
|
"loss": 1.658, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.35685483870967744, |
|
"grad_norm": 0.1000952199101448, |
|
"learning_rate": 0.0001880529544492368, |
|
"loss": 1.6571, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.35786290322580644, |
|
"grad_norm": 0.074663445353508, |
|
"learning_rate": 0.00018797584309505254, |
|
"loss": 1.6358, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.3588709677419355, |
|
"grad_norm": 0.0898260623216629, |
|
"learning_rate": 0.00018789849960077864, |
|
"loss": 1.6496, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.3598790322580645, |
|
"grad_norm": 0.08878135681152344, |
|
"learning_rate": 0.00018782092417049979, |
|
"loss": 1.6819, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.36088709677419356, |
|
"grad_norm": 0.07256605476140976, |
|
"learning_rate": 0.00018774311700891269, |
|
"loss": 1.6521, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.36189516129032256, |
|
"grad_norm": 0.07939675450325012, |
|
"learning_rate": 0.00018766507832132558, |
|
"loss": 1.6898, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.3629032258064516, |
|
"grad_norm": 0.07508337497711182, |
|
"learning_rate": 0.00018758680831365755, |
|
"loss": 1.6204, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3639112903225806, |
|
"grad_norm": 0.07679913192987442, |
|
"learning_rate": 0.00018750830719243812, |
|
"loss": 1.597, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.3649193548387097, |
|
"grad_norm": 0.07900839298963547, |
|
"learning_rate": 0.00018742957516480657, |
|
"loss": 1.6197, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.3659274193548387, |
|
"grad_norm": 0.08279551565647125, |
|
"learning_rate": 0.00018735061243851158, |
|
"loss": 1.7151, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.36693548387096775, |
|
"grad_norm": 0.10616319626569748, |
|
"learning_rate": 0.00018727141922191047, |
|
"loss": 1.7228, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.36794354838709675, |
|
"grad_norm": 0.08777708560228348, |
|
"learning_rate": 0.00018719199572396882, |
|
"loss": 1.6661, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.3689516129032258, |
|
"grad_norm": 0.0981433242559433, |
|
"learning_rate": 0.00018711234215425978, |
|
"loss": 1.6331, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.3699596774193548, |
|
"grad_norm": 0.07754123210906982, |
|
"learning_rate": 0.00018703245872296365, |
|
"loss": 1.6757, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.3709677419354839, |
|
"grad_norm": 0.09494742751121521, |
|
"learning_rate": 0.00018695234564086724, |
|
"loss": 1.6565, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.3719758064516129, |
|
"grad_norm": 0.100984126329422, |
|
"learning_rate": 0.00018687200311936328, |
|
"loss": 1.6879, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.37298387096774194, |
|
"grad_norm": 0.08996261656284332, |
|
"learning_rate": 0.00018679143137045006, |
|
"loss": 1.6579, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.37399193548387094, |
|
"grad_norm": 0.0966666117310524, |
|
"learning_rate": 0.00018671063060673055, |
|
"loss": 1.5853, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 0.07991211116313934, |
|
"learning_rate": 0.00018662960104141215, |
|
"loss": 1.6355, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.37600806451612906, |
|
"grad_norm": 0.09592580795288086, |
|
"learning_rate": 0.00018654834288830591, |
|
"loss": 1.6172, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.37701612903225806, |
|
"grad_norm": 0.07976924628019333, |
|
"learning_rate": 0.00018646685636182614, |
|
"loss": 1.641, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.3780241935483871, |
|
"grad_norm": 0.08822676539421082, |
|
"learning_rate": 0.00018638514167698965, |
|
"loss": 1.6267, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3790322580645161, |
|
"grad_norm": 0.07680735737085342, |
|
"learning_rate": 0.00018630319904941535, |
|
"loss": 1.6484, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.3800403225806452, |
|
"grad_norm": 0.09095903486013412, |
|
"learning_rate": 0.0001862210286953236, |
|
"loss": 1.6041, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.3810483870967742, |
|
"grad_norm": 0.07204829901456833, |
|
"learning_rate": 0.0001861386308315357, |
|
"loss": 1.6058, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.38205645161290325, |
|
"grad_norm": 0.12447134405374527, |
|
"learning_rate": 0.00018605600567547318, |
|
"loss": 1.6528, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.38306451612903225, |
|
"grad_norm": 0.08234449476003647, |
|
"learning_rate": 0.00018597315344515744, |
|
"loss": 1.6408, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3840725806451613, |
|
"grad_norm": 0.0997692123055458, |
|
"learning_rate": 0.00018589007435920892, |
|
"loss": 1.631, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.3850806451612903, |
|
"grad_norm": 0.10275771468877792, |
|
"learning_rate": 0.0001858067686368468, |
|
"loss": 1.6979, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3860887096774194, |
|
"grad_norm": 0.07703027874231339, |
|
"learning_rate": 0.00018572323649788822, |
|
"loss": 1.6037, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.3870967741935484, |
|
"grad_norm": 0.08485141396522522, |
|
"learning_rate": 0.0001856394781627477, |
|
"loss": 1.6027, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.38810483870967744, |
|
"grad_norm": 0.09312494099140167, |
|
"learning_rate": 0.00018555549385243674, |
|
"loss": 1.6757, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.38911290322580644, |
|
"grad_norm": 0.09300917387008667, |
|
"learning_rate": 0.000185471283788563, |
|
"loss": 1.6615, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.3901209677419355, |
|
"grad_norm": 0.07911553978919983, |
|
"learning_rate": 0.0001853868481933299, |
|
"loss": 1.6214, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.3911290322580645, |
|
"grad_norm": 0.07960621267557144, |
|
"learning_rate": 0.00018530218728953597, |
|
"loss": 1.6709, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.39213709677419356, |
|
"grad_norm": 0.0723830983042717, |
|
"learning_rate": 0.0001852173013005742, |
|
"loss": 1.6287, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.39314516129032256, |
|
"grad_norm": 0.08178212493658066, |
|
"learning_rate": 0.00018513219045043156, |
|
"loss": 1.5888, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3941532258064516, |
|
"grad_norm": 0.07604778558015823, |
|
"learning_rate": 0.00018504685496368838, |
|
"loss": 1.6097, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.3951612903225806, |
|
"grad_norm": 0.07833520323038101, |
|
"learning_rate": 0.00018496129506551763, |
|
"loss": 1.6119, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.3961693548387097, |
|
"grad_norm": 0.0738687738776207, |
|
"learning_rate": 0.00018487551098168452, |
|
"loss": 1.646, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.3971774193548387, |
|
"grad_norm": 0.08156421035528183, |
|
"learning_rate": 0.0001847895029385458, |
|
"loss": 1.612, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.39818548387096775, |
|
"grad_norm": 0.0760064423084259, |
|
"learning_rate": 0.00018470327116304916, |
|
"loss": 1.6556, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.39919354838709675, |
|
"grad_norm": 0.07635514438152313, |
|
"learning_rate": 0.0001846168158827326, |
|
"loss": 1.5948, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.4002016129032258, |
|
"grad_norm": 0.07415641099214554, |
|
"learning_rate": 0.00018453013732572403, |
|
"loss": 1.6379, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.4012096774193548, |
|
"grad_norm": 0.07627629488706589, |
|
"learning_rate": 0.00018444323572074035, |
|
"loss": 1.6067, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.4022177419354839, |
|
"grad_norm": 0.08279147744178772, |
|
"learning_rate": 0.00018435611129708713, |
|
"loss": 1.6152, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.4032258064516129, |
|
"grad_norm": 0.07391797006130219, |
|
"learning_rate": 0.00018426876428465777, |
|
"loss": 1.6568, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.40423387096774194, |
|
"grad_norm": 0.07815629243850708, |
|
"learning_rate": 0.00018418119491393312, |
|
"loss": 1.6301, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.40524193548387094, |
|
"grad_norm": 0.07491758465766907, |
|
"learning_rate": 0.0001840934034159807, |
|
"loss": 1.6668, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"grad_norm": 0.07878877222537994, |
|
"learning_rate": 0.0001840053900224542, |
|
"loss": 1.6305, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.40725806451612906, |
|
"grad_norm": 0.07592154294252396, |
|
"learning_rate": 0.00018391715496559273, |
|
"loss": 1.6853, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.40826612903225806, |
|
"grad_norm": 0.082845039665699, |
|
"learning_rate": 0.00018382869847822044, |
|
"loss": 1.6918, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.4092741935483871, |
|
"grad_norm": 0.07842651754617691, |
|
"learning_rate": 0.00018374002079374569, |
|
"loss": 1.65, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.4102822580645161, |
|
"grad_norm": 0.07326355576515198, |
|
"learning_rate": 0.0001836511221461604, |
|
"loss": 1.6157, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.4112903225806452, |
|
"grad_norm": 0.08537916839122772, |
|
"learning_rate": 0.00018356200277003975, |
|
"loss": 1.5959, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.4122983870967742, |
|
"grad_norm": 0.09612290561199188, |
|
"learning_rate": 0.00018347266290054116, |
|
"loss": 1.6876, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.41330645161290325, |
|
"grad_norm": 0.07688483595848083, |
|
"learning_rate": 0.00018338310277340406, |
|
"loss": 1.6094, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.41431451612903225, |
|
"grad_norm": 0.09224136173725128, |
|
"learning_rate": 0.00018329332262494887, |
|
"loss": 1.616, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.4153225806451613, |
|
"grad_norm": 0.09629214555025101, |
|
"learning_rate": 0.00018320332269207667, |
|
"loss": 1.6197, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.4163306451612903, |
|
"grad_norm": 0.0956406518816948, |
|
"learning_rate": 0.00018311310321226853, |
|
"loss": 1.6939, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.4173387096774194, |
|
"grad_norm": 0.11505012959241867, |
|
"learning_rate": 0.00018302266442358472, |
|
"loss": 1.6692, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.4183467741935484, |
|
"grad_norm": 0.08150719106197357, |
|
"learning_rate": 0.0001829320065646643, |
|
"loss": 1.6428, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.41935483870967744, |
|
"grad_norm": 0.10705471783876419, |
|
"learning_rate": 0.0001828411298747243, |
|
"loss": 1.7328, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.42036290322580644, |
|
"grad_norm": 0.10280334204435349, |
|
"learning_rate": 0.00018275003459355924, |
|
"loss": 1.6245, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.4213709677419355, |
|
"grad_norm": 0.07620084285736084, |
|
"learning_rate": 0.00018265872096154043, |
|
"loss": 1.6317, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.4223790322580645, |
|
"grad_norm": 0.09292726963758469, |
|
"learning_rate": 0.00018256718921961525, |
|
"loss": 1.6555, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.42338709677419356, |
|
"grad_norm": 0.07884904742240906, |
|
"learning_rate": 0.00018247543960930672, |
|
"loss": 1.6325, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.42439516129032256, |
|
"grad_norm": 0.1114020049571991, |
|
"learning_rate": 0.00018238347237271266, |
|
"loss": 1.6861, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.4254032258064516, |
|
"grad_norm": 0.08363789319992065, |
|
"learning_rate": 0.00018229128775250523, |
|
"loss": 1.6398, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.4264112903225806, |
|
"grad_norm": 0.10317594558000565, |
|
"learning_rate": 0.00018219888599193008, |
|
"loss": 1.5966, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.4274193548387097, |
|
"grad_norm": 0.09324808418750763, |
|
"learning_rate": 0.00018210626733480593, |
|
"loss": 1.6463, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.4284274193548387, |
|
"grad_norm": 0.0866997167468071, |
|
"learning_rate": 0.00018201343202552367, |
|
"loss": 1.5802, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.42943548387096775, |
|
"grad_norm": 0.09528562426567078, |
|
"learning_rate": 0.00018192038030904608, |
|
"loss": 1.6768, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.43044354838709675, |
|
"grad_norm": 0.08449150621891022, |
|
"learning_rate": 0.00018182711243090678, |
|
"loss": 1.6323, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.4314516129032258, |
|
"grad_norm": 0.07713552564382553, |
|
"learning_rate": 0.00018173362863720986, |
|
"loss": 1.6264, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.4324596774193548, |
|
"grad_norm": 0.08549489825963974, |
|
"learning_rate": 0.00018163992917462918, |
|
"loss": 1.6628, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.4334677419354839, |
|
"grad_norm": 0.07783807069063187, |
|
"learning_rate": 0.00018154601429040757, |
|
"loss": 1.6892, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4344758064516129, |
|
"grad_norm": 0.09653409570455551, |
|
"learning_rate": 0.00018145188423235634, |
|
"loss": 1.6651, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.43548387096774194, |
|
"grad_norm": 0.08650687336921692, |
|
"learning_rate": 0.00018135753924885465, |
|
"loss": 1.6113, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.43649193548387094, |
|
"grad_norm": 0.08643219619989395, |
|
"learning_rate": 0.00018126297958884866, |
|
"loss": 1.6111, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 0.08586744964122772, |
|
"learning_rate": 0.00018116820550185107, |
|
"loss": 1.643, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.43850806451612906, |
|
"grad_norm": 0.09063699096441269, |
|
"learning_rate": 0.00018107321723794036, |
|
"loss": 1.6422, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.43951612903225806, |
|
"grad_norm": 0.07849163562059402, |
|
"learning_rate": 0.00018097801504776012, |
|
"loss": 1.6183, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.4405241935483871, |
|
"grad_norm": 0.07795203477144241, |
|
"learning_rate": 0.00018088259918251846, |
|
"loss": 1.6267, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.4415322580645161, |
|
"grad_norm": 0.08508776873350143, |
|
"learning_rate": 0.00018078696989398734, |
|
"loss": 1.6581, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.4425403225806452, |
|
"grad_norm": 0.08001305162906647, |
|
"learning_rate": 0.00018069112743450183, |
|
"loss": 1.6287, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.4435483870967742, |
|
"grad_norm": 0.07482777535915375, |
|
"learning_rate": 0.0001805950720569595, |
|
"loss": 1.6426, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.44455645161290325, |
|
"grad_norm": 0.07578035444021225, |
|
"learning_rate": 0.00018049880401481972, |
|
"loss": 1.6294, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.44556451612903225, |
|
"grad_norm": 0.07782859355211258, |
|
"learning_rate": 0.00018040232356210308, |
|
"loss": 1.5935, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.4465725806451613, |
|
"grad_norm": 0.07492804527282715, |
|
"learning_rate": 0.00018030563095339062, |
|
"loss": 1.5769, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.4475806451612903, |
|
"grad_norm": 0.07825621962547302, |
|
"learning_rate": 0.00018020872644382313, |
|
"loss": 1.5786, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.4485887096774194, |
|
"grad_norm": 0.09208081662654877, |
|
"learning_rate": 0.0001801116102891006, |
|
"loss": 1.6649, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.4495967741935484, |
|
"grad_norm": 0.07900070399045944, |
|
"learning_rate": 0.00018001428274548156, |
|
"loss": 1.6529, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.45060483870967744, |
|
"grad_norm": 0.07847368717193604, |
|
"learning_rate": 0.00017991674406978215, |
|
"loss": 1.6133, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.45161290322580644, |
|
"grad_norm": 0.0754162147641182, |
|
"learning_rate": 0.00017981899451937573, |
|
"loss": 1.6478, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.4526209677419355, |
|
"grad_norm": 0.08314093947410583, |
|
"learning_rate": 0.0001797210343521921, |
|
"loss": 1.5926, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.4536290322580645, |
|
"grad_norm": 0.07506029307842255, |
|
"learning_rate": 0.00017962286382671678, |
|
"loss": 1.6031, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.45463709677419356, |
|
"grad_norm": 0.09021966904401779, |
|
"learning_rate": 0.00017952448320199035, |
|
"loss": 1.5805, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.45564516129032256, |
|
"grad_norm": 0.07435688376426697, |
|
"learning_rate": 0.00017942589273760783, |
|
"loss": 1.6291, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.4566532258064516, |
|
"grad_norm": 0.07785916328430176, |
|
"learning_rate": 0.00017932709269371784, |
|
"loss": 1.6525, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.4576612903225806, |
|
"grad_norm": 0.07916136831045151, |
|
"learning_rate": 0.00017922808333102207, |
|
"loss": 1.6301, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.4586693548387097, |
|
"grad_norm": 0.08399738371372223, |
|
"learning_rate": 0.00017912886491077462, |
|
"loss": 1.6915, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4596774193548387, |
|
"grad_norm": 0.08618689328432083, |
|
"learning_rate": 0.000179029437694781, |
|
"loss": 1.6718, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.46068548387096775, |
|
"grad_norm": 0.07570008933544159, |
|
"learning_rate": 0.00017892980194539798, |
|
"loss": 1.6588, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.46169354838709675, |
|
"grad_norm": 0.09821120649576187, |
|
"learning_rate": 0.00017882995792553228, |
|
"loss": 1.6914, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.4627016129032258, |
|
"grad_norm": 0.07994726300239563, |
|
"learning_rate": 0.00017872990589864034, |
|
"loss": 1.6077, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.4637096774193548, |
|
"grad_norm": 0.08893134444952011, |
|
"learning_rate": 0.00017862964612872748, |
|
"loss": 1.6447, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4647177419354839, |
|
"grad_norm": 0.08347106724977493, |
|
"learning_rate": 0.00017852917888034706, |
|
"loss": 1.6501, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.4657258064516129, |
|
"grad_norm": 0.07879969477653503, |
|
"learning_rate": 0.00017842850441860005, |
|
"loss": 1.643, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.46673387096774194, |
|
"grad_norm": 0.08305401355028152, |
|
"learning_rate": 0.00017832762300913413, |
|
"loss": 1.677, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.46774193548387094, |
|
"grad_norm": 0.0827251598238945, |
|
"learning_rate": 0.00017822653491814304, |
|
"loss": 1.6432, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 0.08472172170877457, |
|
"learning_rate": 0.00017812524041236586, |
|
"loss": 1.654, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.46975806451612906, |
|
"grad_norm": 0.07689754664897919, |
|
"learning_rate": 0.0001780237397590864, |
|
"loss": 1.5642, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.47076612903225806, |
|
"grad_norm": 0.10658534616231918, |
|
"learning_rate": 0.00017792203322613236, |
|
"loss": 1.6561, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.4717741935483871, |
|
"grad_norm": 0.08347711712121964, |
|
"learning_rate": 0.0001778201210818748, |
|
"loss": 1.6595, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.4727822580645161, |
|
"grad_norm": 0.08595866709947586, |
|
"learning_rate": 0.0001777180035952272, |
|
"loss": 1.6185, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.4737903225806452, |
|
"grad_norm": 0.08824612945318222, |
|
"learning_rate": 0.00017761568103564487, |
|
"loss": 1.6779, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4747983870967742, |
|
"grad_norm": 0.07452390342950821, |
|
"learning_rate": 0.0001775131536731244, |
|
"loss": 1.6252, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.47580645161290325, |
|
"grad_norm": 0.09783647954463959, |
|
"learning_rate": 0.00017741042177820258, |
|
"loss": 1.6417, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.47681451612903225, |
|
"grad_norm": 0.07527977973222733, |
|
"learning_rate": 0.0001773074856219561, |
|
"loss": 1.6128, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.4778225806451613, |
|
"grad_norm": 0.07836946099996567, |
|
"learning_rate": 0.00017720434547600043, |
|
"loss": 1.625, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.4788306451612903, |
|
"grad_norm": 0.07427874952554703, |
|
"learning_rate": 0.00017710100161248945, |
|
"loss": 1.6261, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.4798387096774194, |
|
"grad_norm": 0.09168553352355957, |
|
"learning_rate": 0.0001769974543041145, |
|
"loss": 1.702, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.4808467741935484, |
|
"grad_norm": 0.0791415199637413, |
|
"learning_rate": 0.00017689370382410386, |
|
"loss": 1.6129, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.48185483870967744, |
|
"grad_norm": 0.07638856768608093, |
|
"learning_rate": 0.00017678975044622174, |
|
"loss": 1.593, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.48286290322580644, |
|
"grad_norm": 0.08905162662267685, |
|
"learning_rate": 0.00017668559444476793, |
|
"loss": 1.6803, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.4838709677419355, |
|
"grad_norm": 0.08039755374193192, |
|
"learning_rate": 0.00017658123609457668, |
|
"loss": 1.6624, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.4848790322580645, |
|
"grad_norm": 0.07831753045320511, |
|
"learning_rate": 0.00017647667567101632, |
|
"loss": 1.6602, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.48588709677419356, |
|
"grad_norm": 0.07645969092845917, |
|
"learning_rate": 0.00017637191344998837, |
|
"loss": 1.6462, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.48689516129032256, |
|
"grad_norm": 0.0790887251496315, |
|
"learning_rate": 0.00017626694970792673, |
|
"loss": 1.581, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.4879032258064516, |
|
"grad_norm": 0.07644886523485184, |
|
"learning_rate": 0.00017616178472179715, |
|
"loss": 1.6035, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.4889112903225806, |
|
"grad_norm": 0.08160758763551712, |
|
"learning_rate": 0.0001760564187690964, |
|
"loss": 1.6169, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4899193548387097, |
|
"grad_norm": 0.09234445542097092, |
|
"learning_rate": 0.00017595085212785146, |
|
"loss": 1.5878, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.4909274193548387, |
|
"grad_norm": 0.09042947739362717, |
|
"learning_rate": 0.0001758450850766189, |
|
"loss": 1.6629, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.49193548387096775, |
|
"grad_norm": 0.08583879470825195, |
|
"learning_rate": 0.00017573911789448414, |
|
"loss": 1.6398, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.49294354838709675, |
|
"grad_norm": 0.07878076285123825, |
|
"learning_rate": 0.00017563295086106063, |
|
"loss": 1.64, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.4939516129032258, |
|
"grad_norm": 0.08849604427814484, |
|
"learning_rate": 0.00017552658425648923, |
|
"loss": 1.6015, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4949596774193548, |
|
"grad_norm": 0.07961837202310562, |
|
"learning_rate": 0.00017542001836143731, |
|
"loss": 1.6392, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.4959677419354839, |
|
"grad_norm": 0.08883430808782578, |
|
"learning_rate": 0.00017531325345709816, |
|
"loss": 1.6417, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.4969758064516129, |
|
"grad_norm": 0.07420235127210617, |
|
"learning_rate": 0.00017520628982519023, |
|
"loss": 1.635, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.49798387096774194, |
|
"grad_norm": 0.08477555215358734, |
|
"learning_rate": 0.0001750991277479563, |
|
"loss": 1.6264, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.49899193548387094, |
|
"grad_norm": 0.07410185784101486, |
|
"learning_rate": 0.00017499176750816276, |
|
"loss": 1.6414, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.08427213877439499, |
|
"learning_rate": 0.00017488420938909893, |
|
"loss": 1.6546, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.501008064516129, |
|
"grad_norm": 0.0739702582359314, |
|
"learning_rate": 0.00017477645367457628, |
|
"loss": 1.6316, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.5020161290322581, |
|
"grad_norm": 0.08044146001338959, |
|
"learning_rate": 0.00017466850064892762, |
|
"loss": 1.6256, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.5030241935483871, |
|
"grad_norm": 0.08690078556537628, |
|
"learning_rate": 0.0001745603505970064, |
|
"loss": 1.589, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.5040322580645161, |
|
"grad_norm": 0.07842793315649033, |
|
"learning_rate": 0.00017445200380418607, |
|
"loss": 1.6352, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5050403225806451, |
|
"grad_norm": 0.08214239776134491, |
|
"learning_rate": 0.00017434346055635912, |
|
"loss": 1.6244, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.5060483870967742, |
|
"grad_norm": 0.07770374417304993, |
|
"learning_rate": 0.00017423472113993634, |
|
"loss": 1.65, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.5070564516129032, |
|
"grad_norm": 0.08378950506448746, |
|
"learning_rate": 0.00017412578584184637, |
|
"loss": 1.6129, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.5080645161290323, |
|
"grad_norm": 0.07839113473892212, |
|
"learning_rate": 0.00017401665494953453, |
|
"loss": 1.6479, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.5090725806451613, |
|
"grad_norm": 0.0775337815284729, |
|
"learning_rate": 0.00017390732875096227, |
|
"loss": 1.6005, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.5100806451612904, |
|
"grad_norm": 0.08532094955444336, |
|
"learning_rate": 0.00017379780753460654, |
|
"loss": 1.6669, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.5110887096774194, |
|
"grad_norm": 0.07484716176986694, |
|
"learning_rate": 0.00017368809158945872, |
|
"loss": 1.6786, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.5120967741935484, |
|
"grad_norm": 0.08861152827739716, |
|
"learning_rate": 0.00017357818120502402, |
|
"loss": 1.6753, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.5131048387096774, |
|
"grad_norm": 0.08586420863866806, |
|
"learning_rate": 0.00017346807667132085, |
|
"loss": 1.6483, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.5141129032258065, |
|
"grad_norm": 0.08970779180526733, |
|
"learning_rate": 0.00017335777827887978, |
|
"loss": 1.6776, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5151209677419355, |
|
"grad_norm": 0.08755983412265778, |
|
"learning_rate": 0.00017324728631874298, |
|
"loss": 1.6666, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.5161290322580645, |
|
"grad_norm": 0.08634518831968307, |
|
"learning_rate": 0.00017313660108246337, |
|
"loss": 1.6195, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.5171370967741935, |
|
"grad_norm": 0.08298657834529877, |
|
"learning_rate": 0.00017302572286210382, |
|
"loss": 1.5564, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.5181451612903226, |
|
"grad_norm": 0.07834544777870178, |
|
"learning_rate": 0.00017291465195023653, |
|
"loss": 1.6109, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.5191532258064516, |
|
"grad_norm": 0.09181385487318039, |
|
"learning_rate": 0.000172803388639942, |
|
"loss": 1.6387, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.5201612903225806, |
|
"grad_norm": 0.07698329538106918, |
|
"learning_rate": 0.00017269193322480856, |
|
"loss": 1.6223, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.5211693548387096, |
|
"grad_norm": 0.10118810087442398, |
|
"learning_rate": 0.00017258028599893136, |
|
"loss": 1.6365, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.5221774193548387, |
|
"grad_norm": 0.08565083891153336, |
|
"learning_rate": 0.00017246844725691166, |
|
"loss": 1.5905, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.5231854838709677, |
|
"grad_norm": 0.08563411980867386, |
|
"learning_rate": 0.00017235641729385615, |
|
"loss": 1.6141, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.5241935483870968, |
|
"grad_norm": 0.07669138163328171, |
|
"learning_rate": 0.00017224419640537598, |
|
"loss": 1.6278, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5252016129032258, |
|
"grad_norm": 0.09773047268390656, |
|
"learning_rate": 0.00017213178488758622, |
|
"loss": 1.7324, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.5262096774193549, |
|
"grad_norm": 0.07799120247364044, |
|
"learning_rate": 0.00017201918303710482, |
|
"loss": 1.5967, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.5272177419354839, |
|
"grad_norm": 0.0810832753777504, |
|
"learning_rate": 0.0001719063911510521, |
|
"loss": 1.6204, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.5282258064516129, |
|
"grad_norm": 0.08055137097835541, |
|
"learning_rate": 0.0001717934095270497, |
|
"loss": 1.6138, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.5292338709677419, |
|
"grad_norm": 0.08200159668922424, |
|
"learning_rate": 0.0001716802384632199, |
|
"loss": 1.6211, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.530241935483871, |
|
"grad_norm": 0.0793243944644928, |
|
"learning_rate": 0.00017156687825818504, |
|
"loss": 1.579, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"grad_norm": 0.08332548290491104, |
|
"learning_rate": 0.00017145332921106633, |
|
"loss": 1.5874, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.532258064516129, |
|
"grad_norm": 0.07582446932792664, |
|
"learning_rate": 0.00017133959162148336, |
|
"loss": 1.5871, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.5332661290322581, |
|
"grad_norm": 0.0803590714931488, |
|
"learning_rate": 0.00017122566578955324, |
|
"loss": 1.6451, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.5342741935483871, |
|
"grad_norm": 0.07705288380384445, |
|
"learning_rate": 0.00017111155201588978, |
|
"loss": 1.5892, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5352822580645161, |
|
"grad_norm": 0.08003994822502136, |
|
"learning_rate": 0.0001709972506016027, |
|
"loss": 1.6701, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.5362903225806451, |
|
"grad_norm": 0.07644215226173401, |
|
"learning_rate": 0.00017088276184829685, |
|
"loss": 1.6271, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.5372983870967742, |
|
"grad_norm": 0.08193427324295044, |
|
"learning_rate": 0.00017076808605807138, |
|
"loss": 1.5906, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.5383064516129032, |
|
"grad_norm": 0.08339913934469223, |
|
"learning_rate": 0.00017065322353351903, |
|
"loss": 1.6452, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.5393145161290323, |
|
"grad_norm": 0.08375068008899689, |
|
"learning_rate": 0.0001705381745777252, |
|
"loss": 1.6573, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5403225806451613, |
|
"grad_norm": 0.07980147749185562, |
|
"learning_rate": 0.00017042293949426726, |
|
"loss": 1.5999, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.5413306451612904, |
|
"grad_norm": 0.07945246994495392, |
|
"learning_rate": 0.00017030751858721375, |
|
"loss": 1.6372, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.5423387096774194, |
|
"grad_norm": 0.07931476086378098, |
|
"learning_rate": 0.00017019191216112342, |
|
"loss": 1.6244, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.5433467741935484, |
|
"grad_norm": 0.07984746247529984, |
|
"learning_rate": 0.00017007612052104474, |
|
"loss": 1.5592, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.5443548387096774, |
|
"grad_norm": 0.09376467764377594, |
|
"learning_rate": 0.00016996014397251466, |
|
"loss": 1.6774, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5453629032258065, |
|
"grad_norm": 0.08642607182264328, |
|
"learning_rate": 0.00016984398282155825, |
|
"loss": 1.6101, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.5463709677419355, |
|
"grad_norm": 0.07891902327537537, |
|
"learning_rate": 0.00016972763737468758, |
|
"loss": 1.6109, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.5473790322580645, |
|
"grad_norm": 0.07893992215394974, |
|
"learning_rate": 0.00016961110793890108, |
|
"loss": 1.643, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.5483870967741935, |
|
"grad_norm": 0.08107249438762665, |
|
"learning_rate": 0.00016949439482168255, |
|
"loss": 1.6093, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.5493951612903226, |
|
"grad_norm": 0.08450604975223541, |
|
"learning_rate": 0.00016937749833100064, |
|
"loss": 1.6406, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5504032258064516, |
|
"grad_norm": 0.08088622242212296, |
|
"learning_rate": 0.0001692604187753077, |
|
"loss": 1.6293, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.5514112903225806, |
|
"grad_norm": 0.09227669984102249, |
|
"learning_rate": 0.0001691431564635392, |
|
"loss": 1.6022, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.5524193548387096, |
|
"grad_norm": 0.08562039583921432, |
|
"learning_rate": 0.00016902571170511292, |
|
"loss": 1.6341, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.5534274193548387, |
|
"grad_norm": 0.09240545332431793, |
|
"learning_rate": 0.0001689080848099279, |
|
"loss": 1.643, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.5544354838709677, |
|
"grad_norm": 0.09082893282175064, |
|
"learning_rate": 0.00016879027608836394, |
|
"loss": 1.6132, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5554435483870968, |
|
"grad_norm": 0.08730785548686981, |
|
"learning_rate": 0.00016867228585128047, |
|
"loss": 1.631, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.5564516129032258, |
|
"grad_norm": 0.08937687426805496, |
|
"learning_rate": 0.000168554114410016, |
|
"loss": 1.7034, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.5574596774193549, |
|
"grad_norm": 0.07652641087770462, |
|
"learning_rate": 0.0001684357620763872, |
|
"loss": 1.6019, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.5584677419354839, |
|
"grad_norm": 0.08145558089017868, |
|
"learning_rate": 0.00016831722916268787, |
|
"loss": 1.6705, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.5594758064516129, |
|
"grad_norm": 0.09578656405210495, |
|
"learning_rate": 0.0001681985159816885, |
|
"loss": 1.6889, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5604838709677419, |
|
"grad_norm": 0.085781030356884, |
|
"learning_rate": 0.00016807962284663518, |
|
"loss": 1.6362, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.561491935483871, |
|
"grad_norm": 0.07998887449502945, |
|
"learning_rate": 0.0001679605500712488, |
|
"loss": 1.6045, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 0.09279566258192062, |
|
"learning_rate": 0.00016784129796972431, |
|
"loss": 1.5786, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.563508064516129, |
|
"grad_norm": 0.08150017261505127, |
|
"learning_rate": 0.0001677218668567299, |
|
"loss": 1.6313, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.5645161290322581, |
|
"grad_norm": 0.08562348783016205, |
|
"learning_rate": 0.00016760225704740594, |
|
"loss": 1.6047, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5655241935483871, |
|
"grad_norm": 0.09371492266654968, |
|
"learning_rate": 0.00016748246885736452, |
|
"loss": 1.6599, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.5665322580645161, |
|
"grad_norm": 0.08150923997163773, |
|
"learning_rate": 0.00016736250260268828, |
|
"loss": 1.6556, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.5675403225806451, |
|
"grad_norm": 0.08109602332115173, |
|
"learning_rate": 0.0001672423585999298, |
|
"loss": 1.6143, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.5685483870967742, |
|
"grad_norm": 0.07796693593263626, |
|
"learning_rate": 0.0001671220371661106, |
|
"loss": 1.6046, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.5695564516129032, |
|
"grad_norm": 0.08694635331630707, |
|
"learning_rate": 0.0001670015386187205, |
|
"loss": 1.6564, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5705645161290323, |
|
"grad_norm": 0.08142531663179398, |
|
"learning_rate": 0.00016688086327571648, |
|
"loss": 1.6406, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.5715725806451613, |
|
"grad_norm": 0.07907096296548843, |
|
"learning_rate": 0.00016676001145552228, |
|
"loss": 1.5948, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.5725806451612904, |
|
"grad_norm": 0.08147318661212921, |
|
"learning_rate": 0.0001666389834770271, |
|
"loss": 1.5789, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.5735887096774194, |
|
"grad_norm": 0.08041603118181229, |
|
"learning_rate": 0.00016651777965958503, |
|
"loss": 1.6229, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.5745967741935484, |
|
"grad_norm": 0.07601971924304962, |
|
"learning_rate": 0.00016639640032301413, |
|
"loss": 1.5722, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5756048387096774, |
|
"grad_norm": 0.08111369609832764, |
|
"learning_rate": 0.0001662748457875957, |
|
"loss": 1.6485, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.5766129032258065, |
|
"grad_norm": 0.07956349104642868, |
|
"learning_rate": 0.00016615311637407316, |
|
"loss": 1.6118, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.5776209677419355, |
|
"grad_norm": 0.08260063081979752, |
|
"learning_rate": 0.00016603121240365152, |
|
"loss": 1.6618, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.5786290322580645, |
|
"grad_norm": 0.077680803835392, |
|
"learning_rate": 0.00016590913419799633, |
|
"loss": 1.6316, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.5796370967741935, |
|
"grad_norm": 0.08391865342855453, |
|
"learning_rate": 0.00016578688207923289, |
|
"loss": 1.6273, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5806451612903226, |
|
"grad_norm": 0.08210872858762741, |
|
"learning_rate": 0.0001656644563699454, |
|
"loss": 1.6222, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.5816532258064516, |
|
"grad_norm": 0.07796725630760193, |
|
"learning_rate": 0.00016554185739317616, |
|
"loss": 1.5981, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.5826612903225806, |
|
"grad_norm": 0.0765356495976448, |
|
"learning_rate": 0.00016541908547242459, |
|
"loss": 1.6164, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.5836693548387096, |
|
"grad_norm": 0.090540811419487, |
|
"learning_rate": 0.00016529614093164648, |
|
"loss": 1.6994, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.5846774193548387, |
|
"grad_norm": 0.08444759249687195, |
|
"learning_rate": 0.00016517302409525315, |
|
"loss": 1.6154, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5856854838709677, |
|
"grad_norm": 0.0766877606511116, |
|
"learning_rate": 0.0001650497352881105, |
|
"loss": 1.6046, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.5866935483870968, |
|
"grad_norm": 0.0797574445605278, |
|
"learning_rate": 0.00016492627483553822, |
|
"loss": 1.6298, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.5877016129032258, |
|
"grad_norm": 0.07783927023410797, |
|
"learning_rate": 0.00016480264306330898, |
|
"loss": 1.5702, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.5887096774193549, |
|
"grad_norm": 0.08371485024690628, |
|
"learning_rate": 0.0001646788402976474, |
|
"loss": 1.6215, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.5897177419354839, |
|
"grad_norm": 0.08839402347803116, |
|
"learning_rate": 0.0001645548668652294, |
|
"loss": 1.5996, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5907258064516129, |
|
"grad_norm": 0.07832740247249603, |
|
"learning_rate": 0.0001644307230931811, |
|
"loss": 1.6281, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.5917338709677419, |
|
"grad_norm": 0.07553452998399734, |
|
"learning_rate": 0.00016430640930907827, |
|
"loss": 1.6147, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.592741935483871, |
|
"grad_norm": 0.07809963822364807, |
|
"learning_rate": 0.00016418192584094515, |
|
"loss": 1.5993, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"grad_norm": 0.07688596844673157, |
|
"learning_rate": 0.00016405727301725377, |
|
"loss": 1.6019, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.594758064516129, |
|
"grad_norm": 0.07611083984375, |
|
"learning_rate": 0.00016393245116692304, |
|
"loss": 1.5689, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5957661290322581, |
|
"grad_norm": 0.08132312446832657, |
|
"learning_rate": 0.00016380746061931786, |
|
"loss": 1.6307, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.5967741935483871, |
|
"grad_norm": 0.07959824800491333, |
|
"learning_rate": 0.00016368230170424826, |
|
"loss": 1.5851, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.5977822580645161, |
|
"grad_norm": 0.08210327476263046, |
|
"learning_rate": 0.0001635569747519686, |
|
"loss": 1.6139, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.5987903225806451, |
|
"grad_norm": 0.1014091745018959, |
|
"learning_rate": 0.00016343148009317657, |
|
"loss": 1.564, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.5997983870967742, |
|
"grad_norm": 0.08163224905729294, |
|
"learning_rate": 0.00016330581805901239, |
|
"loss": 1.5896, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.6008064516129032, |
|
"grad_norm": 0.08205213397741318, |
|
"learning_rate": 0.00016317998898105797, |
|
"loss": 1.6271, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.6018145161290323, |
|
"grad_norm": 0.07970026135444641, |
|
"learning_rate": 0.00016305399319133595, |
|
"loss": 1.6024, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.6028225806451613, |
|
"grad_norm": 0.07718155533075333, |
|
"learning_rate": 0.00016292783102230888, |
|
"loss": 1.5951, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.6038306451612904, |
|
"grad_norm": 0.09728401899337769, |
|
"learning_rate": 0.00016280150280687834, |
|
"loss": 1.6838, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.6048387096774194, |
|
"grad_norm": 0.08184093236923218, |
|
"learning_rate": 0.00016267500887838412, |
|
"loss": 1.5902, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6058467741935484, |
|
"grad_norm": 0.08744041621685028, |
|
"learning_rate": 0.00016254834957060309, |
|
"loss": 1.6292, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.6068548387096774, |
|
"grad_norm": 0.09200835227966309, |
|
"learning_rate": 0.00016242152521774874, |
|
"loss": 1.6393, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.6078629032258065, |
|
"grad_norm": 0.08810313045978546, |
|
"learning_rate": 0.0001622945361544699, |
|
"loss": 1.6201, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.6088709677419355, |
|
"grad_norm": 0.09700248390436172, |
|
"learning_rate": 0.00016216738271584999, |
|
"loss": 1.5638, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.6098790322580645, |
|
"grad_norm": 0.08686663955450058, |
|
"learning_rate": 0.00016204006523740634, |
|
"loss": 1.5734, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.6108870967741935, |
|
"grad_norm": 0.07873237133026123, |
|
"learning_rate": 0.00016191258405508896, |
|
"loss": 1.5469, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.6118951612903226, |
|
"grad_norm": 0.08019126206636429, |
|
"learning_rate": 0.0001617849395052799, |
|
"loss": 1.6431, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.6129032258064516, |
|
"grad_norm": 0.08971964567899704, |
|
"learning_rate": 0.00016165713192479227, |
|
"loss": 1.6535, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.6139112903225806, |
|
"grad_norm": 0.07752855867147446, |
|
"learning_rate": 0.00016152916165086936, |
|
"loss": 1.5829, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.6149193548387096, |
|
"grad_norm": 0.08348417282104492, |
|
"learning_rate": 0.00016140102902118377, |
|
"loss": 1.6305, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6159274193548387, |
|
"grad_norm": 0.0761261060833931, |
|
"learning_rate": 0.0001612727343738365, |
|
"loss": 1.5835, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.6169354838709677, |
|
"grad_norm": 0.11013983935117722, |
|
"learning_rate": 0.00016114427804735603, |
|
"loss": 1.6364, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.6179435483870968, |
|
"grad_norm": 0.086505226790905, |
|
"learning_rate": 0.00016101566038069756, |
|
"loss": 1.61, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.6189516129032258, |
|
"grad_norm": 0.08692600578069687, |
|
"learning_rate": 0.00016088688171324184, |
|
"loss": 1.6153, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.6199596774193549, |
|
"grad_norm": 0.09537503123283386, |
|
"learning_rate": 0.0001607579423847946, |
|
"loss": 1.6053, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.6209677419354839, |
|
"grad_norm": 0.08204115927219391, |
|
"learning_rate": 0.00016062884273558545, |
|
"loss": 1.5939, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.6219758064516129, |
|
"grad_norm": 0.08595214784145355, |
|
"learning_rate": 0.00016049958310626708, |
|
"loss": 1.6162, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.6229838709677419, |
|
"grad_norm": 0.08318503201007843, |
|
"learning_rate": 0.00016037016383791425, |
|
"loss": 1.6401, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.623991935483871, |
|
"grad_norm": 0.08207780867815018, |
|
"learning_rate": 0.00016024058527202298, |
|
"loss": 1.6226, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.08268122375011444, |
|
"learning_rate": 0.00016011084775050959, |
|
"loss": 1.6522, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.626008064516129, |
|
"grad_norm": 0.07751034945249557, |
|
"learning_rate": 0.00015998095161570995, |
|
"loss": 1.5455, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.6270161290322581, |
|
"grad_norm": 0.08539839088916779, |
|
"learning_rate": 0.00015985089721037832, |
|
"loss": 1.6116, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.6280241935483871, |
|
"grad_norm": 0.08065900206565857, |
|
"learning_rate": 0.00015972068487768665, |
|
"loss": 1.6102, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.6290322580645161, |
|
"grad_norm": 0.07968778163194656, |
|
"learning_rate": 0.00015959031496122364, |
|
"loss": 1.6065, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.6300403225806451, |
|
"grad_norm": 0.08040513843297958, |
|
"learning_rate": 0.00015945978780499375, |
|
"loss": 1.5974, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6310483870967742, |
|
"grad_norm": 0.0841718390583992, |
|
"learning_rate": 0.00015932910375341639, |
|
"loss": 1.5943, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.6320564516129032, |
|
"grad_norm": 0.07834211736917496, |
|
"learning_rate": 0.0001591982631513249, |
|
"loss": 1.5856, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.6330645161290323, |
|
"grad_norm": 0.08371677994728088, |
|
"learning_rate": 0.00015906726634396575, |
|
"loss": 1.5972, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.6340725806451613, |
|
"grad_norm": 0.09251397848129272, |
|
"learning_rate": 0.00015893611367699762, |
|
"loss": 1.6529, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.6350806451612904, |
|
"grad_norm": 0.080534428358078, |
|
"learning_rate": 0.00015880480549649038, |
|
"loss": 1.5786, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6360887096774194, |
|
"grad_norm": 0.09134898334741592, |
|
"learning_rate": 0.00015867334214892436, |
|
"loss": 1.6303, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.6370967741935484, |
|
"grad_norm": 0.08673352748155594, |
|
"learning_rate": 0.00015854172398118913, |
|
"loss": 1.6281, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.6381048387096774, |
|
"grad_norm": 0.11661474406719208, |
|
"learning_rate": 0.000158409951340583, |
|
"loss": 1.6826, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.6391129032258065, |
|
"grad_norm": 0.08508265018463135, |
|
"learning_rate": 0.0001582780245748118, |
|
"loss": 1.5785, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.6401209677419355, |
|
"grad_norm": 0.09865213930606842, |
|
"learning_rate": 0.00015814594403198794, |
|
"loss": 1.619, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6411290322580645, |
|
"grad_norm": 0.08882018178701401, |
|
"learning_rate": 0.00015801371006062982, |
|
"loss": 1.6076, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.6421370967741935, |
|
"grad_norm": 0.10395356267690659, |
|
"learning_rate": 0.00015788132300966046, |
|
"loss": 1.6193, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.6431451612903226, |
|
"grad_norm": 0.08556309342384338, |
|
"learning_rate": 0.00015774878322840694, |
|
"loss": 1.6313, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.6441532258064516, |
|
"grad_norm": 0.08463555574417114, |
|
"learning_rate": 0.00015761609106659935, |
|
"loss": 1.5852, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": 0.08253596723079681, |
|
"learning_rate": 0.0001574832468743698, |
|
"loss": 1.65, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6461693548387096, |
|
"grad_norm": 0.09345366060733795, |
|
"learning_rate": 0.0001573502510022516, |
|
"loss": 1.5869, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.6471774193548387, |
|
"grad_norm": 0.08240879327058792, |
|
"learning_rate": 0.00015721710380117826, |
|
"loss": 1.6057, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.6481854838709677, |
|
"grad_norm": 0.08767805248498917, |
|
"learning_rate": 0.0001570838056224827, |
|
"loss": 1.5864, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.6491935483870968, |
|
"grad_norm": 0.08595956861972809, |
|
"learning_rate": 0.0001569503568178961, |
|
"loss": 1.593, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.6502016129032258, |
|
"grad_norm": 0.0859324112534523, |
|
"learning_rate": 0.0001568167577395471, |
|
"loss": 1.6248, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6512096774193549, |
|
"grad_norm": 0.07949813455343246, |
|
"learning_rate": 0.00015668300873996095, |
|
"loss": 1.6269, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.6522177419354839, |
|
"grad_norm": 0.08270735293626785, |
|
"learning_rate": 0.00015654911017205846, |
|
"loss": 1.6161, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.6532258064516129, |
|
"grad_norm": 0.08057011663913727, |
|
"learning_rate": 0.000156415062389155, |
|
"loss": 1.615, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.6542338709677419, |
|
"grad_norm": 0.07924232631921768, |
|
"learning_rate": 0.00015628086574495992, |
|
"loss": 1.5898, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.655241935483871, |
|
"grad_norm": 0.08501306176185608, |
|
"learning_rate": 0.00015614652059357508, |
|
"loss": 1.6709, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 0.08682959526777267, |
|
"learning_rate": 0.00015601202728949436, |
|
"loss": 1.6214, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.657258064516129, |
|
"grad_norm": 0.08149803429841995, |
|
"learning_rate": 0.00015587738618760258, |
|
"loss": 1.6337, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.6582661290322581, |
|
"grad_norm": 0.09022454917430878, |
|
"learning_rate": 0.00015574259764317448, |
|
"loss": 1.5809, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.6592741935483871, |
|
"grad_norm": 0.08189895004034042, |
|
"learning_rate": 0.00015560766201187386, |
|
"loss": 1.6188, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.6602822580645161, |
|
"grad_norm": 0.080174021422863, |
|
"learning_rate": 0.00015547257964975273, |
|
"loss": 1.5991, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6612903225806451, |
|
"grad_norm": 0.08346089720726013, |
|
"learning_rate": 0.0001553373509132501, |
|
"loss": 1.5734, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.6622983870967742, |
|
"grad_norm": 0.07657915353775024, |
|
"learning_rate": 0.00015520197615919145, |
|
"loss": 1.5422, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.6633064516129032, |
|
"grad_norm": 0.08029603213071823, |
|
"learning_rate": 0.0001550664557447873, |
|
"loss": 1.5886, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.6643145161290323, |
|
"grad_norm": 0.08529450744390488, |
|
"learning_rate": 0.0001549307900276327, |
|
"loss": 1.629, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.6653225806451613, |
|
"grad_norm": 0.07882041484117508, |
|
"learning_rate": 0.0001547949793657061, |
|
"loss": 1.66, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6663306451612904, |
|
"grad_norm": 0.08514705300331116, |
|
"learning_rate": 0.00015465902411736828, |
|
"loss": 1.6113, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.6673387096774194, |
|
"grad_norm": 0.07738941162824631, |
|
"learning_rate": 0.00015452292464136167, |
|
"loss": 1.5959, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.6683467741935484, |
|
"grad_norm": 0.08031867444515228, |
|
"learning_rate": 0.0001543866812968092, |
|
"loss": 1.601, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.6693548387096774, |
|
"grad_norm": 0.08055873215198517, |
|
"learning_rate": 0.00015425029444321347, |
|
"loss": 1.5731, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.6703629032258065, |
|
"grad_norm": 0.08486857265233994, |
|
"learning_rate": 0.0001541137644404557, |
|
"loss": 1.5703, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.6713709677419355, |
|
"grad_norm": 0.07934212684631348, |
|
"learning_rate": 0.0001539770916487949, |
|
"loss": 1.6163, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.6723790322580645, |
|
"grad_norm": 0.08954691141843796, |
|
"learning_rate": 0.0001538402764288668, |
|
"loss": 1.6139, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.6733870967741935, |
|
"grad_norm": 0.08842763304710388, |
|
"learning_rate": 0.00015370331914168296, |
|
"loss": 1.6322, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.6743951612903226, |
|
"grad_norm": 0.08686459064483643, |
|
"learning_rate": 0.00015356622014862988, |
|
"loss": 1.59, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.6754032258064516, |
|
"grad_norm": 0.07980991154909134, |
|
"learning_rate": 0.00015342897981146785, |
|
"loss": 1.576, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6764112903225806, |
|
"grad_norm": 0.08613515645265579, |
|
"learning_rate": 0.00015329159849233022, |
|
"loss": 1.6328, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.6774193548387096, |
|
"grad_norm": 0.10668696463108063, |
|
"learning_rate": 0.0001531540765537223, |
|
"loss": 1.6482, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.6784274193548387, |
|
"grad_norm": 0.07826445251703262, |
|
"learning_rate": 0.00015301641435852046, |
|
"loss": 1.5984, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.6794354838709677, |
|
"grad_norm": 0.09749854356050491, |
|
"learning_rate": 0.00015287861226997125, |
|
"loss": 1.586, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.6804435483870968, |
|
"grad_norm": 0.09301649779081345, |
|
"learning_rate": 0.00015274067065169017, |
|
"loss": 1.6806, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6814516129032258, |
|
"grad_norm": 0.08719351887702942, |
|
"learning_rate": 0.00015260258986766104, |
|
"loss": 1.5568, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.6824596774193549, |
|
"grad_norm": 0.08005709946155548, |
|
"learning_rate": 0.00015246437028223486, |
|
"loss": 1.6252, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.6834677419354839, |
|
"grad_norm": 0.08304545283317566, |
|
"learning_rate": 0.00015232601226012886, |
|
"loss": 1.6137, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.6844758064516129, |
|
"grad_norm": 0.07949443906545639, |
|
"learning_rate": 0.0001521875161664256, |
|
"loss": 1.5808, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.6854838709677419, |
|
"grad_norm": 0.08979618549346924, |
|
"learning_rate": 0.00015204888236657188, |
|
"loss": 1.6164, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.686491935483871, |
|
"grad_norm": 0.07843173295259476, |
|
"learning_rate": 0.00015191011122637796, |
|
"loss": 1.6246, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 0.09026903659105301, |
|
"learning_rate": 0.00015177120311201647, |
|
"loss": 1.6352, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.688508064516129, |
|
"grad_norm": 0.09385894238948822, |
|
"learning_rate": 0.00015163215839002146, |
|
"loss": 1.622, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.6895161290322581, |
|
"grad_norm": 0.07961908727884293, |
|
"learning_rate": 0.0001514929774272874, |
|
"loss": 1.5745, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.6905241935483871, |
|
"grad_norm": 0.08670490235090256, |
|
"learning_rate": 0.00015135366059106832, |
|
"loss": 1.5945, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6915322580645161, |
|
"grad_norm": 0.08476680517196655, |
|
"learning_rate": 0.00015121420824897678, |
|
"loss": 1.6316, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.6925403225806451, |
|
"grad_norm": 0.0937148854136467, |
|
"learning_rate": 0.00015107462076898289, |
|
"loss": 1.6054, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.6935483870967742, |
|
"grad_norm": 0.08981835842132568, |
|
"learning_rate": 0.00015093489851941328, |
|
"loss": 1.6683, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.6945564516129032, |
|
"grad_norm": 0.08677362650632858, |
|
"learning_rate": 0.0001507950418689503, |
|
"loss": 1.6306, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.6955645161290323, |
|
"grad_norm": 0.07769922912120819, |
|
"learning_rate": 0.00015065505118663078, |
|
"loss": 1.6164, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6965725806451613, |
|
"grad_norm": 0.08614321053028107, |
|
"learning_rate": 0.00015051492684184546, |
|
"loss": 1.5615, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.6975806451612904, |
|
"grad_norm": 0.09230528026819229, |
|
"learning_rate": 0.00015037466920433753, |
|
"loss": 1.6901, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.6985887096774194, |
|
"grad_norm": 0.09350752830505371, |
|
"learning_rate": 0.00015023427864420202, |
|
"loss": 1.6465, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.6995967741935484, |
|
"grad_norm": 0.09468571841716766, |
|
"learning_rate": 0.00015009375553188468, |
|
"loss": 1.6485, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.7006048387096774, |
|
"grad_norm": 0.08464954793453217, |
|
"learning_rate": 0.00014995310023818107, |
|
"loss": 1.5865, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.7016129032258065, |
|
"grad_norm": 0.09060323238372803, |
|
"learning_rate": 0.00014981231313423545, |
|
"loss": 1.6074, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.7026209677419355, |
|
"grad_norm": 0.08714771270751953, |
|
"learning_rate": 0.00014967139459153993, |
|
"loss": 1.5824, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.7036290322580645, |
|
"grad_norm": 0.0776834785938263, |
|
"learning_rate": 0.00014953034498193341, |
|
"loss": 1.5689, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.7046370967741935, |
|
"grad_norm": 0.08315813541412354, |
|
"learning_rate": 0.0001493891646776007, |
|
"loss": 1.6187, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.7056451612903226, |
|
"grad_norm": 0.07914920896291733, |
|
"learning_rate": 0.00014924785405107143, |
|
"loss": 1.5417, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7066532258064516, |
|
"grad_norm": 0.08314627408981323, |
|
"learning_rate": 0.00014910641347521907, |
|
"loss": 1.6298, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.7076612903225806, |
|
"grad_norm": 0.07665257155895233, |
|
"learning_rate": 0.0001489648433232601, |
|
"loss": 1.5464, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.7086693548387096, |
|
"grad_norm": 0.09670589119195938, |
|
"learning_rate": 0.00014882314396875274, |
|
"loss": 1.654, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.7096774193548387, |
|
"grad_norm": 0.08459917455911636, |
|
"learning_rate": 0.00014868131578559633, |
|
"loss": 1.6326, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.7106854838709677, |
|
"grad_norm": 0.08236029744148254, |
|
"learning_rate": 0.00014853935914802994, |
|
"loss": 1.59, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.7116935483870968, |
|
"grad_norm": 0.07780009508132935, |
|
"learning_rate": 0.0001483972744306318, |
|
"loss": 1.5801, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.7127016129032258, |
|
"grad_norm": 0.0835953950881958, |
|
"learning_rate": 0.00014825506200831794, |
|
"loss": 1.5765, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.7137096774193549, |
|
"grad_norm": 0.08014727383852005, |
|
"learning_rate": 0.00014811272225634145, |
|
"loss": 1.6156, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.7147177419354839, |
|
"grad_norm": 0.08108653128147125, |
|
"learning_rate": 0.00014797025555029133, |
|
"loss": 1.5825, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.7157258064516129, |
|
"grad_norm": 0.08455085754394531, |
|
"learning_rate": 0.00014782766226609166, |
|
"loss": 1.6218, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7167338709677419, |
|
"grad_norm": 0.07630985975265503, |
|
"learning_rate": 0.00014768494278000048, |
|
"loss": 1.5889, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.717741935483871, |
|
"grad_norm": 0.08318428695201874, |
|
"learning_rate": 0.00014754209746860878, |
|
"loss": 1.5827, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"grad_norm": 0.08248715102672577, |
|
"learning_rate": 0.00014739912670883967, |
|
"loss": 1.621, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.719758064516129, |
|
"grad_norm": 0.07857991755008698, |
|
"learning_rate": 0.00014725603087794716, |
|
"loss": 1.5605, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.7207661290322581, |
|
"grad_norm": 0.08540824055671692, |
|
"learning_rate": 0.0001471128103535154, |
|
"loss": 1.5471, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.7217741935483871, |
|
"grad_norm": 0.0777583196759224, |
|
"learning_rate": 0.00014696946551345747, |
|
"loss": 1.5029, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.7227822580645161, |
|
"grad_norm": 0.08295831829309464, |
|
"learning_rate": 0.00014682599673601458, |
|
"loss": 1.5709, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.7237903225806451, |
|
"grad_norm": 0.08069245517253876, |
|
"learning_rate": 0.00014668240439975482, |
|
"loss": 1.5601, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.7247983870967742, |
|
"grad_norm": 0.08142071962356567, |
|
"learning_rate": 0.00014653868888357249, |
|
"loss": 1.6004, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.7258064516129032, |
|
"grad_norm": 0.09048129618167877, |
|
"learning_rate": 0.0001463948505666868, |
|
"loss": 1.6614, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7268145161290323, |
|
"grad_norm": 0.09065764397382736, |
|
"learning_rate": 0.00014625088982864098, |
|
"loss": 1.6612, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.7278225806451613, |
|
"grad_norm": 0.0859372541308403, |
|
"learning_rate": 0.00014610680704930142, |
|
"loss": 1.5914, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.7288306451612904, |
|
"grad_norm": 0.0821571797132492, |
|
"learning_rate": 0.0001459626026088564, |
|
"loss": 1.5458, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.7298387096774194, |
|
"grad_norm": 0.08414388447999954, |
|
"learning_rate": 0.0001458182768878153, |
|
"loss": 1.5608, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.7308467741935484, |
|
"grad_norm": 0.08222994953393936, |
|
"learning_rate": 0.00014567383026700752, |
|
"loss": 1.5943, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.7318548387096774, |
|
"grad_norm": 0.08996201306581497, |
|
"learning_rate": 0.0001455292631275814, |
|
"loss": 1.5524, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.7328629032258065, |
|
"grad_norm": 0.08061891794204712, |
|
"learning_rate": 0.0001453845758510034, |
|
"loss": 1.6428, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.7338709677419355, |
|
"grad_norm": 0.09720771759748459, |
|
"learning_rate": 0.0001452397688190569, |
|
"loss": 1.6538, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.7348790322580645, |
|
"grad_norm": 0.08087541162967682, |
|
"learning_rate": 0.00014509484241384134, |
|
"loss": 1.6078, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.7358870967741935, |
|
"grad_norm": 0.09106358885765076, |
|
"learning_rate": 0.00014494979701777102, |
|
"loss": 1.589, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7368951612903226, |
|
"grad_norm": 0.07827623188495636, |
|
"learning_rate": 0.00014480463301357445, |
|
"loss": 1.5937, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.7379032258064516, |
|
"grad_norm": 0.09681122750043869, |
|
"learning_rate": 0.00014465935078429286, |
|
"loss": 1.6308, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.7389112903225806, |
|
"grad_norm": 0.0876043364405632, |
|
"learning_rate": 0.00014451395071327964, |
|
"loss": 1.6136, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.7399193548387096, |
|
"grad_norm": 0.10326588153839111, |
|
"learning_rate": 0.00014436843318419896, |
|
"loss": 1.5964, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.7409274193548387, |
|
"grad_norm": 0.08790312707424164, |
|
"learning_rate": 0.00014422279858102504, |
|
"loss": 1.5992, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.7419354838709677, |
|
"grad_norm": 0.0805894061923027, |
|
"learning_rate": 0.00014407704728804097, |
|
"loss": 1.5503, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.7429435483870968, |
|
"grad_norm": 0.0813809409737587, |
|
"learning_rate": 0.00014393117968983777, |
|
"loss": 1.5807, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.7439516129032258, |
|
"grad_norm": 0.0871429443359375, |
|
"learning_rate": 0.0001437851961713133, |
|
"loss": 1.6493, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.7449596774193549, |
|
"grad_norm": 0.08929460495710373, |
|
"learning_rate": 0.0001436390971176714, |
|
"loss": 1.58, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.7459677419354839, |
|
"grad_norm": 0.08278234302997589, |
|
"learning_rate": 0.0001434928829144206, |
|
"loss": 1.6442, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7469758064516129, |
|
"grad_norm": 0.09997319430112839, |
|
"learning_rate": 0.00014334655394737355, |
|
"loss": 1.5756, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.7479838709677419, |
|
"grad_norm": 0.07914005219936371, |
|
"learning_rate": 0.0001432001106026454, |
|
"loss": 1.5642, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.748991935483871, |
|
"grad_norm": 0.09618489444255829, |
|
"learning_rate": 0.00014305355326665339, |
|
"loss": 1.6108, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.09149473160505295, |
|
"learning_rate": 0.00014290688232611526, |
|
"loss": 1.6007, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.751008064516129, |
|
"grad_norm": 0.08550098538398743, |
|
"learning_rate": 0.00014276009816804885, |
|
"loss": 1.588, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7520161290322581, |
|
"grad_norm": 0.08285672217607498, |
|
"learning_rate": 0.00014261320117977042, |
|
"loss": 1.5845, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.7530241935483871, |
|
"grad_norm": 0.09440962970256805, |
|
"learning_rate": 0.00014246619174889422, |
|
"loss": 1.7127, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.7540322580645161, |
|
"grad_norm": 0.08045286685228348, |
|
"learning_rate": 0.00014231907026333098, |
|
"loss": 1.6066, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.7550403225806451, |
|
"grad_norm": 0.08301718533039093, |
|
"learning_rate": 0.0001421718371112873, |
|
"loss": 1.5732, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.7560483870967742, |
|
"grad_norm": 0.08225584775209427, |
|
"learning_rate": 0.00014202449268126426, |
|
"loss": 1.563, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7570564516129032, |
|
"grad_norm": 0.08871738612651825, |
|
"learning_rate": 0.00014187703736205667, |
|
"loss": 1.6364, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.7580645161290323, |
|
"grad_norm": 0.08189701288938522, |
|
"learning_rate": 0.00014172947154275195, |
|
"loss": 1.5972, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.7590725806451613, |
|
"grad_norm": 0.08560924977064133, |
|
"learning_rate": 0.00014158179561272907, |
|
"loss": 1.5971, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.7600806451612904, |
|
"grad_norm": 0.08616410940885544, |
|
"learning_rate": 0.00014143400996165746, |
|
"loss": 1.6331, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.7610887096774194, |
|
"grad_norm": 0.08963197469711304, |
|
"learning_rate": 0.00014128611497949626, |
|
"loss": 1.5887, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.7620967741935484, |
|
"grad_norm": 0.09272851049900055, |
|
"learning_rate": 0.0001411381110564929, |
|
"loss": 1.5692, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.7631048387096774, |
|
"grad_norm": 0.08667407929897308, |
|
"learning_rate": 0.0001409899985831824, |
|
"loss": 1.5852, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.7641129032258065, |
|
"grad_norm": 0.08354497700929642, |
|
"learning_rate": 0.00014084177795038613, |
|
"loss": 1.6024, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.7651209677419355, |
|
"grad_norm": 0.09121601283550262, |
|
"learning_rate": 0.00014069344954921096, |
|
"loss": 1.5896, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.7661290322580645, |
|
"grad_norm": 0.09622003138065338, |
|
"learning_rate": 0.00014054501377104797, |
|
"loss": 1.5781, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7671370967741935, |
|
"grad_norm": 0.08506747335195541, |
|
"learning_rate": 0.00014039647100757177, |
|
"loss": 1.5752, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.7681451612903226, |
|
"grad_norm": 0.09725549817085266, |
|
"learning_rate": 0.00014024782165073912, |
|
"loss": 1.599, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.7691532258064516, |
|
"grad_norm": 0.08023160696029663, |
|
"learning_rate": 0.00014009906609278806, |
|
"loss": 1.5503, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.7701612903225806, |
|
"grad_norm": 0.092674620449543, |
|
"learning_rate": 0.00013995020472623693, |
|
"loss": 1.6196, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.7711693548387096, |
|
"grad_norm": 0.07756571471691132, |
|
"learning_rate": 0.0001398012379438832, |
|
"loss": 1.599, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.7721774193548387, |
|
"grad_norm": 0.09609861671924591, |
|
"learning_rate": 0.00013965216613880257, |
|
"loss": 1.6356, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.7731854838709677, |
|
"grad_norm": 0.08073242753744125, |
|
"learning_rate": 0.00013950298970434775, |
|
"loss": 1.5975, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.7741935483870968, |
|
"grad_norm": 0.08342421054840088, |
|
"learning_rate": 0.00013935370903414768, |
|
"loss": 1.594, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.7752016129032258, |
|
"grad_norm": 0.07886181771755219, |
|
"learning_rate": 0.00013920432452210619, |
|
"loss": 1.5947, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.7762096774193549, |
|
"grad_norm": 0.08256496489048004, |
|
"learning_rate": 0.00013905483656240125, |
|
"loss": 1.5772, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7772177419354839, |
|
"grad_norm": 0.08527923375368118, |
|
"learning_rate": 0.0001389052455494837, |
|
"loss": 1.5936, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.7782258064516129, |
|
"grad_norm": 0.08340179920196533, |
|
"learning_rate": 0.00013875555187807637, |
|
"loss": 1.5786, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.7792338709677419, |
|
"grad_norm": 0.07682585716247559, |
|
"learning_rate": 0.00013860575594317292, |
|
"loss": 1.542, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.780241935483871, |
|
"grad_norm": 0.08884165436029434, |
|
"learning_rate": 0.00013845585814003684, |
|
"loss": 1.5969, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 0.07785353809595108, |
|
"learning_rate": 0.00013830585886420054, |
|
"loss": 1.5671, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.782258064516129, |
|
"grad_norm": 0.08034134656190872, |
|
"learning_rate": 0.000138155758511464, |
|
"loss": 1.5774, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.7832661290322581, |
|
"grad_norm": 0.0796407014131546, |
|
"learning_rate": 0.0001380055574778941, |
|
"loss": 1.5606, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.7842741935483871, |
|
"grad_norm": 0.07933478057384491, |
|
"learning_rate": 0.00013785525615982319, |
|
"loss": 1.5651, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.7852822580645161, |
|
"grad_norm": 0.08734553307294846, |
|
"learning_rate": 0.00013770485495384843, |
|
"loss": 1.6262, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.7862903225806451, |
|
"grad_norm": 0.08349025249481201, |
|
"learning_rate": 0.0001375543542568304, |
|
"loss": 1.5835, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7872983870967742, |
|
"grad_norm": 0.09640732407569885, |
|
"learning_rate": 0.00013740375446589232, |
|
"loss": 1.586, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.7883064516129032, |
|
"grad_norm": 0.09520639479160309, |
|
"learning_rate": 0.00013725305597841878, |
|
"loss": 1.6521, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.7893145161290323, |
|
"grad_norm": 0.07939834147691727, |
|
"learning_rate": 0.00013710225919205484, |
|
"loss": 1.5062, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.7903225806451613, |
|
"grad_norm": 0.08648645132780075, |
|
"learning_rate": 0.000136951364504705, |
|
"loss": 1.6303, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.7913306451612904, |
|
"grad_norm": 0.09467138350009918, |
|
"learning_rate": 0.00013680037231453203, |
|
"loss": 1.6333, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.7923387096774194, |
|
"grad_norm": 0.08505504578351974, |
|
"learning_rate": 0.000136649283019956, |
|
"loss": 1.5953, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.7933467741935484, |
|
"grad_norm": 0.0903257429599762, |
|
"learning_rate": 0.00013649809701965311, |
|
"loss": 1.5841, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.7943548387096774, |
|
"grad_norm": 0.08327475190162659, |
|
"learning_rate": 0.00013634681471255493, |
|
"loss": 1.578, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.7953629032258065, |
|
"grad_norm": 0.09311467409133911, |
|
"learning_rate": 0.000136195436497847, |
|
"loss": 1.5911, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.7963709677419355, |
|
"grad_norm": 0.09214780479669571, |
|
"learning_rate": 0.00013604396277496796, |
|
"loss": 1.6009, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7973790322580645, |
|
"grad_norm": 0.08812731504440308, |
|
"learning_rate": 0.00013589239394360848, |
|
"loss": 1.6141, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.7983870967741935, |
|
"grad_norm": 0.11389174312353134, |
|
"learning_rate": 0.00013574073040371022, |
|
"loss": 1.6369, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.7993951612903226, |
|
"grad_norm": 0.08469700813293457, |
|
"learning_rate": 0.00013558897255546473, |
|
"loss": 1.6009, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.8004032258064516, |
|
"grad_norm": 0.08306135982275009, |
|
"learning_rate": 0.0001354371207993123, |
|
"loss": 1.5556, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.8014112903225806, |
|
"grad_norm": 0.08287226408720016, |
|
"learning_rate": 0.00013528517553594124, |
|
"loss": 1.571, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.8024193548387096, |
|
"grad_norm": 0.0797332376241684, |
|
"learning_rate": 0.00013513313716628637, |
|
"loss": 1.5679, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.8034274193548387, |
|
"grad_norm": 0.07978206872940063, |
|
"learning_rate": 0.0001349810060915283, |
|
"loss": 1.5865, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.8044354838709677, |
|
"grad_norm": 0.07792511582374573, |
|
"learning_rate": 0.00013482878271309226, |
|
"loss": 1.5849, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.8054435483870968, |
|
"grad_norm": 0.07994278520345688, |
|
"learning_rate": 0.000134676467432647, |
|
"loss": 1.6026, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.8064516129032258, |
|
"grad_norm": 0.08317188918590546, |
|
"learning_rate": 0.00013452406065210382, |
|
"loss": 1.6333, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8074596774193549, |
|
"grad_norm": 0.09058106690645218, |
|
"learning_rate": 0.00013437156277361538, |
|
"loss": 1.5936, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.8084677419354839, |
|
"grad_norm": 0.08963512629270554, |
|
"learning_rate": 0.00013421897419957482, |
|
"loss": 1.6422, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.8094758064516129, |
|
"grad_norm": 0.09142173826694489, |
|
"learning_rate": 0.0001340662953326145, |
|
"loss": 1.6779, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.8104838709677419, |
|
"grad_norm": 0.08868789672851562, |
|
"learning_rate": 0.00013391352657560513, |
|
"loss": 1.6594, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.811491935483871, |
|
"grad_norm": 0.08746343106031418, |
|
"learning_rate": 0.0001337606683316545, |
|
"loss": 1.5312, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 0.07589108496904373, |
|
"learning_rate": 0.00013360772100410665, |
|
"loss": 1.5462, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.813508064516129, |
|
"grad_norm": 0.0817432850599289, |
|
"learning_rate": 0.00013345468499654056, |
|
"loss": 1.5393, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.8145161290322581, |
|
"grad_norm": 0.07965264469385147, |
|
"learning_rate": 0.00013330156071276932, |
|
"loss": 1.5687, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.8155241935483871, |
|
"grad_norm": 0.08861200511455536, |
|
"learning_rate": 0.00013314834855683886, |
|
"loss": 1.6412, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.8165322580645161, |
|
"grad_norm": 0.07894746214151382, |
|
"learning_rate": 0.00013299504893302705, |
|
"loss": 1.5738, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8175403225806451, |
|
"grad_norm": 0.07987947016954422, |
|
"learning_rate": 0.00013284166224584253, |
|
"loss": 1.6212, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.8185483870967742, |
|
"grad_norm": 0.09027516096830368, |
|
"learning_rate": 0.0001326881889000236, |
|
"loss": 1.6113, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.8195564516129032, |
|
"grad_norm": 0.11448541283607483, |
|
"learning_rate": 0.00013253462930053742, |
|
"loss": 1.6315, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.8205645161290323, |
|
"grad_norm": 0.08771926164627075, |
|
"learning_rate": 0.00013238098385257848, |
|
"loss": 1.5919, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.8215725806451613, |
|
"grad_norm": 0.09016083925962448, |
|
"learning_rate": 0.00013222725296156807, |
|
"loss": 1.5629, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.8225806451612904, |
|
"grad_norm": 0.08411089330911636, |
|
"learning_rate": 0.0001320734370331527, |
|
"loss": 1.6037, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.8235887096774194, |
|
"grad_norm": 0.09559720754623413, |
|
"learning_rate": 0.0001319195364732034, |
|
"loss": 1.5463, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.8245967741935484, |
|
"grad_norm": 0.10408146679401398, |
|
"learning_rate": 0.00013176555168781451, |
|
"loss": 1.5768, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.8256048387096774, |
|
"grad_norm": 0.09700962156057358, |
|
"learning_rate": 0.00013161148308330257, |
|
"loss": 1.5739, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.8266129032258065, |
|
"grad_norm": 0.10024348646402359, |
|
"learning_rate": 0.00013145733106620532, |
|
"loss": 1.6281, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8276209677419355, |
|
"grad_norm": 0.09777159988880157, |
|
"learning_rate": 0.00013130309604328057, |
|
"loss": 1.6059, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.8286290322580645, |
|
"grad_norm": 0.0887807309627533, |
|
"learning_rate": 0.00013114877842150516, |
|
"loss": 1.5857, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.8296370967741935, |
|
"grad_norm": 0.09031641483306885, |
|
"learning_rate": 0.000130994378608074, |
|
"loss": 1.5523, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.8306451612903226, |
|
"grad_norm": 0.0985943153500557, |
|
"learning_rate": 0.00013083989701039868, |
|
"loss": 1.5464, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.8316532258064516, |
|
"grad_norm": 0.09250693768262863, |
|
"learning_rate": 0.0001306853340361067, |
|
"loss": 1.5564, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.8326612903225806, |
|
"grad_norm": 0.10353913903236389, |
|
"learning_rate": 0.0001305306900930403, |
|
"loss": 1.6126, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.8336693548387096, |
|
"grad_norm": 0.10408423840999603, |
|
"learning_rate": 0.00013037596558925532, |
|
"loss": 1.5946, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.8346774193548387, |
|
"grad_norm": 0.09186139702796936, |
|
"learning_rate": 0.00013022116093302022, |
|
"loss": 1.5692, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.8356854838709677, |
|
"grad_norm": 0.08551473915576935, |
|
"learning_rate": 0.00013006627653281493, |
|
"loss": 1.5486, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.8366935483870968, |
|
"grad_norm": 0.0928485244512558, |
|
"learning_rate": 0.0001299113127973298, |
|
"loss": 1.5435, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8377016129032258, |
|
"grad_norm": 0.08251947164535522, |
|
"learning_rate": 0.00012975627013546453, |
|
"loss": 1.5519, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.8387096774193549, |
|
"grad_norm": 0.09292181581258774, |
|
"learning_rate": 0.0001296011489563271, |
|
"loss": 1.6129, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.8397177419354839, |
|
"grad_norm": 0.07900629937648773, |
|
"learning_rate": 0.00012944594966923263, |
|
"loss": 1.5951, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.8407258064516129, |
|
"grad_norm": 0.08966945856809616, |
|
"learning_rate": 0.00012929067268370234, |
|
"loss": 1.5484, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.8417338709677419, |
|
"grad_norm": 0.08244184404611588, |
|
"learning_rate": 0.00012913531840946248, |
|
"loss": 1.5852, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.842741935483871, |
|
"grad_norm": 0.0986471101641655, |
|
"learning_rate": 0.00012897988725644335, |
|
"loss": 1.5797, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"grad_norm": 0.09217972308397293, |
|
"learning_rate": 0.0001288243796347779, |
|
"loss": 1.6433, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.844758064516129, |
|
"grad_norm": 0.07959865033626556, |
|
"learning_rate": 0.00012866879595480098, |
|
"loss": 1.5639, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.8457661290322581, |
|
"grad_norm": 0.08987965434789658, |
|
"learning_rate": 0.0001285131366270482, |
|
"loss": 1.567, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.8467741935483871, |
|
"grad_norm": 0.08139210939407349, |
|
"learning_rate": 0.00012835740206225464, |
|
"loss": 1.5881, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8477822580645161, |
|
"grad_norm": 0.09342298656702042, |
|
"learning_rate": 0.00012820159267135396, |
|
"loss": 1.6147, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.8487903225806451, |
|
"grad_norm": 0.08475241810083389, |
|
"learning_rate": 0.0001280457088654773, |
|
"loss": 1.6063, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.8497983870967742, |
|
"grad_norm": 0.0910174772143364, |
|
"learning_rate": 0.00012788975105595214, |
|
"loss": 1.6055, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.8508064516129032, |
|
"grad_norm": 0.08082278817892075, |
|
"learning_rate": 0.00012773371965430115, |
|
"loss": 1.5668, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.8518145161290323, |
|
"grad_norm": 0.0862516313791275, |
|
"learning_rate": 0.00012757761507224132, |
|
"loss": 1.5415, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.8528225806451613, |
|
"grad_norm": 0.07902859151363373, |
|
"learning_rate": 0.00012742143772168264, |
|
"loss": 1.5333, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.8538306451612904, |
|
"grad_norm": 0.090780109167099, |
|
"learning_rate": 0.00012726518801472718, |
|
"loss": 1.6311, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.8548387096774194, |
|
"grad_norm": 0.08239061385393143, |
|
"learning_rate": 0.0001271088663636679, |
|
"loss": 1.5331, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.8558467741935484, |
|
"grad_norm": 0.08999927341938019, |
|
"learning_rate": 0.0001269524731809875, |
|
"loss": 1.5775, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.8568548387096774, |
|
"grad_norm": 0.07954005897045135, |
|
"learning_rate": 0.00012679600887935768, |
|
"loss": 1.5969, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8578629032258065, |
|
"grad_norm": 0.08286864310503006, |
|
"learning_rate": 0.00012663947387163755, |
|
"loss": 1.551, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.8588709677419355, |
|
"grad_norm": 0.08236175030469894, |
|
"learning_rate": 0.00012648286857087294, |
|
"loss": 1.5575, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.8598790322580645, |
|
"grad_norm": 0.08063997328281403, |
|
"learning_rate": 0.00012632619339029508, |
|
"loss": 1.5899, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.8608870967741935, |
|
"grad_norm": 0.08329153805971146, |
|
"learning_rate": 0.00012616944874331963, |
|
"loss": 1.5523, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.8618951612903226, |
|
"grad_norm": 0.08181768655776978, |
|
"learning_rate": 0.00012601263504354555, |
|
"loss": 1.5743, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.8629032258064516, |
|
"grad_norm": 0.07989370822906494, |
|
"learning_rate": 0.00012585575270475402, |
|
"loss": 1.5629, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.8639112903225806, |
|
"grad_norm": 0.0804544985294342, |
|
"learning_rate": 0.00012569880214090726, |
|
"loss": 1.5573, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.8649193548387096, |
|
"grad_norm": 0.08739953488111496, |
|
"learning_rate": 0.0001255417837661476, |
|
"loss": 1.5705, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.8659274193548387, |
|
"grad_norm": 0.08386445045471191, |
|
"learning_rate": 0.00012538469799479627, |
|
"loss": 1.6106, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.8669354838709677, |
|
"grad_norm": 0.10252925008535385, |
|
"learning_rate": 0.00012522754524135228, |
|
"loss": 1.5472, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8679435483870968, |
|
"grad_norm": 0.08197301626205444, |
|
"learning_rate": 0.0001250703259204916, |
|
"loss": 1.5955, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.8689516129032258, |
|
"grad_norm": 0.09445837140083313, |
|
"learning_rate": 0.00012491304044706553, |
|
"loss": 1.5536, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.8699596774193549, |
|
"grad_norm": 0.0779092088341713, |
|
"learning_rate": 0.00012475568923610015, |
|
"loss": 1.5235, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.8709677419354839, |
|
"grad_norm": 0.08657954633235931, |
|
"learning_rate": 0.00012459827270279499, |
|
"loss": 1.5306, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.8719758064516129, |
|
"grad_norm": 0.08000969886779785, |
|
"learning_rate": 0.0001244407912625218, |
|
"loss": 1.5451, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.8729838709677419, |
|
"grad_norm": 0.1217707023024559, |
|
"learning_rate": 0.00012428324533082376, |
|
"loss": 1.5896, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.873991935483871, |
|
"grad_norm": 0.09770061075687408, |
|
"learning_rate": 0.00012412563532341413, |
|
"loss": 1.5649, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 0.08925329893827438, |
|
"learning_rate": 0.0001239679616561753, |
|
"loss": 1.59, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.876008064516129, |
|
"grad_norm": 0.0919514149427414, |
|
"learning_rate": 0.0001238102247451575, |
|
"loss": 1.6517, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.8770161290322581, |
|
"grad_norm": 0.0922718271613121, |
|
"learning_rate": 0.0001236524250065781, |
|
"loss": 1.6104, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8780241935483871, |
|
"grad_norm": 0.08782748132944107, |
|
"learning_rate": 0.00012349456285682002, |
|
"loss": 1.6027, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.8790322580645161, |
|
"grad_norm": 0.08689384907484055, |
|
"learning_rate": 0.00012333663871243094, |
|
"loss": 1.5969, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.8800403225806451, |
|
"grad_norm": 0.08294008672237396, |
|
"learning_rate": 0.00012317865299012212, |
|
"loss": 1.5852, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.8810483870967742, |
|
"grad_norm": 0.1106681302189827, |
|
"learning_rate": 0.00012302060610676737, |
|
"loss": 1.622, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.8820564516129032, |
|
"grad_norm": 0.10415118932723999, |
|
"learning_rate": 0.00012286249847940178, |
|
"loss": 1.6416, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.8830645161290323, |
|
"grad_norm": 0.08293262124061584, |
|
"learning_rate": 0.00012270433052522073, |
|
"loss": 1.5963, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.8840725806451613, |
|
"grad_norm": 0.09230700880289078, |
|
"learning_rate": 0.0001225461026615789, |
|
"loss": 1.6242, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.8850806451612904, |
|
"grad_norm": 0.08799263834953308, |
|
"learning_rate": 0.00012238781530598896, |
|
"loss": 1.5607, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.8860887096774194, |
|
"grad_norm": 0.08640427887439728, |
|
"learning_rate": 0.00012222946887612056, |
|
"loss": 1.6114, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.8870967741935484, |
|
"grad_norm": 0.08553026616573334, |
|
"learning_rate": 0.0001220710637897992, |
|
"loss": 1.5549, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.8881048387096774, |
|
"grad_norm": 0.0878986194729805, |
|
"learning_rate": 0.00012191260046500525, |
|
"loss": 1.5697, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.8891129032258065, |
|
"grad_norm": 0.08509572595357895, |
|
"learning_rate": 0.00012175407931987273, |
|
"loss": 1.6237, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.8901209677419355, |
|
"grad_norm": 0.09629905223846436, |
|
"learning_rate": 0.0001215955007726881, |
|
"loss": 1.5869, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.8911290322580645, |
|
"grad_norm": 0.07942201942205429, |
|
"learning_rate": 0.00012143686524188954, |
|
"loss": 1.5933, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.8921370967741935, |
|
"grad_norm": 0.0878920629620552, |
|
"learning_rate": 0.00012127817314606526, |
|
"loss": 1.5485, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.8931451612903226, |
|
"grad_norm": 0.07961869984865189, |
|
"learning_rate": 0.00012111942490395305, |
|
"loss": 1.571, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.8941532258064516, |
|
"grad_norm": 0.08690143376588821, |
|
"learning_rate": 0.00012096062093443863, |
|
"loss": 1.5437, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.8951612903225806, |
|
"grad_norm": 0.08331328630447388, |
|
"learning_rate": 0.00012080176165655488, |
|
"loss": 1.5967, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.8961693548387096, |
|
"grad_norm": 0.08849766850471497, |
|
"learning_rate": 0.00012064284748948053, |
|
"loss": 1.6156, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.8971774193548387, |
|
"grad_norm": 0.08413555473089218, |
|
"learning_rate": 0.00012048387885253925, |
|
"loss": 1.5603, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8981854838709677, |
|
"grad_norm": 0.08616600930690765, |
|
"learning_rate": 0.0001203248561651984, |
|
"loss": 1.5682, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.8991935483870968, |
|
"grad_norm": 0.08520584553480148, |
|
"learning_rate": 0.00012016577984706792, |
|
"loss": 1.6327, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.9002016129032258, |
|
"grad_norm": 0.08620157837867737, |
|
"learning_rate": 0.0001200066503178993, |
|
"loss": 1.6143, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.9012096774193549, |
|
"grad_norm": 0.07895144820213318, |
|
"learning_rate": 0.00011984746799758442, |
|
"loss": 1.5533, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.9022177419354839, |
|
"grad_norm": 0.08743470162153244, |
|
"learning_rate": 0.0001196882333061545, |
|
"loss": 1.6004, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.9032258064516129, |
|
"grad_norm": 0.08172673732042313, |
|
"learning_rate": 0.0001195289466637789, |
|
"loss": 1.6032, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.9042338709677419, |
|
"grad_norm": 0.09668843448162079, |
|
"learning_rate": 0.00011936960849076411, |
|
"loss": 1.6198, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.905241935483871, |
|
"grad_norm": 0.08503922075033188, |
|
"learning_rate": 0.00011921021920755253, |
|
"loss": 1.5638, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"grad_norm": 0.0889093279838562, |
|
"learning_rate": 0.00011905077923472146, |
|
"loss": 1.624, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.907258064516129, |
|
"grad_norm": 0.08409906178712845, |
|
"learning_rate": 0.00011889128899298198, |
|
"loss": 1.5562, |
|
"step": 900 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1984, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.046666935768187e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|