global_step,perplexity,eval_loss,train_loss 840,tensor(5513.1270),8.614887404780816,8.615015983581543 1680,tensor(2866.9822),7.961015423327261,7.8731489181518555 2520,tensor(2383.4500),7.776304355729812,7.76918888092041 3360,tensor(2167.2241),7.681202540465441,7.873549938201904 4200,tensor(2011.2488),7.606511034671729,7.7114129066467285 5040,tensor(1925.7876),7.563090380899149,7.557165145874023 5880,tensor(1873.8759),7.535764445625775,7.542567253112793 6720,tensor(1828.3774),7.511184253963814,7.672656536102295 7560,tensor(1783.2251),7.4861786467204166,7.458953857421875 8400,tensor(1500.5310),7.313574164964576,7.289700508117676 9240,tensor(1025.8199),6.933247593341846,7.138363361358643 10080,tensor(850.7896),6.74616496370867,6.698551177978516 10920,tensor(718.7275),6.577482442720242,6.610429763793945 11760,tensor(591.0831),6.381956342272284,6.376274585723877 12600,tensor(477.7185),6.16902169458109,6.171719551086426 13440,tensor(357.5952),5.879401738044775,6.041259765625 14280,tensor(300.5546),5.705629161183868,5.919500827789307 15120,tensor(259.2738),5.557884844558499,5.610713958740234 15960,tensor(236.5213),5.46603816380433,5.657792091369629 16800,tensor(216.0584),5.375549020360431,5.34259033203125 17640,tensor(199.4774),5.295700963639535,5.249312400817871 18480,tensor(186.8399),5.230252159715263,5.466444492340088 19320,tensor(174.4325),5.161537850637571,5.218142032623291 20000,tensor(165.4119),5.108438900861695,5.233940601348877 20160,tensor(164.5391),5.103147904454814,5.230566024780273 21000,tensor(154.7863),5.042045109644885,5.188614845275879 21840,tensor(146.6164),4.9878196422522665,5.2135701179504395 22680,tensor(140.0771),4.942193148825406,4.814875602722168 23520,tensor(134.7099),4.903123423951497,4.779435157775879 24360,tensor(127.4303),4.8475694950158,4.873952388763428 25200,tensor(122.9376),4.811676481888758,4.701533317565918 26040,tensor(118.4770),4.77471930381811,4.734566688537598 26880,tensor(113.0559),4.727882242880726,4.733781814575195 27720,tensor(108.0131),4.682252737018169,4.784864902496338 28560,tensor(105.1070),4.654978862870926,4.570435523986816 29400,tensor(102.5748),4.6305917653992275,4.797003746032715 30240,tensor(98.3004),4.588028168791278,4.692539691925049 31080,tensor(95.5100),4.559231265461276,4.472110271453857 31920,tensor(92.8367),4.530842494060643,4.457094192504883 32760,tensor(91.0269),4.511155435824281,4.4847493171691895 33600,tensor(89.1943),4.490817592042317,4.467089653015137 34440,tensor(85.9484),4.453746852151591,4.714705467224121 35280,tensor(84.4949),4.436690606212164,4.459835529327393 36120,tensor(83.1834),4.4210476332931155,4.568218231201172 36960,tensor(81.0199),4.394695270682963,4.207172870635986 37800,tensor(78.6906),4.3655232479222015,4.555455207824707 38640,tensor(77.7475),4.353466031675655,4.264974594116211 39480,tensor(75.5957),4.32539894456547,4.3341593742370605 40000,tensor(76.1601),4.3328377375670515,4.343103408813477 40320,tensor(75.1305),4.3192271779498785,4.320957183837891 41160,tensor(73.8844),4.302502227620491,4.186777591705322 42000,tensor(72.7903),4.287583556785402,4.321813583374023 42840,tensor(71.3143),4.2670973850087535,4.38191556930542 43680,tensor(70.2743),4.2524054344231486,4.150886535644531 44520,tensor(69.5957),4.242702348537355,4.198707580566406 45360,tensor(67.9366),4.218575100198176,4.0208282470703125 46200,tensor(68.2866),4.223713122272944,4.201685428619385 47040,tensor(66.6248),4.1990775677830126,4.237384796142578 47880,tensor(66.2193),4.192972441985144,4.080409049987793 48720,tensor(65.3626),4.179950529930151,4.410434722900391 49560,tensor(64.7271),4.170180269892182,4.0652947425842285 50400,tensor(64.2023),4.162038403099747,4.079613208770752 51240,tensor(62.7907),4.139807715800136,4.082955360412598 52080,tensor(62.3912),4.133424125011499,4.0272297859191895 52920,tensor(62.1102),4.12891009633575,4.157703399658203 53760,tensor(61.3228),4.116151399522031,4.108161926269531 54600,tensor(60.7504),4.106774397935912,4.0041704177856445 55440,tensor(60.3719),4.1005239068614365,4.1067047119140625 56280,tensor(59.6579),4.08862609094918,4.0335893630981445 57120,tensor(59.8366),4.091618014737893,4.1296000480651855 57960,tensor(59.0609),4.078569598672514,4.050683498382568 58800,tensor(58.5802),4.070397336336109,4.029040813446045 59640,tensor(58.5353),4.0696296974381,4.057146072387695 60000,tensor(58.2795),4.065249836275363,4.020354747772217 60480,tensor(58.3460),4.066390033016837,3.941168785095215 61320,tensor(57.4409),4.050756845429046,4.060215950012207 62160,tensor(56.9148),4.041554759463993,3.8870997428894043 63000,tensor(56.7257),4.038228142318002,4.010282039642334 63840,tensor(56.4911),4.034082424019186,3.8918683528900146 64680,tensor(55.8974),4.023517806383106,3.913522481918335 65520,tensor(55.8921),4.023423032173048,3.9048819541931152 66360,tensor(55.6332),4.018779180626169,4.0070977210998535 67200,tensor(55.3677),4.013996507319229,3.8241348266601562 68040,tensor(55.1016),4.009178212468658,3.8613393306732178 68880,tensor(54.4008),3.9963787630271006,3.984696388244629 69720,tensor(54.6007),4.000046700662911,3.971323251724243 70560,tensor(54.7101),4.002047452881438,3.8017985820770264 71400,tensor(54.2510),3.993622047641266,4.045064926147461 72240,tensor(54.2700),3.9939723986584994,3.7902510166168213 73080,tensor(53.8975),3.987083983082342,4.033565998077393 73920,tensor(53.4055),3.97791349718356,3.8635339736938477 74760,tensor(53.2877),3.975705491423042,4.0212907791137695 75600,tensor(53.2083),3.9742152747384747,3.9361703395843506 76440,tensor(53.2604),3.975193798824509,3.7788941860198975 77280,tensor(53.3057),3.9760427565371255,3.9743189811706543 78120,tensor(52.6205),3.9631065192380786,3.8015661239624023 78960,tensor(52.6849),3.964329612198599,3.915081262588501 79800,tensor(53.0415),3.971074112218703,4.116347312927246 80000,tensor(52.3625),3.9581908987596703,3.8000710010528564 80640,tensor(52.4477),3.959817202735286,3.951565742492676 81480,tensor(52.9591),3.969520092010498,3.83418345451355 82320,tensor(52.5453),3.961675506067502,3.841287851333618 83160,tensor(52.6411),3.963497509888563,4.05776309967041 84000,tensor(52.3799),3.9585225909807105,3.8181488513946533