{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.3410964012145996,
"min": 0.3410964012145996,
"max": 1.418026089668274,
"count": 37
},
"Pyramids.Policy.Entropy.sum": {
"value": 10140.1142578125,
"min": 10140.1142578125,
"max": 43017.23828125,
"count": 37
},
"Pyramids.Step.mean": {
"value": 1109876.0,
"min": 29952.0,
"max": 1109876.0,
"count": 37
},
"Pyramids.Step.sum": {
"value": 1109876.0,
"min": 29952.0,
"max": 1109876.0,
"count": 37
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.5330937504768372,
"min": -0.11661066114902496,
"max": 0.5860244631767273,
"count": 37
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 147.13388061523438,
"min": -28.21977996826172,
"max": 163.50082397460938,
"count": 37
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.0137894656509161,
"min": -0.011680372059345245,
"max": 0.3022136092185974,
"count": 37
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 3.8058924674987793,
"min": -3.188741445541382,
"max": 72.53126525878906,
"count": 37
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06856451607438632,
"min": 0.06532412811454076,
"max": 0.07538307401201953,
"count": 37
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.0284677411157948,
"min": 0.4799661327356837,
"max": 1.0553630361682735,
"count": 37
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.017097013458260334,
"min": 0.0005752700689231311,
"max": 0.017097013458260334,
"count": 37
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.256455201873905,
"min": 0.005556170241535496,
"max": 0.256455201873905,
"count": 37
},
"Pyramids.Policy.LearningRate.mean": {
"value": 8.100039299989333e-05,
"min": 8.100039299989333e-05,
"max": 0.00029676708679192377,
"count": 37
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.0012150058949984,
"min": 0.0012150058949984,
"max": 0.0038227867257378,
"count": 37
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.1270001066666667,
"min": 0.1270001066666667,
"max": 0.19892236190476195,
"count": 37
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.9050016000000003,
"min": 1.3794090666666667,
"max": 2.7150253333333336,
"count": 37
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0027073106560000005,
"min": 0.0027073106560000005,
"max": 0.009892343954285714,
"count": 37
},
"Pyramids.Policy.Beta.sum": {
"value": 0.040609659840000006,
"min": 0.040609659840000006,
"max": 0.12743879378,
"count": 37
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.008863682858645916,
"min": 0.008378814905881882,
"max": 0.5857936143875122,
"count": 37
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.13295523822307587,
"min": 0.12021433562040329,
"max": 4.100555419921875,
"count": 37
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 326.3804347826087,
"min": 311.1136363636364,
"max": 999.0,
"count": 37
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 30027.0,
"min": 15984.0,
"max": 32584.0,
"count": 37
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.4996586774030458,
"min": -1.0000000521540642,
"max": 1.6262666512837356,
"count": 37
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 137.9685983210802,
"min": -32.000001668930054,
"max": 141.485198661685,
"count": 37
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.4996586774030458,
"min": -1.0000000521540642,
"max": 1.6262666512837356,
"count": 37
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 137.9685983210802,
"min": -32.000001668930054,
"max": 141.485198661685,
"count": 37
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.030218124058908714,
"min": 0.02747859634728647,
"max": 12.479123975150287,
"count": 37
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.7800674134196015,
"min": 2.390637882213923,
"max": 199.6659836024046,
"count": 37
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 37
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 37
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1684743375",
"python_version": "3.10.11 (main, Apr 5 2023, 14:15:10) [GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1684746121"
},
"total": 2746.0989240340004,
"count": 1,
"self": 0.8587743580001188,
"children": {
"run_training.setup": {
"total": 0.03641991299991787,
"count": 1,
"self": 0.03641991299991787
},
"TrainerController.start_learning": {
"total": 2745.2037297630004,
"count": 1,
"self": 1.887164998028311,
"children": {
"TrainerController._reset_env": {
"total": 4.454926328000056,
"count": 1,
"self": 4.454926328000056
},
"TrainerController.advance": {
"total": 2738.5262926849723,
"count": 72541,
"self": 1.8485816899878955,
"children": {
"env_step": {
"total": 1980.1429021140018,
"count": 72541,
"self": 1836.1817931670691,
"children": {
"SubprocessEnvManager._take_step": {
"total": 142.84616015798088,
"count": 72541,
"self": 6.112379635981824,
"children": {
"TorchPolicy.evaluate": {
"total": 136.73378052199905,
"count": 71027,
"self": 136.73378052199905
}
}
},
"workers": {
"total": 1.1149487889517786,
"count": 72540,
"self": 0.0,
"children": {
"worker_root": {
"total": 2738.1276783289536,
"count": 72540,
"is_parallel": true,
"self": 1048.755615843943,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0018949119998978858,
"count": 1,
"is_parallel": true,
"self": 0.0006021640001563355,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012927479997415503,
"count": 8,
"is_parallel": true,
"self": 0.0012927479997415503
}
}
},
"UnityEnvironment.step": {
"total": 0.08471364499996525,
"count": 1,
"is_parallel": true,
"self": 0.0008068949998687458,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0005532090001452161,
"count": 1,
"is_parallel": true,
"self": 0.0005532090001452161
},
"communicator.exchange": {
"total": 0.08117343799995069,
"count": 1,
"is_parallel": true,
"self": 0.08117343799995069
},
"steps_from_proto": {
"total": 0.0021801030000005994,
"count": 1,
"is_parallel": true,
"self": 0.00047861700022622244,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001701485999774377,
"count": 8,
"is_parallel": true,
"self": 0.001701485999774377
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1689.3720624850107,
"count": 72539,
"is_parallel": true,
"self": 38.87596173010115,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 29.64103448994433,
"count": 72539,
"is_parallel": true,
"self": 29.64103448994433
},
"communicator.exchange": {
"total": 1493.5881846609882,
"count": 72539,
"is_parallel": true,
"self": 1493.5881846609882
},
"steps_from_proto": {
"total": 127.26688160397703,
"count": 72539,
"is_parallel": true,
"self": 26.87443203315047,
"children": {
"_process_rank_one_or_two_observation": {
"total": 100.39244957082656,
"count": 580312,
"is_parallel": true,
"self": 100.39244957082656
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 756.5348088809824,
"count": 72540,
"self": 3.513495315062528,
"children": {
"process_trajectory": {
"total": 135.46897442192426,
"count": 72540,
"self": 135.19047052992414,
"children": {
"RLTrainer._checkpoint": {
"total": 0.27850389200011705,
"count": 2,
"self": 0.27850389200011705
}
}
},
"_update_policy": {
"total": 617.5523391439956,
"count": 514,
"self": 397.3331162459597,
"children": {
"TorchPPOOptimizer.update": {
"total": 220.21922289803592,
"count": 25899,
"self": 220.21922289803592
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.708000127109699e-06,
"count": 1,
"self": 1.708000127109699e-06
},
"TrainerController._save_models": {
"total": 0.33534404399961204,
"count": 1,
"self": 0.0029410389997792663,
"children": {
"RLTrainer._checkpoint": {
"total": 0.3324030049998328,
"count": 1,
"self": 0.3324030049998328
}
}
}
}
}
}
}