{
"name": "root",
"gauges": {
"Huggy.Policy.Entropy.mean": {
"value": 1.3477641344070435,
"min": 1.3477641344070435,
"max": 1.4271644353866577,
"count": 40
},
"Huggy.Policy.Entropy.sum": {
"value": 67504.1171875,
"min": 66656.7109375,
"max": 78797.4375,
"count": 40
},
"Huggy.Environment.EpisodeLength.mean": {
"value": 82.47078464106845,
"min": 75.74079754601227,
"max": 371.68148148148146,
"count": 40
},
"Huggy.Environment.EpisodeLength.sum": {
"value": 49400.0,
"min": 48999.0,
"max": 50177.0,
"count": 40
},
"Huggy.Step.mean": {
"value": 1999997.0,
"min": 49872.0,
"max": 1999997.0,
"count": 40
},
"Huggy.Step.sum": {
"value": 1999997.0,
"min": 49872.0,
"max": 1999997.0,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.mean": {
"value": 2.4713692665100098,
"min": 0.1085435152053833,
"max": 2.5046255588531494,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.sum": {
"value": 1480.3502197265625,
"min": 14.544831275939941,
"max": 1585.098388671875,
"count": 40
},
"Huggy.Environment.CumulativeReward.mean": {
"value": 3.8931645334065457,
"min": 1.7072422876954079,
"max": 4.105898760422487,
"count": 40
},
"Huggy.Environment.CumulativeReward.sum": {
"value": 2332.005555510521,
"min": 228.77046655118465,
"max": 2564.6871837973595,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.mean": {
"value": 3.8931645334065457,
"min": 1.7072422876954079,
"max": 4.105898760422487,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.sum": {
"value": 2332.005555510521,
"min": 228.77046655118465,
"max": 2564.6871837973595,
"count": 40
},
"Huggy.Losses.PolicyLoss.mean": {
"value": 0.015262791563867117,
"min": 0.015037490063841688,
"max": 0.01912982018654778,
"count": 40
},
"Huggy.Losses.PolicyLoss.sum": {
"value": 0.04578837469160135,
"min": 0.030074980127683375,
"max": 0.057389460559643335,
"count": 40
},
"Huggy.Losses.ValueLoss.mean": {
"value": 0.045917419418692595,
"min": 0.021191489491611717,
"max": 0.04855267383158207,
"count": 40
},
"Huggy.Losses.ValueLoss.sum": {
"value": 0.1377522582560778,
"min": 0.042382978983223435,
"max": 0.1377522582560778,
"count": 40
},
"Huggy.Policy.LearningRate.mean": {
"value": 3.4635488455166685e-06,
"min": 3.4635488455166685e-06,
"max": 0.000295376476541175,
"count": 40
},
"Huggy.Policy.LearningRate.sum": {
"value": 1.0390646536550006e-05,
"min": 1.0390646536550006e-05,
"max": 0.0008440678686440499,
"count": 40
},
"Huggy.Policy.Epsilon.mean": {
"value": 0.10115448333333332,
"min": 0.10115448333333332,
"max": 0.19845882500000003,
"count": 40
},
"Huggy.Policy.Epsilon.sum": {
"value": 0.30346344999999997,
"min": 0.20744010000000002,
"max": 0.58135595,
"count": 40
},
"Huggy.Policy.Beta.mean": {
"value": 6.760871833333336e-05,
"min": 6.760871833333336e-05,
"max": 0.0049230953675,
"count": 40
},
"Huggy.Policy.Beta.sum": {
"value": 0.00020282615500000008,
"min": 0.00020282615500000008,
"max": 0.014069661904999997,
"count": 40
},
"Huggy.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
},
"Huggy.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1679890700",
"python_version": "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]",
"command_line_arguments": "/home/ppabolu/anaconda3/envs/deepRL/bin/mlagents-learn ./config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy-the-Puppo --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1679896177"
},
"total": 5477.287377696484,
"count": 1,
"self": 0.4852929189801216,
"children": {
"run_training.setup": {
"total": 0.07678009197115898,
"count": 1,
"self": 0.07678009197115898
},
"TrainerController.start_learning": {
"total": 5476.725304685533,
"count": 1,
"self": 5.213195715099573,
"children": {
"TrainerController._reset_env": {
"total": 11.399732645601034,
"count": 1,
"self": 11.399732645601034
},
"TrainerController.advance": {
"total": 5459.585739143193,
"count": 232812,
"self": 5.387621909379959,
"children": {
"env_step": {
"total": 4695.1606619134545,
"count": 232812,
"self": 4323.76669523865,
"children": {
"SubprocessEnvManager._take_step": {
"total": 367.80504669994116,
"count": 232812,
"self": 19.786863684654236,
"children": {
"TorchPolicy.evaluate": {
"total": 348.0181830152869,
"count": 222948,
"self": 348.0181830152869
}
}
},
"workers": {
"total": 3.588919974863529,
"count": 232812,
"self": 0.0,
"children": {
"worker_root": {
"total": 5466.901013109833,
"count": 232812,
"is_parallel": true,
"self": 1464.959120310843,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0010828077793121338,
"count": 1,
"is_parallel": true,
"self": 0.00031690672039985657,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0007659010589122772,
"count": 2,
"is_parallel": true,
"self": 0.0007659010589122772
}
}
},
"UnityEnvironment.step": {
"total": 0.03627157211303711,
"count": 1,
"is_parallel": true,
"self": 0.00012057647109031677,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004039853811264038,
"count": 1,
"is_parallel": true,
"self": 0.0004039853811264038
},
"communicator.exchange": {
"total": 0.03511756658554077,
"count": 1,
"is_parallel": true,
"self": 0.03511756658554077
},
"steps_from_proto": {
"total": 0.0006294436752796173,
"count": 1,
"is_parallel": true,
"self": 0.00020921602845191956,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00042022764682769775,
"count": 2,
"is_parallel": true,
"self": 0.00042022764682769775
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 4001.94189279899,
"count": 232811,
"is_parallel": true,
"self": 22.541271444410086,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 297.64191883429885,
"count": 232811,
"is_parallel": true,
"self": 297.64191883429885
},
"communicator.exchange": {
"total": 3583.317511830479,
"count": 232811,
"is_parallel": true,
"self": 3583.317511830479
},
"steps_from_proto": {
"total": 98.44119068980217,
"count": 232811,
"is_parallel": true,
"self": 36.478584710508585,
"children": {
"_process_rank_one_or_two_observation": {
"total": 61.962605979293585,
"count": 465622,
"is_parallel": true,
"self": 61.962605979293585
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 759.0374553203583,
"count": 232812,
"self": 7.179403081536293,
"children": {
"process_trajectory": {
"total": 171.90152921900153,
"count": 232812,
"self": 167.230640091002,
"children": {
"RLTrainer._checkpoint": {
"total": 4.670889127999544,
"count": 10,
"self": 4.670889127999544
}
}
},
"_update_policy": {
"total": 579.9565230198205,
"count": 97,
"self": 498.98211320862174,
"children": {
"TorchPPOOptimizer.update": {
"total": 80.97440981119871,
"count": 4850,
"self": 80.97440981119871
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.5944242477416992e-06,
"count": 1,
"self": 1.5944242477416992e-06
},
"TrainerController._save_models": {
"total": 0.5266355872154236,
"count": 1,
"self": 0.0705442950129509,
"children": {
"RLTrainer._checkpoint": {
"total": 0.4560912922024727,
"count": 1,
"self": 0.4560912922024727
}
}
}
}
}
}
}