autoresearch: phase1 trial 10 results
Agent: pi Tests: N/A Tests-Added: 0 TypeScript: N/A
This commit is contained in:
parent
2d6fe2c962
commit
6c8c5b25a9
|
|
@ -146,3 +146,109 @@
|
|||
[2026-04-13 22:46:54] mean_reward=234.5386 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0009737963906394612, 'timesteps': 47325, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 22:46:54] mean_reward=177.7416 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0016410214223984076, 'timesteps': 16101, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 22:46:54] mean_reward=56.9474 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0006446108743181142, 'timesteps': 25224, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 22:46:55] [AutoResearch] Git push complete after trial 5
|
||||
[2026-04-13 22:46:57]
|
||||
[AutoResearch] ========== Trial 6/20 ==========
|
||||
[2026-04-13 22:46:57] [AutoResearch] GP UCB top-5 candidates:
|
||||
[2026-04-13 22:46:57] UCB=2.8976 mu=1.3885 sigma=0.7545 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0004916288196083273, 'timesteps': 45159}
|
||||
[2026-04-13 22:46:57] UCB=2.7044 mu=1.9380 sigma=0.3832 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0008675754116606385, 'timesteps': 37769}
|
||||
[2026-04-13 22:46:57] UCB=2.5483 mu=1.0014 sigma=0.7734 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0013296606512799647, 'timesteps': 32320}
|
||||
[2026-04-13 22:46:57] UCB=2.3786 mu=0.5746 sigma=0.9020 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0002809430632146512, 'timesteps': 38162}
|
||||
[2026-04-13 22:46:57] UCB=2.3450 mu=0.6872 sigma=0.8289 params={'n_steer': 4, 'n_throttle': 5, 'learning_rate': 0.000691104912585418, 'timesteps': 43009}
|
||||
[2026-04-13 22:46:57] [AutoResearch] Proposed: {'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0004916288196083273, 'timesteps': 45159, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 22:46:59] [AutoResearch] Launching trial 6: {'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0004916288196083273, 'timesteps': 45159, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:15:59] [AutoResearch] Trial 6 finished in 1740.1s, returncode=0
|
||||
[2026-04-13 23:15:59] [AutoResearch] Trial 6: mean_reward=230.3458 std_reward=3.0194
|
||||
[2026-04-13 23:15:59] [AutoResearch] === Trial 6 Summary ===
|
||||
[2026-04-13 23:15:59] Total Phase 1 runs: 7
|
||||
[2026-04-13 23:15:59] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
|
||||
[2026-04-13 23:15:59] Top 5:
|
||||
[2026-04-13 23:15:59] mean_reward=615.6443 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.000840799681375933, 'timesteps': 35596, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:15:59] mean_reward=254.5237 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0010511079430656864, 'timesteps': 43721, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:15:59] mean_reward=234.5386 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0009737963906394612, 'timesteps': 47325, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:15:59] mean_reward=230.3458 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0004916288196083273, 'timesteps': 45159, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:15:59] mean_reward=177.7416 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0016410214223984076, 'timesteps': 16101, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:16:01]
|
||||
[AutoResearch] ========== Trial 7/20 ==========
|
||||
[2026-04-13 23:16:01] [AutoResearch] GP UCB top-5 candidates:
|
||||
[2026-04-13 23:16:01] UCB=2.8151 mu=0.9820 sigma=0.9165 params={'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.001574375789043505, 'timesteps': 34055}
|
||||
[2026-04-13 23:16:01] UCB=2.6240 mu=1.0426 sigma=0.7907 params={'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.001137097715700357, 'timesteps': 30159}
|
||||
[2026-04-13 23:16:01] UCB=2.3629 mu=0.6057 sigma=0.8786 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0013640068427189318, 'timesteps': 36432}
|
||||
[2026-04-13 23:16:01] UCB=2.3445 mu=0.4178 sigma=0.9633 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0017303620708703264, 'timesteps': 25882}
|
||||
[2026-04-13 23:16:01] UCB=2.3407 mu=0.7245 sigma=0.8081 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.000811303710708658, 'timesteps': 34107}
|
||||
[2026-04-13 23:16:01] [AutoResearch] Proposed: {'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.001574375789043505, 'timesteps': 34055, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:16:03] [AutoResearch] Launching trial 7: {'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.001574375789043505, 'timesteps': 34055, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:44:53] [AutoResearch] Trial 7 finished in 1729.5s, returncode=0
|
||||
[2026-04-13 23:44:53] [AutoResearch] Trial 7: mean_reward=69.0259 std_reward=10.9909
|
||||
[2026-04-13 23:44:53] [AutoResearch] === Trial 7 Summary ===
|
||||
[2026-04-13 23:44:53] Total Phase 1 runs: 8
|
||||
[2026-04-13 23:44:53] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
|
||||
[2026-04-13 23:44:53] Top 5:
|
||||
[2026-04-13 23:44:53] mean_reward=615.6443 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.000840799681375933, 'timesteps': 35596, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:44:53] mean_reward=254.5237 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0010511079430656864, 'timesteps': 43721, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:44:53] mean_reward=234.5386 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0009737963906394612, 'timesteps': 47325, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:44:53] mean_reward=230.3458 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0004916288196083273, 'timesteps': 45159, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:44:53] mean_reward=177.7416 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0016410214223984076, 'timesteps': 16101, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:44:55]
|
||||
[AutoResearch] ========== Trial 8/20 ==========
|
||||
[2026-04-13 23:44:55] [AutoResearch] GP UCB top-5 candidates:
|
||||
[2026-04-13 23:44:55] UCB=2.6819 mu=1.1218 sigma=0.7800 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0011680072988353367, 'timesteps': 34177}
|
||||
[2026-04-13 23:44:55] UCB=2.5982 mu=0.9843 sigma=0.8069 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.001106072643855368, 'timesteps': 28977}
|
||||
[2026-04-13 23:44:55] UCB=2.5885 mu=1.2137 sigma=0.6874 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0009950286097591902, 'timesteps': 31387}
|
||||
[2026-04-13 23:44:55] UCB=2.5042 mu=0.6672 sigma=0.9185 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0015134650324881223, 'timesteps': 31708}
|
||||
[2026-04-13 23:44:55] UCB=2.4989 mu=0.7372 sigma=0.8808 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0011556578968693356, 'timesteps': 30629}
|
||||
[2026-04-13 23:44:55] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0011680072988353367, 'timesteps': 34177, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 23:44:57] [AutoResearch] Launching trial 8: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0011680072988353367, 'timesteps': 34177, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:08:12] [AutoResearch] Trial 8 finished in 1395.2s, returncode=0
|
||||
[2026-04-14 00:08:12] [AutoResearch] Trial 8: mean_reward=2296.1891 std_reward=14.0346
|
||||
[2026-04-14 00:08:12] [AutoResearch] === Trial 8 Summary ===
|
||||
[2026-04-14 00:08:12] Total Phase 1 runs: 9
|
||||
[2026-04-14 00:08:12] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
|
||||
[2026-04-14 00:08:12] Top 5:
|
||||
[2026-04-14 00:08:12] mean_reward=2296.1891 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0011680072988353367, 'timesteps': 34177, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:08:12] mean_reward=615.6443 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.000840799681375933, 'timesteps': 35596, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:08:12] mean_reward=254.5237 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0010511079430656864, 'timesteps': 43721, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:08:12] mean_reward=234.5386 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0009737963906394612, 'timesteps': 47325, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:08:12] mean_reward=230.3458 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0004916288196083273, 'timesteps': 45159, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:08:14]
|
||||
[AutoResearch] ========== Trial 9/20 ==========
|
||||
[2026-04-14 00:08:14] [AutoResearch] GP UCB top-5 candidates:
|
||||
[2026-04-14 00:08:14] UCB=3.4249 mu=2.1949 sigma=0.6150 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0010038571924825726, 'timesteps': 29380}
|
||||
[2026-04-14 00:08:14] UCB=3.4098 mu=1.9348 sigma=0.7375 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0016876197465002791, 'timesteps': 29946}
|
||||
[2026-04-14 00:08:14] UCB=3.3195 mu=1.8981 sigma=0.7107 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0015133213029551393, 'timesteps': 29545}
|
||||
[2026-04-14 00:08:14] UCB=3.3002 mu=1.7970 sigma=0.7516 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0009092257531208215, 'timesteps': 26759}
|
||||
[2026-04-14 00:08:14] UCB=3.2755 mu=1.8436 sigma=0.7159 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.001032563222262004, 'timesteps': 29035}
|
||||
[2026-04-14 00:08:14] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0010038571924825726, 'timesteps': 29380, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:08:16] [AutoResearch] Launching trial 9: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0010038571924825726, 'timesteps': 29380, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:32:30] [AutoResearch] Trial 9 finished in 1454.2s, returncode=0
|
||||
[2026-04-14 00:32:30] [AutoResearch] Trial 9: mean_reward=62.5084 std_reward=9.1358
|
||||
[2026-04-14 00:32:30] [AutoResearch] === Trial 9 Summary ===
|
||||
[2026-04-14 00:32:30] Total Phase 1 runs: 10
|
||||
[2026-04-14 00:32:30] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
|
||||
[2026-04-14 00:32:30] Top 5:
|
||||
[2026-04-14 00:32:30] mean_reward=2296.1891 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0011680072988353367, 'timesteps': 34177, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:32:30] mean_reward=615.6443 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.000840799681375933, 'timesteps': 35596, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:32:30] mean_reward=254.5237 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0010511079430656864, 'timesteps': 43721, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:32:30] mean_reward=234.5386 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0009737963906394612, 'timesteps': 47325, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:32:30] mean_reward=230.3458 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0004916288196083273, 'timesteps': 45159, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:32:32]
|
||||
[AutoResearch] ========== Trial 10/20 ==========
|
||||
[2026-04-14 00:32:32] [AutoResearch] GP UCB top-5 candidates:
|
||||
[2026-04-14 00:32:32] UCB=4.3821 mu=3.5544 sigma=0.4138 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0011311496831886009, 'timesteps': 35197}
|
||||
[2026-04-14 00:32:32] UCB=3.9901 mu=2.2172 sigma=0.8864 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0019647588641987608, 'timesteps': 35583}
|
||||
[2026-04-14 00:32:32] UCB=3.9576 mu=3.1158 sigma=0.4209 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0013877163147355273, 'timesteps': 34726}
|
||||
[2026-04-14 00:32:32] UCB=3.9250 mu=2.5046 sigma=0.7102 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0016362435570891763, 'timesteps': 38090}
|
||||
[2026-04-14 00:32:32] UCB=3.8792 mu=2.3746 sigma=0.7523 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.001709600322137922, 'timesteps': 32114}
|
||||
[2026-04-14 00:32:32] [AutoResearch] Proposed: {'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0011311496831886009, 'timesteps': 35197, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:32:34] [AutoResearch] Launching trial 10: {'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0011311496831886009, 'timesteps': 35197, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:56:14] [AutoResearch] Trial 10 finished in 1420.1s, returncode=0
|
||||
[2026-04-14 00:56:14] [AutoResearch] Trial 10: mean_reward=144.7129 std_reward=26.0347
|
||||
[2026-04-14 00:56:14] [AutoResearch] === Trial 10 Summary ===
|
||||
[2026-04-14 00:56:14] Total Phase 1 runs: 11
|
||||
[2026-04-14 00:56:14] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
|
||||
[2026-04-14 00:56:14] Top 5:
|
||||
[2026-04-14 00:56:14] mean_reward=2296.1891 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0011680072988353367, 'timesteps': 34177, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:56:14] mean_reward=615.6443 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.000840799681375933, 'timesteps': 35596, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:56:14] mean_reward=254.5237 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0010511079430656864, 'timesteps': 43721, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:56:14] mean_reward=234.5386 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0009737963906394612, 'timesteps': 47325, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-14 00:56:14] mean_reward=230.3458 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0004916288196083273, 'timesteps': 45159, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
|
|
|
|||
|
|
@ -4,3 +4,8 @@
|
|||
{"trial": 3, "timestamp": "2026-04-13T21:57:48.402590", "params": {"n_steer": 3, "n_throttle": 4, "learning_rate": 0.000840799681375933, "timesteps": 35596, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": 615.6443, "std_reward": 2.4555, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0003/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1522.0542554855347, "reward_hacking_suspected": false}
|
||||
{"trial": 4, "timestamp": "2026-04-13T22:17:49.148601", "params": {"n_steer": 3, "n_throttle": 4, "learning_rate": 0.0006446108743181142, "timesteps": 25224, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": 56.9474, "std_reward": 0.4525, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0004/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1196.7339782714844, "reward_hacking_suspected": false}
|
||||
{"trial": 5, "timestamp": "2026-04-13T22:46:54.148910", "params": {"n_steer": 4, "n_throttle": 4, "learning_rate": 0.0010511079430656864, "timesteps": 43721, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": 254.5237, "std_reward": 34.6249, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0005/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1740.9856295585632, "reward_hacking_suspected": false}
|
||||
{"trial": 6, "timestamp": "2026-04-13T23:15:59.662616", "params": {"n_steer": 3, "n_throttle": 4, "learning_rate": 0.0004916288196083273, "timesteps": 45159, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": 230.3458, "std_reward": 3.0194, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1740.0574042797089, "reward_hacking_suspected": false}
|
||||
{"trial": 7, "timestamp": "2026-04-13T23:44:53.210793", "params": {"n_steer": 3, "n_throttle": 5, "learning_rate": 0.001574375789043505, "timesteps": 34055, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": 69.0259, "std_reward": 10.9909, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0007/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1729.5354163646698, "reward_hacking_suspected": false}
|
||||
{"trial": 8, "timestamp": "2026-04-14T00:08:12.385350", "params": {"n_steer": 4, "n_throttle": 3, "learning_rate": 0.0011680072988353367, "timesteps": 34177, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": 2296.1891, "std_reward": 14.0346, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0008/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1395.1626331806183, "reward_hacking_suspected": false}
|
||||
{"trial": 9, "timestamp": "2026-04-14T00:32:30.628681", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0010038571924825726, "timesteps": 29380, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": 62.5084, "std_reward": 9.1358, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0009/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1454.230754852295, "reward_hacking_suspected": false}
|
||||
{"trial": 10, "timestamp": "2026-04-14T00:56:14.762140", "params": {"n_steer": 3, "n_throttle": 3, "learning_rate": 0.0011311496831886009, "timesteps": 35197, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": 144.7129, "std_reward": 26.0347, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0010/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1420.1210803985596, "reward_hacking_suspected": false}
|
||||
|
|
|
|||
Loading…
Reference in New Issue