autoresearch: phase1 trial 40 results

Agent: pi
Tests: N/A
Tests-Added: 0
TypeScript: N/A
This commit is contained in:
Paul Huliganga 2026-04-13 18:15:31 -04:00
parent ed65cf5997
commit 4c9b68dd47
2 changed files with 221 additions and 0 deletions

View File

@ -1558,3 +1558,214 @@
[2026-04-13 17:28:19] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:28:19] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:28:19] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:28:20] [AutoResearch] Git push complete after trial 30
[2026-04-13 17:28:22]
[AutoResearch] ========== Trial 31/50 ==========
[2026-04-13 17:28:22] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 17:28:22] UCB=1.8293 mu=1.4150 sigma=0.2071 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00026823443520900794, 'timesteps': 3642}
[2026-04-13 17:28:22] UCB=1.7919 mu=-0.1513 sigma=0.9716 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.004668146289604198, 'timesteps': 4165}
[2026-04-13 17:28:22] UCB=1.7859 mu=-0.1629 sigma=0.9744 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.004976003269391529, 'timesteps': 3465}
[2026-04-13 17:28:22] UCB=1.7747 mu=-0.1971 sigma=0.9859 params={'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.004814301490182538, 'timesteps': 1962}
[2026-04-13 17:28:22] UCB=1.7493 mu=-0.2268 sigma=0.9880 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.004644700370535985, 'timesteps': 1204}
[2026-04-13 17:28:22] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00026823443520900794, 'timesteps': 3642, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:28:24] [AutoResearch] Launching trial 31: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00026823443520900794, 'timesteps': 3642, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:31:58] [AutoResearch] Trial 31 finished in 214.0s, returncode=0
[2026-04-13 17:31:58] [AutoResearch] Trial 31: mean_reward=437.2376 std_reward=0.7096
[2026-04-13 17:31:58] [AutoResearch] === Trial 31 Summary ===
[2026-04-13 17:31:58] Total Phase 1 runs: 63
[2026-04-13 17:31:58] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:31:58] Top 5:
[2026-04-13 17:31:58] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:31:58] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:31:58] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:31:58] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:31:58] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:32:00]
[AutoResearch] ========== Trial 32/50 ==========
[2026-04-13 17:32:00] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 17:32:00] UCB=1.9266 mu=1.5375 sigma=0.1945 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 9.211025615061542e-05, 'timesteps': 3666}
[2026-04-13 17:32:00] UCB=1.8664 mu=1.2996 sigma=0.2834 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0012635534641106208, 'timesteps': 1084}
[2026-04-13 17:32:00] UCB=1.7961 mu=-0.1766 sigma=0.9864 params={'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.004925273491069971, 'timesteps': 1904}
[2026-04-13 17:32:00] UCB=1.7562 mu=-0.2134 sigma=0.9848 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.004794684466003254, 'timesteps': 1740}
[2026-04-13 17:32:00] UCB=1.7532 mu=-0.2189 sigma=0.9861 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.00445038188039129, 'timesteps': 1658}
[2026-04-13 17:32:00] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 9.211025615061542e-05, 'timesteps': 3666, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:32:02] [AutoResearch] Launching trial 32: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 9.211025615061542e-05, 'timesteps': 3666, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:35:28] [AutoResearch] Trial 32 finished in 206.1s, returncode=0
[2026-04-13 17:35:28] [AutoResearch] Trial 32: mean_reward=436.5947 std_reward=0.5625
[2026-04-13 17:35:28] [AutoResearch] === Trial 32 Summary ===
[2026-04-13 17:35:28] Total Phase 1 runs: 64
[2026-04-13 17:35:28] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:35:28] Top 5:
[2026-04-13 17:35:28] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:35:28] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:35:28] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:35:28] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:35:28] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:35:30]
[AutoResearch] ========== Trial 33/50 ==========
[2026-04-13 17:35:30] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 17:35:30] UCB=1.8230 mu=-0.1558 sigma=0.9894 params={'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.004904321552257595, 'timesteps': 2059}
[2026-04-13 17:35:30] UCB=1.8185 mu=-0.1516 sigma=0.9851 params={'n_steer': 7, 'n_throttle': 5, 'learning_rate': 0.004976831565587714, 'timesteps': 1229}
[2026-04-13 17:35:30] UCB=1.8177 mu=-0.1547 sigma=0.9862 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.003732008547933269, 'timesteps': 4958}
[2026-04-13 17:35:30] UCB=1.7862 mu=-0.1917 sigma=0.9889 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.004729802758232317, 'timesteps': 1370}
[2026-04-13 17:35:30] UCB=1.7841 mu=-0.1792 sigma=0.9816 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.004976832119872584, 'timesteps': 2341}
[2026-04-13 17:35:30] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.004904321552257595, 'timesteps': 2059, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:35:32] [AutoResearch] Launching trial 33: {'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.004904321552257595, 'timesteps': 2059, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:40:09] [AutoResearch] Trial 33 finished in 277.2s, returncode=0
[2026-04-13 17:40:09] [AutoResearch] Trial 33: mean_reward=15.6793 std_reward=0.0505
[2026-04-13 17:40:09] [AutoResearch] === Trial 33 Summary ===
[2026-04-13 17:40:09] Total Phase 1 runs: 65
[2026-04-13 17:40:09] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:40:09] Top 5:
[2026-04-13 17:40:09] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:40:09] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:40:09] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:40:09] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:40:09] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:40:11]
[AutoResearch] ========== Trial 34/50 ==========
[2026-04-13 17:40:11] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 17:40:11] UCB=1.8067 mu=1.4764 sigma=0.1652 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00038053863101293335, 'timesteps': 3088}
[2026-04-13 17:40:11] UCB=1.8047 mu=-0.1491 sigma=0.9769 params={'n_steer': 7, 'n_throttle': 5, 'learning_rate': 0.004636844298406863, 'timesteps': 4808}
[2026-04-13 17:40:11] UCB=1.6949 mu=-0.2530 sigma=0.9739 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.004917100163005054, 'timesteps': 4026}
[2026-04-13 17:40:11] UCB=1.6353 mu=-0.2991 sigma=0.9672 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.0046635403755889955, 'timesteps': 4363}
[2026-04-13 17:40:11] UCB=1.6196 mu=-0.2378 sigma=0.9287 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.004469470790956243, 'timesteps': 1524}
[2026-04-13 17:40:11] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00038053863101293335, 'timesteps': 3088, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:40:13] [AutoResearch] Launching trial 34: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00038053863101293335, 'timesteps': 3088, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:43:51] [AutoResearch] Trial 34 finished in 217.4s, returncode=0
[2026-04-13 17:43:51] [AutoResearch] Trial 34: mean_reward=638.2092 std_reward=0.5092
[2026-04-13 17:43:51] [AutoResearch] === Trial 34 Summary ===
[2026-04-13 17:43:51] Total Phase 1 runs: 66
[2026-04-13 17:43:51] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:43:51] Top 5:
[2026-04-13 17:43:51] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:43:51] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:43:51] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:43:51] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:43:51] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:43:53]
[AutoResearch] ========== Trial 35/50 ==========
[2026-04-13 17:43:53] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 17:43:53] UCB=2.9343 mu=2.4626 sigma=0.2358 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0002146725458057153, 'timesteps': 4944}
[2026-04-13 17:43:53] UCB=2.5541 mu=2.1447 sigma=0.2047 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006747069310446365, 'timesteps': 4902}
[2026-04-13 17:43:53] UCB=2.4444 mu=2.0202 sigma=0.2121 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.00030671251398934967, 'timesteps': 4520}
[2026-04-13 17:43:53] UCB=1.8159 mu=-0.1667 sigma=0.9913 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.004716216831648299, 'timesteps': 4776}
[2026-04-13 17:43:53] UCB=1.7143 mu=0.9845 sigma=0.3649 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0022546821210683806, 'timesteps': 1041}
[2026-04-13 17:43:53] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0002146725458057153, 'timesteps': 4944, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:43:55] [AutoResearch] Launching trial 35: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0002146725458057153, 'timesteps': 4944, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:49:20] [AutoResearch] Trial 35 finished in 325.0s, returncode=0
[2026-04-13 17:49:20] [AutoResearch] Trial 35: mean_reward=540.5951 std_reward=38.2538
[2026-04-13 17:49:20] [AutoResearch] === Trial 35 Summary ===
[2026-04-13 17:49:20] Total Phase 1 runs: 67
[2026-04-13 17:49:20] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:49:20] Top 5:
[2026-04-13 17:49:20] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:49:20] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:49:20] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:49:20] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:49:20] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:49:22]
[AutoResearch] ========== Trial 36/50 ==========
[2026-04-13 17:49:22] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 17:49:22] UCB=3.4845 mu=3.1499 sigma=0.1673 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00036413507909569123, 'timesteps': 4891}
[2026-04-13 17:49:22] UCB=3.0729 mu=1.8399 sigma=0.6165 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0005607137759058633, 'timesteps': 4948}
[2026-04-13 17:49:22] UCB=2.1034 mu=1.1264 sigma=0.4885 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 9.682211641519502e-05, 'timesteps': 4801}
[2026-04-13 17:49:22] UCB=1.9962 mu=1.5360 sigma=0.2301 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00044878665030086243, 'timesteps': 4962}
[2026-04-13 17:49:22] UCB=1.7950 mu=-0.1681 sigma=0.9815 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.004119006807166534, 'timesteps': 4402}
[2026-04-13 17:49:22] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00036413507909569123, 'timesteps': 4891, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:49:24] [AutoResearch] Launching trial 36: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00036413507909569123, 'timesteps': 4891, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:55:26] [AutoResearch] Trial 36 finished in 362.4s, returncode=0
[2026-04-13 17:55:26] [AutoResearch] Trial 36: mean_reward=1101.0573 std_reward=2.347
[2026-04-13 17:55:26] [AutoResearch] === Trial 36 Summary ===
[2026-04-13 17:55:26] Total Phase 1 runs: 68
[2026-04-13 17:55:26] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:55:26] Top 5:
[2026-04-13 17:55:26] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:55:26] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:55:26] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:55:26] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:55:26] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:55:28]
[AutoResearch] ========== Trial 37/50 ==========
[2026-04-13 17:55:28] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 17:55:28] UCB=2.5270 mu=2.0453 sigma=0.2408 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0011149861891360494, 'timesteps': 4845}
[2026-04-13 17:55:28] UCB=1.9170 mu=-0.0784 sigma=0.9977 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.004779159072219799, 'timesteps': 4905}
[2026-04-13 17:55:28] UCB=1.8806 mu=-0.1103 sigma=0.9954 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.004921533890627206, 'timesteps': 4615}
[2026-04-13 17:55:28] UCB=1.8401 mu=-0.1328 sigma=0.9865 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.004334444786923186, 'timesteps': 4250}
[2026-04-13 17:55:28] UCB=1.6242 mu=-0.2667 sigma=0.9455 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.0048913353300923485, 'timesteps': 3328}
[2026-04-13 17:55:28] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0011149861891360494, 'timesteps': 4845, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 17:55:30] [AutoResearch] Launching trial 37: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0011149861891360494, 'timesteps': 4845, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:00:33] [AutoResearch] Trial 37 finished in 302.3s, returncode=0
[2026-04-13 18:00:33] [AutoResearch] Trial 37: mean_reward=15.189 std_reward=0.0224
[2026-04-13 18:00:33] [AutoResearch] === Trial 37 Summary ===
[2026-04-13 18:00:33] Total Phase 1 runs: 69
[2026-04-13 18:00:33] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:00:33] Top 5:
[2026-04-13 18:00:33] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:00:33] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:00:33] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:00:33] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:00:33] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:00:35]
[AutoResearch] ========== Trial 38/50 ==========
[2026-04-13 18:00:35] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 18:00:35] UCB=2.5712 mu=2.2343 sigma=0.1684 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0007984038886242428, 'timesteps': 4697}
[2026-04-13 18:00:35] UCB=2.1377 mu=1.0094 sigma=0.5642 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0005556402930741495, 'timesteps': 4665}
[2026-04-13 18:00:35] UCB=2.0180 mu=1.7138 sigma=0.1521 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0005979497433581058, 'timesteps': 4838}
[2026-04-13 18:00:35] UCB=1.7800 mu=-0.1920 sigma=0.9860 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.00497349496844323, 'timesteps': 4446}
[2026-04-13 18:00:35] UCB=1.6935 mu=-0.2726 sigma=0.9831 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.004892390050969622, 'timesteps': 4620}
[2026-04-13 18:00:35] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0007984038886242428, 'timesteps': 4697, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:00:37] [AutoResearch] Launching trial 38: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0007984038886242428, 'timesteps': 4697, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:05:51] [AutoResearch] Trial 38 finished in 314.1s, returncode=0
[2026-04-13 18:05:51] [AutoResearch] Trial 38: mean_reward=634.4026 std_reward=27.7421
[2026-04-13 18:05:51] [AutoResearch] === Trial 38 Summary ===
[2026-04-13 18:05:51] Total Phase 1 runs: 70
[2026-04-13 18:05:51] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:05:51] Top 5:
[2026-04-13 18:05:51] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:05:51] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:05:51] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:05:51] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:05:51] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:05:53]
[AutoResearch] ========== Trial 39/50 ==========
[2026-04-13 18:05:53] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 18:05:53] UCB=4.5962 mu=4.2417 sigma=0.1772 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0011274566858403105, 'timesteps': 4920}
[2026-04-13 18:05:53] UCB=4.3882 mu=3.3729 sigma=0.5076 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0011908186822811213, 'timesteps': 4883}
[2026-04-13 18:05:53] UCB=3.9910 mu=3.4870 sigma=0.2520 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006332928008448982, 'timesteps': 4906}
[2026-04-13 18:05:53] UCB=1.7785 mu=-0.1716 sigma=0.9750 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.003871616386476668, 'timesteps': 4924}
[2026-04-13 18:05:53] UCB=1.7617 mu=-0.2085 sigma=0.9851 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.004940867117565567, 'timesteps': 4330}
[2026-04-13 18:05:53] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0011274566858403105, 'timesteps': 4920, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:05:55] [AutoResearch] Launching trial 39: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0011274566858403105, 'timesteps': 4920, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:10:40] [AutoResearch] Trial 39 finished in 285.6s, returncode=0
[2026-04-13 18:10:40] [AutoResearch] Trial 39: mean_reward=59.3316 std_reward=0.52
[2026-04-13 18:10:40] [AutoResearch] === Trial 39 Summary ===
[2026-04-13 18:10:40] Total Phase 1 runs: 71
[2026-04-13 18:10:40] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:10:40] Top 5:
[2026-04-13 18:10:40] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:10:40] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:10:40] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:10:40] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:10:40] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:10:42]
[AutoResearch] ========== Trial 40/50 ==========
[2026-04-13 18:10:42] [AutoResearch] GP UCB top-5 candidates:
[2026-04-13 18:10:42] UCB=8.0147 mu=6.2460 sigma=0.8844 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.003545717728698868, 'timesteps': 4844}
[2026-04-13 18:10:42] UCB=7.6008 mu=5.7060 sigma=0.9474 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0041166941707886885, 'timesteps': 4609}
[2026-04-13 18:10:42] UCB=7.4375 mu=5.7581 sigma=0.8397 params={'n_steer': 5, 'n_throttle': 2, 'learning_rate': 0.003813319367886245, 'timesteps': 4028}
[2026-04-13 18:10:42] UCB=7.1609 mu=5.6563 sigma=0.7523 params={'n_steer': 5, 'n_throttle': 2, 'learning_rate': 0.003093130771847368, 'timesteps': 4995}
[2026-04-13 18:10:42] UCB=6.9083 mu=5.0808 sigma=0.9138 params={'n_steer': 5, 'n_throttle': 2, 'learning_rate': 0.00407600621129732, 'timesteps': 4000}
[2026-04-13 18:10:42] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.003545717728698868, 'timesteps': 4844, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:10:44] [AutoResearch] Launching trial 40: {'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.003545717728698868, 'timesteps': 4844, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:15:31] [AutoResearch] Trial 40 finished in 286.2s, returncode=0
[2026-04-13 18:15:31] [AutoResearch] Trial 40: mean_reward=15.0795 std_reward=0.0038
[2026-04-13 18:15:31] [AutoResearch] === Trial 40 Summary ===
[2026-04-13 18:15:31] Total Phase 1 runs: 72
[2026-04-13 18:15:31] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:15:31] Top 5:
[2026-04-13 18:15:31] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:15:31] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:15:31] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:15:31] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
[2026-04-13 18:15:31] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}

View File

@ -72,3 +72,13 @@
{"trial": 28, "timestamp": "2026-04-13T17:18:46.584035", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0007283347926280964, "timesteps": 4804, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 1125.892, "std_reward": 9.7091, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0028/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 325.2930808067322, "reward_hacking_suspected": false}
{"trial": 29, "timestamp": "2026-04-13T17:22:42.111254", "params": {"n_steer": 8, "n_throttle": 2, "learning_rate": 0.0006213302398696545, "timesteps": 3064, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 35.5084, "std_reward": 0.0228, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0029/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 231.50703597068787, "reward_hacking_suspected": false}
{"trial": 30, "timestamp": "2026-04-13T17:28:19.047146", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0003386484278685721, "timesteps": 4977, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 2286.9085, "std_reward": 1.508, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0030/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 332.9170308113098, "reward_hacking_suspected": false}
{"trial": 31, "timestamp": "2026-04-13T17:31:58.502237", "params": {"n_steer": 6, "n_throttle": 3, "learning_rate": 0.00026823443520900794, "timesteps": 3642, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 437.2376, "std_reward": 0.7096, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0031/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 213.97514963150024, "reward_hacking_suspected": false}
{"trial": 32, "timestamp": "2026-04-13T17:35:28.639616", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 9.211025615061542e-05, "timesteps": 3666, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 436.5947, "std_reward": 0.5625, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0032/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 206.1173939704895, "reward_hacking_suspected": false}
{"trial": 33, "timestamp": "2026-04-13T17:40:09.874324", "params": {"n_steer": 5, "n_throttle": 5, "learning_rate": 0.004904321552257595, "timesteps": 2059, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.6793, "std_reward": 0.0505, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0033/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 277.20035672187805, "reward_hacking_suspected": false}
{"trial": 34, "timestamp": "2026-04-13T17:43:51.290172", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.00038053863101293335, "timesteps": 3088, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 638.2092, "std_reward": 0.5092, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0034/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 217.39785742759705, "reward_hacking_suspected": false}
{"trial": 35, "timestamp": "2026-04-13T17:49:20.324336", "params": {"n_steer": 6, "n_throttle": 2, "learning_rate": 0.0002146725458057153, "timesteps": 4944, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 540.5951, "std_reward": 38.2538, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0035/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 325.01239800453186, "reward_hacking_suspected": false}
{"trial": 36, "timestamp": "2026-04-13T17:55:26.757241", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.00036413507909569123, "timesteps": 4891, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 1101.0573, "std_reward": 2.347, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0036/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 362.4135682582855, "reward_hacking_suspected": false}
{"trial": 37, "timestamp": "2026-04-13T18:00:33.085883", "params": {"n_steer": 6, "n_throttle": 3, "learning_rate": 0.0011149861891360494, "timesteps": 4845, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.189, "std_reward": 0.0224, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0037/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 302.3103458881378, "reward_hacking_suspected": false}
{"trial": 38, "timestamp": "2026-04-13T18:05:51.196029", "params": {"n_steer": 7, "n_throttle": 2, "learning_rate": 0.0007984038886242428, "timesteps": 4697, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 634.4026, "std_reward": 27.7421, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0038/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 314.09127950668335, "reward_hacking_suspected": false}
{"trial": 39, "timestamp": "2026-04-13T18:10:40.863332", "params": {"n_steer": 6, "n_throttle": 2, "learning_rate": 0.0011274566858403105, "timesteps": 4920, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 59.3316, "std_reward": 0.52, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0039/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 285.64494466781616, "reward_hacking_suspected": false}
{"trial": 40, "timestamp": "2026-04-13T18:15:31.100267", "params": {"n_steer": 4, "n_throttle": 2, "learning_rate": 0.003545717728698868, "timesteps": 4844, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.0795, "std_reward": 0.0038, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0040/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 286.22130513191223, "reward_hacking_suspected": false}