diff --git a/agent/outerloop-results/autoresearch_phase1_log.txt b/agent/outerloop-results/autoresearch_phase1_log.txt index c1bb6f9..6b965e7 100644 --- a/agent/outerloop-results/autoresearch_phase1_log.txt +++ b/agent/outerloop-results/autoresearch_phase1_log.txt @@ -191,3 +191,214 @@ [2026-04-13 14:07:58] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 14:07:58] mean_reward=92.4248 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.0011131823295383878, 'timesteps': 1691, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 14:07:58] mean_reward=74.2498 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.0010109905842864714, 'timesteps': 1150, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:07:59] [AutoResearch] Git push complete after trial 10 +[2026-04-13 14:08:01] +[AutoResearch] ========== Trial 11/50 ========== +[2026-04-13 14:08:01] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:08:01] UCB=3.4245 mu=2.1335 sigma=0.6455 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497} +[2026-04-13 14:08:01] UCB=3.4026 mu=2.4787 sigma=0.4619 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0005509497648997503, 'timesteps': 3181} +[2026-04-13 14:08:01] UCB=3.3530 mu=1.8438 sigma=0.7546 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00021061724588262036, 'timesteps': 3814} +[2026-04-13 14:08:01] UCB=3.2826 mu=1.7314 sigma=0.7756 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0003018922088252074, 'timesteps': 2774} +[2026-04-13 14:08:01] UCB=3.2756 mu=1.6815 sigma=0.7971 params={'n_steer': 5, 'n_throttle': 2, 'learning_rate': 0.000520158505476463, 'timesteps': 3742} +[2026-04-13 14:08:01] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:08:03] [AutoResearch] Launching trial 11: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:11:29] [AutoResearch] Trial 11 finished in 205.5s, returncode=0 +[2026-04-13 14:11:29] [AutoResearch] Trial 11: mean_reward=295.0942 std_reward=8.7983 +[2026-04-13 14:11:29] [AutoResearch] === Trial 11 Summary === +[2026-04-13 14:11:29] Total Phase 1 runs: 11 +[2026-04-13 14:11:29] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:11:29] Top 5: +[2026-04-13 14:11:29] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:11:29] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:11:29] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:11:29] mean_reward=295.0942 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:11:29] mean_reward=92.4248 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.0011131823295383878, 'timesteps': 1691, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:11:31] +[AutoResearch] ========== Trial 12/50 ========== +[2026-04-13 14:11:31] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:11:31] UCB=3.8974 mu=2.3114 sigma=0.7930 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00036708488973799465, 'timesteps': 1708} +[2026-04-13 14:11:31] UCB=3.8307 mu=2.4987 sigma=0.6660 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.00029894659146815144, 'timesteps': 2429} +[2026-04-13 14:11:31] UCB=3.7947 mu=2.3099 sigma=0.7424 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0009005388399341564, 'timesteps': 2218} +[2026-04-13 14:11:31] UCB=3.7156 mu=2.0628 sigma=0.8264 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0007350352970779585, 'timesteps': 1803} +[2026-04-13 14:11:31] UCB=3.6879 mu=2.2851 sigma=0.7014 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.001344651976676164, 'timesteps': 2228} +[2026-04-13 14:11:31] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00036708488973799465, 'timesteps': 1708, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:11:33] [AutoResearch] Launching trial 12: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00036708488973799465, 'timesteps': 1708, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:13:24] [AutoResearch] Trial 12 finished in 111.4s, returncode=0 +[2026-04-13 14:13:24] [AutoResearch] Trial 12: mean_reward=57.3599 std_reward=0.3574 +[2026-04-13 14:13:24] [AutoResearch] === Trial 12 Summary === +[2026-04-13 14:13:24] Total Phase 1 runs: 12 +[2026-04-13 14:13:24] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:13:24] Top 5: +[2026-04-13 14:13:24] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:13:24] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:13:24] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:13:24] mean_reward=295.0942 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:13:24] mean_reward=92.4248 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.0011131823295383878, 'timesteps': 1691, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:13:26] +[AutoResearch] ========== Trial 13/50 ========== +[2026-04-13 14:13:26] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:13:26] UCB=3.8867 mu=2.4252 sigma=0.7308 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0007540639059045701, 'timesteps': 2616} +[2026-04-13 14:13:26] UCB=3.4762 mu=1.6947 sigma=0.8907 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0006448520560867668, 'timesteps': 1828} +[2026-04-13 14:13:26] UCB=3.4298 mu=1.6469 sigma=0.8914 params={'n_steer': 3, 'n_throttle': 2, 'learning_rate': 6.829619904851873e-05, 'timesteps': 3068} +[2026-04-13 14:13:26] UCB=3.0569 mu=1.6819 sigma=0.6875 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0004803523239302434, 'timesteps': 2203} +[2026-04-13 14:13:26] UCB=2.9155 mu=1.2471 sigma=0.8342 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0014019934947968622, 'timesteps': 2871} +[2026-04-13 14:13:26] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0007540639059045701, 'timesteps': 2616, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:13:28] [AutoResearch] Launching trial 13: {'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0007540639059045701, 'timesteps': 2616, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:17:13] [AutoResearch] Trial 13 finished in 225.0s, returncode=0 +[2026-04-13 14:17:13] [AutoResearch] Trial 13: mean_reward=33.683 std_reward=0.1015 +[2026-04-13 14:17:13] [AutoResearch] === Trial 13 Summary === +[2026-04-13 14:17:13] Total Phase 1 runs: 13 +[2026-04-13 14:17:13] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:17:13] Top 5: +[2026-04-13 14:17:13] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:17:13] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:17:13] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:17:13] mean_reward=295.0942 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:17:13] mean_reward=92.4248 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.0011131823295383878, 'timesteps': 1691, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:17:15] +[AutoResearch] ========== Trial 14/50 ========== +[2026-04-13 14:17:15] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:17:15] UCB=3.7132 mu=2.7971 sigma=0.4580 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.00065796575384948, 'timesteps': 2500} +[2026-04-13 14:17:15] UCB=3.6826 mu=2.7276 sigma=0.4775 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0005935435040011074, 'timesteps': 2295} +[2026-04-13 14:17:15] UCB=3.0544 mu=1.7790 sigma=0.6377 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0013345870493340923, 'timesteps': 2444} +[2026-04-13 14:17:15] UCB=3.0393 mu=1.4606 sigma=0.7893 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0010072545445081315, 'timesteps': 2556} +[2026-04-13 14:17:15] UCB=2.9760 mu=1.4846 sigma=0.7457 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.000290256920789712, 'timesteps': 2875} +[2026-04-13 14:17:15] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.00065796575384948, 'timesteps': 2500, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:17:17] [AutoResearch] Launching trial 14: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.00065796575384948, 'timesteps': 2500, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:20:31] [AutoResearch] Trial 14 finished in 193.8s, returncode=0 +[2026-04-13 14:20:31] [AutoResearch] Trial 14: mean_reward=28.9888 std_reward=0.0928 +[2026-04-13 14:20:31] [AutoResearch] === Trial 14 Summary === +[2026-04-13 14:20:31] Total Phase 1 runs: 14 +[2026-04-13 14:20:31] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:20:31] Top 5: +[2026-04-13 14:20:31] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:20:31] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:20:31] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:20:31] mean_reward=295.0942 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:20:31] mean_reward=92.4248 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.0011131823295383878, 'timesteps': 1691, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:20:33] +[AutoResearch] ========== Trial 15/50 ========== +[2026-04-13 14:20:33] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:20:33] UCB=4.1822 mu=3.8136 sigma=0.1843 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00015967596710454723, 'timesteps': 2500} +[2026-04-13 14:20:33] UCB=3.9849 mu=3.6984 sigma=0.1432 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00020203299697276424, 'timesteps': 2915} +[2026-04-13 14:20:33] UCB=3.1212 mu=1.3239 sigma=0.8986 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 5.1439595083155936e-05, 'timesteps': 3411} +[2026-04-13 14:20:33] UCB=3.0219 mu=1.3426 sigma=0.8396 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005125080920325789, 'timesteps': 3284} +[2026-04-13 14:20:33] UCB=2.8614 mu=2.3902 sigma=0.2356 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00013318306340867736, 'timesteps': 2174} +[2026-04-13 14:20:33] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00015967596710454723, 'timesteps': 2500, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:20:35] [AutoResearch] Launching trial 15: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00015967596710454723, 'timesteps': 2500, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:23:58] [AutoResearch] Trial 15 finished in 203.6s, returncode=0 +[2026-04-13 14:23:58] [AutoResearch] Trial 15: mean_reward=296.5245 std_reward=0.8544 +[2026-04-13 14:23:58] [AutoResearch] === Trial 15 Summary === +[2026-04-13 14:23:58] Total Phase 1 runs: 15 +[2026-04-13 14:23:58] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:23:58] Top 5: +[2026-04-13 14:23:58] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:23:58] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:23:58] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:23:58] mean_reward=296.5245 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00015967596710454723, 'timesteps': 2500, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:23:58] mean_reward=295.0942 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:24:00] +[AutoResearch] ========== Trial 16/50 ========== +[2026-04-13 14:24:00] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:24:00] UCB=8.3198 mu=7.2782 sigma=0.5208 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0012899142850653915, 'timesteps': 2451} +[2026-04-13 14:24:00] UCB=8.2069 mu=7.0019 sigma=0.6025 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.001243494300822533, 'timesteps': 2623} +[2026-04-13 14:24:00] UCB=7.7757 mu=6.8085 sigma=0.4836 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0006531958020221599, 'timesteps': 2717} +[2026-04-13 14:24:00] UCB=7.4949 mu=6.2955 sigma=0.5997 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0010665298887244244, 'timesteps': 2471} +[2026-04-13 14:24:00] UCB=7.0435 mu=6.1542 sigma=0.4446 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0012582252159183014, 'timesteps': 2805} +[2026-04-13 14:24:00] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0012899142850653915, 'timesteps': 2451, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:24:02] [AutoResearch] Launching trial 16: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0012899142850653915, 'timesteps': 2451, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:27:35] [AutoResearch] Trial 16 finished in 212.5s, returncode=0 +[2026-04-13 14:27:35] [AutoResearch] Trial 16: mean_reward=15.5282 std_reward=0.0252 +[2026-04-13 14:27:35] [AutoResearch] === Trial 16 Summary === +[2026-04-13 14:27:35] Total Phase 1 runs: 16 +[2026-04-13 14:27:35] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:27:35] Top 5: +[2026-04-13 14:27:35] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:27:35] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:27:35] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:27:35] mean_reward=296.5245 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00015967596710454723, 'timesteps': 2500, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:27:35] mean_reward=295.0942 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:27:37] +[AutoResearch] ========== Trial 17/50 ========== +[2026-04-13 14:27:37] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:27:37] UCB=8.1359 mu=7.3725 sigma=0.3817 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0012604760157625598, 'timesteps': 2679} +[2026-04-13 14:27:37] UCB=8.0777 mu=6.9461 sigma=0.5658 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0017255017088966425, 'timesteps': 2530} +[2026-04-13 14:27:37] UCB=6.8693 mu=5.2870 sigma=0.7911 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0009439788338821218, 'timesteps': 2931} +[2026-04-13 14:27:37] UCB=6.8669 mu=5.7551 sigma=0.5559 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0016476310168858867, 'timesteps': 2769} +[2026-04-13 14:27:37] UCB=6.6556 mu=5.4318 sigma=0.6119 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0009292913262432035, 'timesteps': 2642} +[2026-04-13 14:27:37] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0012604760157625598, 'timesteps': 2679, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:27:39] [AutoResearch] Launching trial 17: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0012604760157625598, 'timesteps': 2679, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:31:18] [AutoResearch] Trial 17 finished in 218.8s, returncode=0 +[2026-04-13 14:31:18] [AutoResearch] Trial 17: mean_reward=25.041 std_reward=0.2538 +[2026-04-13 14:31:18] [AutoResearch] === Trial 17 Summary === +[2026-04-13 14:31:18] Total Phase 1 runs: 17 +[2026-04-13 14:31:18] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:31:18] Top 5: +[2026-04-13 14:31:18] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:31:18] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:31:18] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:31:18] mean_reward=296.5245 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00015967596710454723, 'timesteps': 2500, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:31:18] mean_reward=295.0942 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006487801810851401, 'timesteps': 3497, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:31:20] +[AutoResearch] ========== Trial 18/50 ========== +[2026-04-13 14:31:20] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:31:20] UCB=7.0244 mu=5.9003 sigma=0.5621 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472} +[2026-04-13 14:31:20] UCB=6.0955 mu=4.8517 sigma=0.6219 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.00020480510091558957, 'timesteps': 2306} +[2026-04-13 14:31:20] UCB=6.0818 mu=4.6603 sigma=0.7107 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0007009084549222966, 'timesteps': 3007} +[2026-04-13 14:31:20] UCB=5.7812 mu=4.0041 sigma=0.8885 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0007689592895820599, 'timesteps': 2581} +[2026-04-13 14:31:20] UCB=5.6269 mu=4.0968 sigma=0.7651 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0009037770199337085, 'timesteps': 2594} +[2026-04-13 14:31:20] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:31:22] [AutoResearch] Launching trial 18: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:35:35] [AutoResearch] Trial 18 finished in 252.8s, returncode=0 +[2026-04-13 14:35:35] [AutoResearch] Trial 18: mean_reward=1389.3806 std_reward=4.4479 +[2026-04-13 14:35:35] [AutoResearch] === Trial 18 Summary === +[2026-04-13 14:35:35] Total Phase 1 runs: 18 +[2026-04-13 14:35:35] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:35:35] Top 5: +[2026-04-13 14:35:35] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:35:35] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:35:35] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:35:35] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:35:35] mean_reward=296.5245 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00015967596710454723, 'timesteps': 2500, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:35:37] +[AutoResearch] ========== Trial 19/50 ========== +[2026-04-13 14:35:37] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:35:37] UCB=3.7303 mu=3.1686 sigma=0.2808 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914} +[2026-04-13 14:35:37] UCB=3.6627 mu=3.1516 sigma=0.2556 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00030687648195598525, 'timesteps': 2754} +[2026-04-13 14:35:37] UCB=3.3895 mu=2.0524 sigma=0.6685 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0005669280868608377, 'timesteps': 2176} +[2026-04-13 14:35:37] UCB=3.2511 mu=1.6136 sigma=0.8188 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0005635699313680674, 'timesteps': 2489} +[2026-04-13 14:35:37] UCB=3.2373 mu=1.7930 sigma=0.7222 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0005912469317752097, 'timesteps': 1811} +[2026-04-13 14:35:37] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:35:39] [AutoResearch] Launching trial 19: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:39:40] [AutoResearch] Trial 19 finished in 241.7s, returncode=0 +[2026-04-13 14:39:40] [AutoResearch] Trial 19: mean_reward=1072.7063 std_reward=4.9159 +[2026-04-13 14:39:40] [AutoResearch] === Trial 19 Summary === +[2026-04-13 14:39:40] Total Phase 1 runs: 19 +[2026-04-13 14:39:40] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:39:40] Top 5: +[2026-04-13 14:39:40] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:39:40] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:39:40] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:39:40] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:39:40] mean_reward=326.6374 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0001799978550884136, 'timesteps': 2081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:39:42] +[AutoResearch] ========== Trial 20/50 ========== +[2026-04-13 14:39:42] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 14:39:42] UCB=4.1899 mu=2.9389 sigma=0.6255 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382} +[2026-04-13 14:39:42] UCB=4.0210 mu=2.6315 sigma=0.6947 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0008397617990643678, 'timesteps': 2216} +[2026-04-13 14:39:42] UCB=3.9967 mu=2.3696 sigma=0.8136 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0013674551870704959, 'timesteps': 1522} +[2026-04-13 14:39:42] UCB=3.8338 mu=2.1883 sigma=0.8228 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0005541858758139742, 'timesteps': 1280} +[2026-04-13 14:39:42] UCB=3.7481 mu=2.5203 sigma=0.6139 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 7.025023485216959e-05, 'timesteps': 2093} +[2026-04-13 14:39:42] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:39:44] [AutoResearch] Launching trial 20: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:41:55] [AutoResearch] Trial 20 finished in 130.6s, returncode=0 +[2026-04-13 14:41:55] [AutoResearch] Trial 20: mean_reward=821.1389 std_reward=234.0365 +[2026-04-13 14:41:55] [AutoResearch] === Trial 20 Summary === +[2026-04-13 14:41:55] Total Phase 1 runs: 20 +[2026-04-13 14:41:55] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:41:55] Top 5: +[2026-04-13 14:41:55] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:41:55] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:41:55] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:41:55] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 14:41:55] mean_reward=492.1545 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003046182905194233, 'timesteps': 2293, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} diff --git a/agent/outerloop-results/autoresearch_results_phase1.jsonl b/agent/outerloop-results/autoresearch_results_phase1.jsonl index 17651ff..5d47630 100644 --- a/agent/outerloop-results/autoresearch_results_phase1.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase1.jsonl @@ -8,3 +8,13 @@ {"trial": 8, "timestamp": "2026-04-13T14:02:13.939963", "params": {"n_steer": 5, "n_throttle": 4, "learning_rate": 0.0003046182905194233, "timesteps": 2293, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 492.1545, "std_reward": 20.4057, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0008/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 221.69839644432068, "reward_hacking_suspected": false} {"trial": 9, "timestamp": "2026-04-13T14:04:07.465136", "params": {"n_steer": 4, "n_throttle": 4, "learning_rate": 0.0010269810535699494, "timesteps": 1405, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 47.3482, "std_reward": 0.07, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0009/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 109.50906157493591, "reward_hacking_suspected": false} {"trial": 10, "timestamp": "2026-04-13T14:07:58.201720", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.00037737321665256695, "timesteps": 2717, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 1157.047, "std_reward": 0.7533, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0010/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 226.72052145004272, "reward_hacking_suspected": false} +{"trial": 11, "timestamp": "2026-04-13T14:11:29.139563", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0006487801810851401, "timesteps": 3497, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 295.0942, "std_reward": 8.7983, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0011/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 205.52370166778564, "reward_hacking_suspected": false} +{"trial": 12, "timestamp": "2026-04-13T14:13:24.563012", "params": {"n_steer": 6, "n_throttle": 3, "learning_rate": 0.00036708488973799465, "timesteps": 1708, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 57.3599, "std_reward": 0.3574, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0012/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 111.40900731086731, "reward_hacking_suspected": false} +{"trial": 13, "timestamp": "2026-04-13T14:17:13.551130", "params": {"n_steer": 4, "n_throttle": 2, "learning_rate": 0.0007540639059045701, "timesteps": 2616, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 33.683, "std_reward": 0.1015, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0013/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 224.97304940223694, "reward_hacking_suspected": false} +{"trial": 14, "timestamp": "2026-04-13T14:20:31.378085", "params": {"n_steer": 4, "n_throttle": 3, "learning_rate": 0.00065796575384948, "timesteps": 2500, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 28.9888, "std_reward": 0.0928, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0014/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 193.81317043304443, "reward_hacking_suspected": false} +{"trial": 15, "timestamp": "2026-04-13T14:23:58.958318", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.00015967596710454723, "timesteps": 2500, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 296.5245, "std_reward": 0.8544, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0015/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 203.5606245994568, "reward_hacking_suspected": false} +{"trial": 16, "timestamp": "2026-04-13T14:27:35.487293", "params": {"n_steer": 6, "n_throttle": 4, "learning_rate": 0.0012899142850653915, "timesteps": 2451, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.5282, "std_reward": 0.0252, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0016/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 212.512845993042, "reward_hacking_suspected": false} +{"trial": 17, "timestamp": "2026-04-13T14:31:18.330091", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0012604760157625598, "timesteps": 2679, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 25.041, "std_reward": 0.2538, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0017/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 218.82927250862122, "reward_hacking_suspected": false} +{"trial": 18, "timestamp": "2026-04-13T14:35:35.102980", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.0005504110507719487, "timesteps": 2472, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 1389.3806, "std_reward": 4.4479, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0018/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 252.75571608543396, "reward_hacking_suspected": false} +{"trial": 19, "timestamp": "2026-04-13T14:39:40.830648", "params": {"n_steer": 6, "n_throttle": 3, "learning_rate": 0.00038717401417690916, "timesteps": 2914, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 1072.7063, "std_reward": 4.9159, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0019/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 241.70746493339539, "reward_hacking_suspected": false} +{"trial": 20, "timestamp": "2026-04-13T14:41:55.423640", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.0012685117683148405, "timesteps": 1382, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 821.1389, "std_reward": 234.0365, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0020/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 130.5800895690918, "reward_hacking_suspected": false}