From 3cbe4bd26ec15ced2bc42ca92db433ec85360237 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Mon, 13 Apr 2026 19:17:56 -0400 Subject: [PATCH] autoresearch: phase1 trial 50 results Agent: pi Tests: N/A Tests-Added: 0 TypeScript: N/A --- .../autoresearch_phase1_log.txt | 211 ++++++++++++++++++ .../autoresearch_results_phase1.jsonl | 10 + 2 files changed, 221 insertions(+) diff --git a/agent/outerloop-results/autoresearch_phase1_log.txt b/agent/outerloop-results/autoresearch_phase1_log.txt index c5d8d15..7ab84e0 100644 --- a/agent/outerloop-results/autoresearch_phase1_log.txt +++ b/agent/outerloop-results/autoresearch_phase1_log.txt @@ -1769,3 +1769,214 @@ [2026-04-13 18:15:31] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 18:15:31] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 18:15:31] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:15:32] [AutoResearch] Git push complete after trial 40 +[2026-04-13 18:15:34] +[AutoResearch] ========== Trial 41/50 ========== +[2026-04-13 18:15:34] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 18:15:34] UCB=7.3140 mu=5.9529 sigma=0.6805 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0032819869113354994, 'timesteps': 4854} +[2026-04-13 18:15:34] UCB=6.7856 mu=5.4612 sigma=0.6622 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0029238891580978905, 'timesteps': 4975} +[2026-04-13 18:15:34] UCB=6.7722 mu=4.9655 sigma=0.9033 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.004491394112651072, 'timesteps': 4986} +[2026-04-13 18:15:34] UCB=6.4527 mu=4.8075 sigma=0.8226 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.003979484400133024, 'timesteps': 4359} +[2026-04-13 18:15:34] UCB=5.5984 mu=4.0364 sigma=0.7810 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0032037950825406776, 'timesteps': 4728} +[2026-04-13 18:15:34] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0032819869113354994, 'timesteps': 4854, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:15:36] [AutoResearch] Launching trial 41: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0032819869113354994, 'timesteps': 4854, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:23:10] [AutoResearch] Trial 41 finished in 454.0s, returncode=0 +[2026-04-13 18:23:10] [AutoResearch] Trial 41: mean_reward=15.6571 std_reward=0.0079 +[2026-04-13 18:23:10] [AutoResearch] === Trial 41 Summary === +[2026-04-13 18:23:10] Total Phase 1 runs: 73 +[2026-04-13 18:23:10] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:23:10] Top 5: +[2026-04-13 18:23:10] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:23:10] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:23:10] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:23:10] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:23:10] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:23:12] +[AutoResearch] ========== Trial 42/50 ========== +[2026-04-13 18:23:12] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 18:23:12] UCB=7.4697 mu=5.6946 sigma=0.8875 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.004494661479461838, 'timesteps': 4811} +[2026-04-13 18:23:12] UCB=7.3759 mu=5.5905 sigma=0.8927 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.004611300185903181, 'timesteps': 4938} +[2026-04-13 18:23:12] UCB=5.4529 mu=3.6798 sigma=0.8865 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.004782538904571954, 'timesteps': 4160} +[2026-04-13 18:23:12] UCB=5.3153 mu=4.0688 sigma=0.6233 params={'n_steer': 5, 'n_throttle': 2, 'learning_rate': 0.004053286435875833, 'timesteps': 4622} +[2026-04-13 18:23:12] UCB=3.7995 mu=2.3517 sigma=0.7239 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0036533517137863577, 'timesteps': 3930} +[2026-04-13 18:23:12] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.004494661479461838, 'timesteps': 4811, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:23:14] [AutoResearch] Launching trial 42: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.004494661479461838, 'timesteps': 4811, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:30:58] [AutoResearch] Trial 42 finished in 463.9s, returncode=0 +[2026-04-13 18:30:58] [AutoResearch] Trial 42: mean_reward=980.0742 std_reward=27.1137 +[2026-04-13 18:30:58] [AutoResearch] === Trial 42 Summary === +[2026-04-13 18:30:58] Total Phase 1 runs: 74 +[2026-04-13 18:30:58] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:30:58] Top 5: +[2026-04-13 18:30:58] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:30:58] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:30:58] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:30:58] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:30:58] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:31:00] +[AutoResearch] ========== Trial 43/50 ========== +[2026-04-13 18:31:00] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 18:31:00] UCB=6.9048 mu=5.7145 sigma=0.5951 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.004086162667261962, 'timesteps': 4914} +[2026-04-13 18:31:00] UCB=4.8000 mu=3.4867 sigma=0.6566 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.004220414914967129, 'timesteps': 4781} +[2026-04-13 18:31:00] UCB=4.7529 mu=3.9881 sigma=0.3824 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0033340801031464364, 'timesteps': 4807} +[2026-04-13 18:31:00] UCB=3.8268 mu=2.8515 sigma=0.4877 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0031689897677083267, 'timesteps': 4975} +[2026-04-13 18:31:00] UCB=3.8163 mu=2.2537 sigma=0.7813 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.004053620338510361, 'timesteps': 3939} +[2026-04-13 18:31:00] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.004086162667261962, 'timesteps': 4914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:31:02] [AutoResearch] Launching trial 43: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.004086162667261962, 'timesteps': 4914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:38:03] [AutoResearch] Trial 43 finished in 420.3s, returncode=0 +[2026-04-13 18:38:03] [AutoResearch] Trial 43: mean_reward=15.822 std_reward=0.0155 +[2026-04-13 18:38:03] [AutoResearch] === Trial 43 Summary === +[2026-04-13 18:38:03] Total Phase 1 runs: 75 +[2026-04-13 18:38:03] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:38:03] Top 5: +[2026-04-13 18:38:03] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:38:03] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:38:03] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:38:03] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:38:03] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:38:05] +[AutoResearch] ========== Trial 44/50 ========== +[2026-04-13 18:38:05] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 18:38:05] UCB=3.5615 mu=2.1320 sigma=0.7148 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0029018837242704995, 'timesteps': 4982} +[2026-04-13 18:38:05] UCB=2.8781 mu=2.1898 sigma=0.3442 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0041151904231291935, 'timesteps': 4953} +[2026-04-13 18:38:05] UCB=2.6776 mu=1.6178 sigma=0.5299 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0027281830436155017, 'timesteps': 4721} +[2026-04-13 18:38:05] UCB=2.3508 mu=1.1391 sigma=0.6058 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.003576630797117473, 'timesteps': 3955} +[2026-04-13 18:38:05] UCB=2.3297 mu=1.9936 sigma=0.1680 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00105351764257976, 'timesteps': 4976} +[2026-04-13 18:38:05] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0029018837242704995, 'timesteps': 4982, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:38:07] [AutoResearch] Launching trial 44: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0029018837242704995, 'timesteps': 4982, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:45:15] [AutoResearch] Trial 44 finished in 428.2s, returncode=0 +[2026-04-13 18:45:15] [AutoResearch] Trial 44: mean_reward=14.9716 std_reward=0.0169 +[2026-04-13 18:45:15] [AutoResearch] === Trial 44 Summary === +[2026-04-13 18:45:15] Total Phase 1 runs: 76 +[2026-04-13 18:45:15] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:45:15] Top 5: +[2026-04-13 18:45:15] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:45:15] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:45:15] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:45:15] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:45:15] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:45:17] +[AutoResearch] ========== Trial 45/50 ========== +[2026-04-13 18:45:17] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 18:45:17] UCB=3.7477 mu=3.0769 sigma=0.3354 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007707060431765714, 'timesteps': 4976} +[2026-04-13 18:45:17] UCB=2.5970 mu=1.6330 sigma=0.4820 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.00020050085345383605, 'timesteps': 4929} +[2026-04-13 18:45:17] UCB=2.5924 mu=1.4768 sigma=0.5578 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0004133771186667665, 'timesteps': 4856} +[2026-04-13 18:45:17] UCB=2.2774 mu=0.9909 sigma=0.6433 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.000258239957476511, 'timesteps': 4860} +[2026-04-13 18:45:17] UCB=2.1102 mu=1.4400 sigma=0.3351 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0032581964017107715, 'timesteps': 4659} +[2026-04-13 18:45:17] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007707060431765714, 'timesteps': 4976, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:45:19] [AutoResearch] Launching trial 45: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007707060431765714, 'timesteps': 4976, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:51:12] [AutoResearch] Trial 45 finished in 353.6s, returncode=0 +[2026-04-13 18:51:12] [AutoResearch] Trial 45: mean_reward=4314.8893 std_reward=709.8281 +[2026-04-13 18:51:12] [AutoResearch] === Trial 45 Summary === +[2026-04-13 18:51:12] Total Phase 1 runs: 77 +[2026-04-13 18:51:12] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:51:12] Top 5: +[2026-04-13 18:51:12] mean_reward=4314.8893 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007707060431765714, 'timesteps': 4976, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:51:12] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:51:12] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:51:12] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:51:12] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:51:14] +[AutoResearch] ========== Trial 46/50 ========== +[2026-04-13 18:51:14] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 18:51:14] UCB=6.4999 mu=5.0495 sigma=0.7252 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0023937750629647986, 'timesteps': 4836} +[2026-04-13 18:51:14] UCB=5.7084 mu=4.9106 sigma=0.3989 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0008748090132533172, 'timesteps': 4866} +[2026-04-13 18:51:14] UCB=5.5209 mu=3.8795 sigma=0.8207 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.001388455889997006, 'timesteps': 4861} +[2026-04-13 18:51:14] UCB=5.3207 mu=3.4850 sigma=0.9178 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.002305837623532898, 'timesteps': 4993} +[2026-04-13 18:51:14] UCB=4.4431 mu=2.7688 sigma=0.8372 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.0023504082814860415, 'timesteps': 4636} +[2026-04-13 18:51:14] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0023937750629647986, 'timesteps': 4836, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:51:16] [AutoResearch] Launching trial 46: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0023937750629647986, 'timesteps': 4836, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:57:23] [AutoResearch] Trial 46 finished in 366.5s, returncode=0 +[2026-04-13 18:57:23] [AutoResearch] Trial 46: mean_reward=14.9853 std_reward=0.0252 +[2026-04-13 18:57:23] [AutoResearch] === Trial 46 Summary === +[2026-04-13 18:57:23] Total Phase 1 runs: 78 +[2026-04-13 18:57:23] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:57:23] Top 5: +[2026-04-13 18:57:23] mean_reward=4314.8893 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007707060431765714, 'timesteps': 4976, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:57:23] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:57:23] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:57:23] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:57:23] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:57:25] +[AutoResearch] ========== Trial 47/50 ========== +[2026-04-13 18:57:25] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 18:57:25] UCB=4.1357 mu=3.0492 sigma=0.5433 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0004222996001111442, 'timesteps': 4942} +[2026-04-13 18:57:25] UCB=2.2577 mu=1.0331 sigma=0.6123 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.001404145260025, 'timesteps': 4843} +[2026-04-13 18:57:25] UCB=1.8012 mu=0.3762 sigma=0.7125 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0019228183558907053, 'timesteps': 4832} +[2026-04-13 18:57:25] UCB=1.7332 mu=1.2215 sigma=0.2559 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0040978681388418505, 'timesteps': 4846} +[2026-04-13 18:57:25] UCB=1.7225 mu=-0.1961 sigma=0.9593 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.0034754845633088315, 'timesteps': 4434} +[2026-04-13 18:57:25] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0004222996001111442, 'timesteps': 4942, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 18:57:27] [AutoResearch] Launching trial 47: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0004222996001111442, 'timesteps': 4942, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:03:22] [AutoResearch] Trial 47 finished in 355.3s, returncode=0 +[2026-04-13 19:03:22] [AutoResearch] Trial 47: mean_reward=4462.293 std_reward=2.1401 +[2026-04-13 19:03:22] [AutoResearch] === Trial 47 Summary === +[2026-04-13 19:03:22] Total Phase 1 runs: 79 +[2026-04-13 19:03:22] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:03:22] Top 5: +[2026-04-13 19:03:22] mean_reward=4462.2930 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0004222996001111442, 'timesteps': 4942, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:03:22] mean_reward=4314.8893 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007707060431765714, 'timesteps': 4976, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:03:22] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:03:22] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:03:22] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:03:24] +[AutoResearch] ========== Trial 48/50 ========== +[2026-04-13 19:03:24] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 19:03:24] UCB=4.1327 mu=2.7945 sigma=0.6691 params={'n_steer': 3, 'n_throttle': 2, 'learning_rate': 0.0011350422703903862, 'timesteps': 4898} +[2026-04-13 19:03:24] UCB=3.1163 mu=2.2489 sigma=0.4337 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.00121598827031687, 'timesteps': 4696} +[2026-04-13 19:03:24] UCB=2.5824 mu=1.2026 sigma=0.6899 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0016751005980373689, 'timesteps': 4825} +[2026-04-13 19:03:24] UCB=2.2379 mu=1.9070 sigma=0.1655 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.001006125943831317, 'timesteps': 4941} +[2026-04-13 19:03:24] UCB=2.1810 mu=1.1490 sigma=0.5160 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.0005172426207023312, 'timesteps': 4618} +[2026-04-13 19:03:24] [AutoResearch] Proposed: {'n_steer': 3, 'n_throttle': 2, 'learning_rate': 0.0011350422703903862, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:03:26] [AutoResearch] Launching trial 48: {'n_steer': 3, 'n_throttle': 2, 'learning_rate': 0.0011350422703903862, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:08:23] [AutoResearch] Trial 48 finished in 296.5s, returncode=0 +[2026-04-13 19:08:23] [AutoResearch] Trial 48: mean_reward=16.8539 std_reward=0.0201 +[2026-04-13 19:08:23] [AutoResearch] === Trial 48 Summary === +[2026-04-13 19:08:23] Total Phase 1 runs: 80 +[2026-04-13 19:08:23] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:08:23] Top 5: +[2026-04-13 19:08:23] mean_reward=4462.2930 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0004222996001111442, 'timesteps': 4942, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:08:23] mean_reward=4314.8893 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007707060431765714, 'timesteps': 4976, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:08:23] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:08:23] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:08:23] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:08:25] +[AutoResearch] ========== Trial 49/50 ========== +[2026-04-13 19:08:25] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 19:08:25] UCB=6.7812 mu=6.4357 sigma=0.1727 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0010146909128518657, 'timesteps': 4979} +[2026-04-13 19:08:25] UCB=3.9810 mu=3.6607 sigma=0.1602 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0019488453328446698, 'timesteps': 4998} +[2026-04-13 19:08:25] UCB=2.8979 mu=1.9399 sigma=0.4790 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0001502626554457094, 'timesteps': 4829} +[2026-04-13 19:08:25] UCB=2.4072 mu=0.8987 sigma=0.7542 params={'n_steer': 4, 'n_throttle': 5, 'learning_rate': 0.0015006496751962756, 'timesteps': 4966} +[2026-04-13 19:08:25] UCB=2.3469 mu=0.6192 sigma=0.8639 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.001942190017735736, 'timesteps': 4781} +[2026-04-13 19:08:25] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0010146909128518657, 'timesteps': 4979, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:08:27] [AutoResearch] Launching trial 49: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0010146909128518657, 'timesteps': 4979, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:13:25] [AutoResearch] Trial 49 finished in 298.3s, returncode=0 +[2026-04-13 19:13:25] [AutoResearch] Trial 49: mean_reward=3332.0024 std_reward=5.8587 +[2026-04-13 19:13:25] [AutoResearch] === Trial 49 Summary === +[2026-04-13 19:13:25] Total Phase 1 runs: 81 +[2026-04-13 19:13:25] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:13:25] Top 5: +[2026-04-13 19:13:25] mean_reward=4462.2930 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0004222996001111442, 'timesteps': 4942, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:13:25] mean_reward=4314.8893 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007707060431765714, 'timesteps': 4976, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:13:25] mean_reward=3332.0024 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0010146909128518657, 'timesteps': 4979, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:13:25] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:13:25] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:13:27] +[AutoResearch] ========== Trial 50/50 ========== +[2026-04-13 19:13:27] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 19:13:27] UCB=4.4481 mu=4.0168 sigma=0.2157 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0007722480975921185, 'timesteps': 4972} +[2026-04-13 19:13:27] UCB=3.7707 mu=3.0773 sigma=0.3467 params={'n_steer': 4, 'n_throttle': 2, 'learning_rate': 0.00025395141290818565, 'timesteps': 4833} +[2026-04-13 19:13:27] UCB=3.5632 mu=2.6471 sigma=0.4581 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.003481880426416106, 'timesteps': 4938} +[2026-04-13 19:13:27] UCB=3.4746 mu=1.8819 sigma=0.7964 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.004090738433673601, 'timesteps': 4988} +[2026-04-13 19:13:27] UCB=2.9865 mu=1.3160 sigma=0.8353 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0030171536490468946, 'timesteps': 4972} +[2026-04-13 19:13:27] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0007722480975921185, 'timesteps': 4972, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:13:29] [AutoResearch] Launching trial 50: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0007722480975921185, 'timesteps': 4972, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:17:56] [AutoResearch] Trial 50 finished in 267.0s, returncode=0 +[2026-04-13 19:17:56] [AutoResearch] Trial 50: mean_reward=20.8886 std_reward=0.0537 +[2026-04-13 19:17:56] [AutoResearch] === Trial 50 Summary === +[2026-04-13 19:17:56] Total Phase 1 runs: 82 +[2026-04-13 19:17:56] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:17:56] Top 5: +[2026-04-13 19:17:56] mean_reward=4462.2930 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0004222996001111442, 'timesteps': 4942, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:17:56] mean_reward=4314.8893 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007707060431765714, 'timesteps': 4976, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:17:56] mean_reward=3332.0024 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0010146909128518657, 'timesteps': 4979, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:17:56] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 19:17:56] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} diff --git a/agent/outerloop-results/autoresearch_results_phase1.jsonl b/agent/outerloop-results/autoresearch_results_phase1.jsonl index d609d2b..11ddc3f 100644 --- a/agent/outerloop-results/autoresearch_results_phase1.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase1.jsonl @@ -82,3 +82,13 @@ {"trial": 38, "timestamp": "2026-04-13T18:05:51.196029", "params": {"n_steer": 7, "n_throttle": 2, "learning_rate": 0.0007984038886242428, "timesteps": 4697, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 634.4026, "std_reward": 27.7421, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0038/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 314.09127950668335, "reward_hacking_suspected": false} {"trial": 39, "timestamp": "2026-04-13T18:10:40.863332", "params": {"n_steer": 6, "n_throttle": 2, "learning_rate": 0.0011274566858403105, "timesteps": 4920, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 59.3316, "std_reward": 0.52, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0039/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 285.64494466781616, "reward_hacking_suspected": false} {"trial": 40, "timestamp": "2026-04-13T18:15:31.100267", "params": {"n_steer": 4, "n_throttle": 2, "learning_rate": 0.003545717728698868, "timesteps": 4844, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.0795, "std_reward": 0.0038, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0040/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 286.22130513191223, "reward_hacking_suspected": false} +{"trial": 41, "timestamp": "2026-04-13T18:23:10.910633", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.0032819869113354994, "timesteps": 4854, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.6571, "std_reward": 0.0079, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0041/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 454.0444815158844, "reward_hacking_suspected": false} +{"trial": 42, "timestamp": "2026-04-13T18:30:58.806333", "params": {"n_steer": 8, "n_throttle": 2, "learning_rate": 0.004494661479461838, "timesteps": 4811, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 980.0742, "std_reward": 27.1137, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0042/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 463.87768816947937, "reward_hacking_suspected": false} +{"trial": 43, "timestamp": "2026-04-13T18:38:03.089927", "params": {"n_steer": 6, "n_throttle": 2, "learning_rate": 0.004086162667261962, "timesteps": 4914, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.822, "std_reward": 0.0155, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0043/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 420.2614235877991, "reward_hacking_suspected": false} +{"trial": 44, "timestamp": "2026-04-13T18:45:15.325823", "params": {"n_steer": 9, "n_throttle": 2, "learning_rate": 0.0029018837242704995, "timesteps": 4982, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 14.9716, "std_reward": 0.0169, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0044/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 428.2138817310333, "reward_hacking_suspected": false} +{"trial": 45, "timestamp": "2026-04-13T18:51:12.947849", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0007707060431765714, "timesteps": 4976, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 4314.8893, "std_reward": 709.8281, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0045/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 353.60058331489563, "reward_hacking_suspected": false} +{"trial": 46, "timestamp": "2026-04-13T18:57:23.418007", "params": {"n_steer": 4, "n_throttle": 3, "learning_rate": 0.0023937750629647986, "timesteps": 4836, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 14.9853, "std_reward": 0.0252, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0046/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 366.4553461074829, "reward_hacking_suspected": false} +{"trial": 47, "timestamp": "2026-04-13T19:03:22.777776", "params": {"n_steer": 4, "n_throttle": 3, "learning_rate": 0.0004222996001111442, "timesteps": 4942, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 4462.293, "std_reward": 2.1401, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0047/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 355.3398275375366, "reward_hacking_suspected": false} +{"trial": 48, "timestamp": "2026-04-13T19:08:23.312891", "params": {"n_steer": 3, "n_throttle": 2, "learning_rate": 0.0011350422703903862, "timesteps": 4898, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 16.8539, "std_reward": 0.0201, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0048/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 296.5148799419403, "reward_hacking_suspected": false} +{"trial": 49, "timestamp": "2026-04-13T19:13:25.631631", "params": {"n_steer": 4, "n_throttle": 3, "learning_rate": 0.0010146909128518657, "timesteps": 4979, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 3332.0024, "std_reward": 5.8587, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0049/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 298.2845461368561, "reward_hacking_suspected": false} +{"trial": 50, "timestamp": "2026-04-13T19:17:56.697364", "params": {"n_steer": 4, "n_throttle": 3, "learning_rate": 0.0007722480975921185, "timesteps": 4972, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 20.8886, "std_reward": 0.0537, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0050/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 267.0465648174286, "reward_hacking_suspected": false}