From caf91c9fe61b35e18a057a0f7e0c07f2b2a70539 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Mon, 13 Apr 2026 16:00:23 -0400 Subject: [PATCH] autoresearch: phase1 trial 10 results Agent: pi Tests: N/A Tests-Added: 0 TypeScript: N/A --- .../autoresearch_phase1_log.txt | 312 ++++++++++++++++++ .../autoresearch_results_phase1.jsonl | 14 + 2 files changed, 326 insertions(+) diff --git a/agent/outerloop-results/autoresearch_phase1_log.txt b/agent/outerloop-results/autoresearch_phase1_log.txt index b499ae9..e2cfa3a 100644 --- a/agent/outerloop-results/autoresearch_phase1_log.txt +++ b/agent/outerloop-results/autoresearch_phase1_log.txt @@ -824,3 +824,315 @@ [2026-04-13 15:28:05] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 15:28:05] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 15:28:05] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:07] [AutoResearch] Git push complete after trial 40 +[2026-04-13 15:28:09] +[AutoResearch] ========== Trial 41/50 ========== +[2026-04-13 15:28:09] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:28:09] UCB=3.8633 mu=2.6844 sigma=0.5894 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.003889704330887667, 'timesteps': 2358} +[2026-04-13 15:28:09] UCB=3.4918 mu=1.9865 sigma=0.7526 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0002237254690090858, 'timesteps': 3281} +[2026-04-13 15:28:09] UCB=3.3272 mu=1.7103 sigma=0.8085 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.003364105777883071, 'timesteps': 2949} +[2026-04-13 15:28:09] UCB=3.2008 mu=1.9271 sigma=0.6368 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0019096612672065147, 'timesteps': 1173} +[2026-04-13 15:28:09] UCB=2.7984 mu=1.4837 sigma=0.6574 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0036338598508786854, 'timesteps': 2044} +[2026-04-13 15:28:09] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.003889704330887667, 'timesteps': 2358, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:11] [AutoResearch] Launching trial 41: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.003889704330887667, 'timesteps': 2358, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:13] [AutoResearch] Trial 41 finished in 2.7s, returncode=100 +[2026-04-13 15:28:13] [AutoResearch] Trial 41: mean_reward=None std_reward=None +[2026-04-13 15:28:13] [AutoResearch] === Trial 41 Summary === +[2026-04-13 15:28:13] Total Phase 1 runs: 32 +[2026-04-13 15:28:13] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:13] Top 5: +[2026-04-13 15:28:13] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:13] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:13] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:13] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:13] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:15] +[AutoResearch] ========== Trial 42/50 ========== +[2026-04-13 15:28:15] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:28:15] UCB=3.5665 mu=2.4790 sigma=0.5437 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0003019383611398774, 'timesteps': 2781} +[2026-04-13 15:28:15] UCB=3.3654 mu=1.9637 sigma=0.7009 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.00014844026245908166, 'timesteps': 3459} +[2026-04-13 15:28:15] UCB=3.3592 mu=2.5184 sigma=0.4204 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0010885983228779504, 'timesteps': 1043} +[2026-04-13 15:28:15] UCB=3.3498 mu=2.6283 sigma=0.3607 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0032895057033497216, 'timesteps': 2546} +[2026-04-13 15:28:15] UCB=3.3203 mu=2.2130 sigma=0.5536 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0015695836762903508, 'timesteps': 1060} +[2026-04-13 15:28:15] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0003019383611398774, 'timesteps': 2781, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:17] [AutoResearch] Launching trial 42: {'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0003019383611398774, 'timesteps': 2781, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:20] [AutoResearch] Trial 42 finished in 2.7s, returncode=100 +[2026-04-13 15:28:20] [AutoResearch] Trial 42: mean_reward=None std_reward=None +[2026-04-13 15:28:20] [AutoResearch] === Trial 42 Summary === +[2026-04-13 15:28:20] Total Phase 1 runs: 32 +[2026-04-13 15:28:20] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:20] Top 5: +[2026-04-13 15:28:20] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:20] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:20] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:20] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:20] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:22] +[AutoResearch] ========== Trial 43/50 ========== +[2026-04-13 15:28:22] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:28:22] UCB=3.7562 mu=2.5461 sigma=0.6050 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0011774218521194368, 'timesteps': 1055} +[2026-04-13 15:28:22] UCB=3.7529 mu=2.8818 sigma=0.4355 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0013193140990772738, 'timesteps': 1000} +[2026-04-13 15:28:22] UCB=3.2836 mu=1.9527 sigma=0.6654 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0020286332388212746, 'timesteps': 1134} +[2026-04-13 15:28:22] UCB=3.2030 mu=2.4299 sigma=0.3866 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0034641823188944136, 'timesteps': 2703} +[2026-04-13 15:28:22] UCB=2.9956 mu=1.4257 sigma=0.7850 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.00017238085750060874, 'timesteps': 3444} +[2026-04-13 15:28:22] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0011774218521194368, 'timesteps': 1055, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:24] [AutoResearch] Launching trial 43: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0011774218521194368, 'timesteps': 1055, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:27] [AutoResearch] Trial 43 finished in 2.7s, returncode=100 +[2026-04-13 15:28:27] [AutoResearch] Trial 43: mean_reward=None std_reward=None +[2026-04-13 15:28:27] [AutoResearch] === Trial 43 Summary === +[2026-04-13 15:28:27] Total Phase 1 runs: 32 +[2026-04-13 15:28:27] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:27] Top 5: +[2026-04-13 15:28:27] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:27] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:27] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:27] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:27] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:29] +[AutoResearch] ========== Trial 44/50 ========== +[2026-04-13 15:28:29] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:28:29] UCB=3.6688 mu=2.1358 sigma=0.7665 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0035907044857161814, 'timesteps': 2398} +[2026-04-13 15:28:29] UCB=3.6026 mu=2.7375 sigma=0.4325 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0001492703303519168, 'timesteps': 2495} +[2026-04-13 15:28:29] UCB=3.2482 mu=1.7169 sigma=0.7657 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00019487299853207732, 'timesteps': 3583} +[2026-04-13 15:28:29] UCB=3.1606 mu=1.8266 sigma=0.6670 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.004078225263142901, 'timesteps': 2617} +[2026-04-13 15:28:29] UCB=3.1520 mu=1.5537 sigma=0.7991 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0001880042865711781, 'timesteps': 3713} +[2026-04-13 15:28:29] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0035907044857161814, 'timesteps': 2398, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:31] [AutoResearch] Launching trial 44: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0035907044857161814, 'timesteps': 2398, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:34] [AutoResearch] Trial 44 finished in 2.7s, returncode=100 +[2026-04-13 15:28:34] [AutoResearch] Trial 44: mean_reward=None std_reward=None +[2026-04-13 15:28:34] [AutoResearch] === Trial 44 Summary === +[2026-04-13 15:28:34] Total Phase 1 runs: 32 +[2026-04-13 15:28:34] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:34] Top 5: +[2026-04-13 15:28:34] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:34] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:34] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:34] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:34] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:36] +[AutoResearch] ========== Trial 45/50 ========== +[2026-04-13 15:28:36] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:28:36] UCB=4.4174 mu=3.2704 sigma=0.5735 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00010809987579177334, 'timesteps': 2828} +[2026-04-13 15:28:36] UCB=3.4234 mu=2.3887 sigma=0.5174 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0038836965543492395, 'timesteps': 2908} +[2026-04-13 15:28:36] UCB=3.2779 mu=2.0187 sigma=0.6296 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.003299624894545091, 'timesteps': 2117} +[2026-04-13 15:28:36] UCB=3.0053 mu=1.7752 sigma=0.6151 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.004061584169979702, 'timesteps': 2354} +[2026-04-13 15:28:36] UCB=2.9944 mu=2.3011 sigma=0.3467 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.000638916285305638, 'timesteps': 1840} +[2026-04-13 15:28:36] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00010809987579177334, 'timesteps': 2828, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:31:20] ============================================================ +[2026-04-13 15:31:20] [AutoResearch] Phase 1 — Real PPO Training + GP+UCB Optimization +[2026-04-13 15:31:20] [AutoResearch] Max trials: 50 | kappa: 2.0 | push every: 10 +[2026-04-13 15:31:20] [AutoResearch] Results: /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase1.jsonl +[2026-04-13 15:31:20] [AutoResearch] Champion: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion +[2026-04-13 15:31:20] ============================================================ +[2026-04-13 15:31:20] [AutoResearch] Loaded 32 existing Phase 1 results. +[2026-04-13 15:31:20] [AutoResearch] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:31:20] +[AutoResearch] ========== Trial 1/50 ========== +[2026-04-13 15:31:20] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:31:20] UCB=3.7366 mu=2.9210 sigma=0.4078 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.00027249752441387215, 'timesteps': 2768} +[2026-04-13 15:31:20] UCB=3.5322 mu=2.1413 sigma=0.6954 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 7.200709133754677e-05, 'timesteps': 3189} +[2026-04-13 15:31:20] UCB=2.9545 mu=1.4144 sigma=0.7700 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00021644385806279394, 'timesteps': 3757} +[2026-04-13 15:31:20] UCB=2.7812 mu=1.6942 sigma=0.5435 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0006140384934385441, 'timesteps': 1601} +[2026-04-13 15:31:20] UCB=2.7496 mu=0.8450 sigma=0.9523 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0001968080135620267, 'timesteps': 3906} +[2026-04-13 15:31:20] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.00027249752441387215, 'timesteps': 2768, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:31:22] [AutoResearch] Launching trial 1: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.00027249752441387215, 'timesteps': 2768, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:34:50] [AutoResearch] Trial 1 finished in 208.5s, returncode=0 +[2026-04-13 15:34:50] [AutoResearch] Trial 1: mean_reward=619.9873 std_reward=0.5713 +[2026-04-13 15:34:50] [AutoResearch] === Trial 1 Summary === +[2026-04-13 15:34:50] Total Phase 1 runs: 33 +[2026-04-13 15:34:50] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:34:50] Top 5: +[2026-04-13 15:34:50] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:34:50] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:34:50] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:34:50] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:34:50] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:34:52] +[AutoResearch] ========== Trial 2/50 ========== +[2026-04-13 15:34:52] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:34:52] UCB=3.0943 mu=1.8643 sigma=0.6150 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0010178217900925352, 'timesteps': 1138} +[2026-04-13 15:34:52] UCB=2.8458 mu=1.7127 sigma=0.5666 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0002392280897745687, 'timesteps': 2148} +[2026-04-13 15:34:52] UCB=2.7366 mu=1.7553 sigma=0.4907 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0011962893358962853, 'timesteps': 1336} +[2026-04-13 15:34:52] UCB=2.6824 mu=1.7018 sigma=0.4903 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.001160325530975692, 'timesteps': 1426} +[2026-04-13 15:34:52] UCB=2.6355 mu=1.6142 sigma=0.5106 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0031704780737375126, 'timesteps': 2863} +[2026-04-13 15:34:52] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0010178217900925352, 'timesteps': 1138, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:34:54] [AutoResearch] Launching trial 2: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0010178217900925352, 'timesteps': 1138, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:36:43] [AutoResearch] Trial 2 finished in 108.8s, returncode=0 +[2026-04-13 15:36:43] [AutoResearch] Trial 2: mean_reward=43.2368 std_reward=0.0435 +[2026-04-13 15:36:43] [AutoResearch] === Trial 2 Summary === +[2026-04-13 15:36:43] Total Phase 1 runs: 34 +[2026-04-13 15:36:43] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:36:43] Top 5: +[2026-04-13 15:36:43] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:36:43] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:36:43] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:36:43] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:36:43] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:36:45] +[AutoResearch] ========== Trial 3/50 ========== +[2026-04-13 15:36:45] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:36:45] UCB=2.5334 mu=1.4412 sigma=0.5461 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00017414812807443384, 'timesteps': 2214} +[2026-04-13 15:36:45] UCB=2.3326 mu=2.1642 sigma=0.0842 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006368885696011156, 'timesteps': 1836} +[2026-04-13 15:36:45] UCB=2.2757 mu=1.7536 sigma=0.2611 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0004955871472787, 'timesteps': 2773} +[2026-04-13 15:36:45] UCB=2.1297 mu=0.1789 sigma=0.9754 params={'n_steer': 4, 'n_throttle': 5, 'learning_rate': 0.003247259025720135, 'timesteps': 1013} +[2026-04-13 15:36:45] UCB=2.0617 mu=0.7276 sigma=0.6670 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0017195798959150094, 'timesteps': 1010} +[2026-04-13 15:36:45] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00017414812807443384, 'timesteps': 2214, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:36:47] [AutoResearch] Launching trial 3: {'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00017414812807443384, 'timesteps': 2214, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:40:05] [AutoResearch] Trial 3 finished in 198.5s, returncode=0 +[2026-04-13 15:40:05] [AutoResearch] Trial 3: mean_reward=333.5673 std_reward=0.1409 +[2026-04-13 15:40:05] [AutoResearch] === Trial 3 Summary === +[2026-04-13 15:40:05] Total Phase 1 runs: 35 +[2026-04-13 15:40:05] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:40:05] Top 5: +[2026-04-13 15:40:05] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:40:05] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:40:05] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:40:05] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:40:05] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:40:07] +[AutoResearch] ========== Trial 4/50 ========== +[2026-04-13 15:40:07] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:40:07] UCB=3.9731 mu=2.5194 sigma=0.7269 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0037042058707837115, 'timesteps': 2319} +[2026-04-13 15:40:07] UCB=3.6554 mu=2.2181 sigma=0.7186 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.003708338874971097, 'timesteps': 2440} +[2026-04-13 15:40:07] UCB=2.5706 mu=1.8032 sigma=0.3837 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.003955234037292529, 'timesteps': 2079} +[2026-04-13 15:40:07] UCB=2.5238 mu=2.1163 sigma=0.2038 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006821546025584073, 'timesteps': 2654} +[2026-04-13 15:40:07] UCB=2.0849 mu=1.1662 sigma=0.4594 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0028891139882576333, 'timesteps': 2774} +[2026-04-13 15:40:07] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0037042058707837115, 'timesteps': 2319, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:40:09] [AutoResearch] Launching trial 4: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0037042058707837115, 'timesteps': 2319, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:44:34] [AutoResearch] Trial 4 finished in 264.3s, returncode=0 +[2026-04-13 15:44:34] [AutoResearch] Trial 4: mean_reward=14.9952 std_reward=0.0012 +[2026-04-13 15:44:34] [AutoResearch] === Trial 4 Summary === +[2026-04-13 15:44:34] Total Phase 1 runs: 36 +[2026-04-13 15:44:34] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:44:34] Top 5: +[2026-04-13 15:44:34] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:44:34] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:44:34] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:44:34] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:44:34] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:44:36] +[AutoResearch] ========== Trial 5/50 ========== +[2026-04-13 15:44:36] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:44:36] UCB=2.3559 mu=1.8208 sigma=0.2676 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849} +[2026-04-13 15:44:36] UCB=2.3145 mu=2.0150 sigma=0.1498 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005938673780871419, 'timesteps': 2123} +[2026-04-13 15:44:36] UCB=2.2943 mu=1.8076 sigma=0.2433 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006906315490947656, 'timesteps': 2765} +[2026-04-13 15:44:36] UCB=2.1792 mu=1.7569 sigma=0.2112 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0005595190308580637, 'timesteps': 2578} +[2026-04-13 15:44:36] UCB=2.1234 mu=1.6990 sigma=0.2122 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.00044225780284592813, 'timesteps': 2282} +[2026-04-13 15:44:36] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:44:38] [AutoResearch] Launching trial 5: {'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:48:04] [AutoResearch] Trial 5 finished in 206.4s, returncode=0 +[2026-04-13 15:48:04] [AutoResearch] Trial 5: mean_reward=1032.0966 std_reward=2.1093 +[2026-04-13 15:48:04] [AutoResearch] === Trial 5 Summary === +[2026-04-13 15:48:04] Total Phase 1 runs: 37 +[2026-04-13 15:48:04] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:48:04] Top 5: +[2026-04-13 15:48:04] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:48:04] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:48:04] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:48:04] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:48:04] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:48:06] +[AutoResearch] ========== Trial 6/50 ========== +[2026-04-13 15:48:06] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:48:06] UCB=2.3180 mu=1.9957 sigma=0.1611 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0008165309928824906, 'timesteps': 2250} +[2026-04-13 15:48:06] UCB=2.1244 mu=0.2338 sigma=0.9453 params={'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.002787182019228819, 'timesteps': 1216} +[2026-04-13 15:48:06] UCB=2.0859 mu=0.5910 sigma=0.7474 params={'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.0007018364858104177, 'timesteps': 1103} +[2026-04-13 15:48:06] UCB=1.8739 mu=1.2565 sigma=0.3087 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0035236952784743498, 'timesteps': 2547} +[2026-04-13 15:48:06] UCB=1.8564 mu=1.6769 sigma=0.0898 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0008436170399085055, 'timesteps': 2485} +[2026-04-13 15:48:06] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0008165309928824906, 'timesteps': 2250, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:48:08] [AutoResearch] Launching trial 6: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0008165309928824906, 'timesteps': 2250, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:51:25] [AutoResearch] Trial 6 finished in 196.6s, returncode=0 +[2026-04-13 15:51:25] [AutoResearch] Trial 6: mean_reward=572.0601 std_reward=3.1979 +[2026-04-13 15:51:25] [AutoResearch] === Trial 6 Summary === +[2026-04-13 15:51:25] Total Phase 1 runs: 38 +[2026-04-13 15:51:25] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:51:25] Top 5: +[2026-04-13 15:51:25] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:51:25] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:51:25] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:51:25] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:51:25] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:51:27] +[AutoResearch] ========== Trial 7/50 ========== +[2026-04-13 15:51:27] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:51:27] UCB=2.4842 mu=0.9195 sigma=0.7824 params={'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.0020283343280176574, 'timesteps': 1060} +[2026-04-13 15:51:27] UCB=2.4623 mu=1.5234 sigma=0.4694 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.003246237136438706, 'timesteps': 3130} +[2026-04-13 15:51:27] UCB=2.1984 mu=1.4877 sigma=0.3553 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.003983244774723074, 'timesteps': 2975} +[2026-04-13 15:51:27] UCB=2.1282 mu=0.8256 sigma=0.6513 params={'n_steer': 4, 'n_throttle': 5, 'learning_rate': 0.0013707605977159977, 'timesteps': 1244} +[2026-04-13 15:51:27] UCB=2.0957 mu=1.7888 sigma=0.1534 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0005782749802131942, 'timesteps': 3014} +[2026-04-13 15:51:27] [AutoResearch] Proposed: {'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.0020283343280176574, 'timesteps': 1060, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:51:29] [AutoResearch] Launching trial 7: {'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.0020283343280176574, 'timesteps': 1060, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:53:13] [AutoResearch] Trial 7 finished in 104.2s, returncode=0 +[2026-04-13 15:53:13] [AutoResearch] Trial 7: mean_reward=31.9123 std_reward=0.1159 +[2026-04-13 15:53:13] [AutoResearch] === Trial 7 Summary === +[2026-04-13 15:53:13] Total Phase 1 runs: 39 +[2026-04-13 15:53:13] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:53:13] Top 5: +[2026-04-13 15:53:13] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:53:13] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:53:13] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:53:13] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:53:13] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:53:15] +[AutoResearch] ========== Trial 8/50 ========== +[2026-04-13 15:53:15] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:53:15] UCB=2.3645 mu=1.8877 sigma=0.2384 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0038245903103972813, 'timesteps': 2401} +[2026-04-13 15:53:15] UCB=2.2307 mu=1.6587 sigma=0.2860 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0036860171858864176, 'timesteps': 2774} +[2026-04-13 15:53:15] UCB=2.1993 mu=1.1385 sigma=0.5304 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0035133068828179766, 'timesteps': 3180} +[2026-04-13 15:53:15] UCB=2.0278 mu=1.6637 sigma=0.1821 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00045103564754635163, 'timesteps': 3157} +[2026-04-13 15:53:15] UCB=1.9169 mu=1.3349 sigma=0.2910 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0002792226855563916, 'timesteps': 2550} +[2026-04-13 15:53:15] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0038245903103972813, 'timesteps': 2401, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:53:17] [AutoResearch] Launching trial 8: {'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0038245903103972813, 'timesteps': 2401, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:56:47] [AutoResearch] Trial 8 finished in 209.8s, returncode=0 +[2026-04-13 15:56:47] [AutoResearch] Trial 8: mean_reward=14.5596 std_reward=0.0173 +[2026-04-13 15:56:47] [AutoResearch] === Trial 8 Summary === +[2026-04-13 15:56:47] Total Phase 1 runs: 40 +[2026-04-13 15:56:47] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:56:47] Top 5: +[2026-04-13 15:56:47] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:56:47] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:56:47] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:56:47] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:56:47] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:56:49] +[AutoResearch] ========== Trial 9/50 ========== +[2026-04-13 15:56:49] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:56:49] UCB=2.0502 mu=1.7407 sigma=0.1548 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0003951932722914093, 'timesteps': 1993} +[2026-04-13 15:56:49] UCB=2.0166 mu=1.7821 sigma=0.1173 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0007037030106819803, 'timesteps': 1562} +[2026-04-13 15:56:49] UCB=1.9042 mu=0.2061 sigma=0.8490 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.004093188374182109, 'timesteps': 1907} +[2026-04-13 15:56:49] UCB=1.8829 mu=0.8452 sigma=0.5189 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.003916637540083301, 'timesteps': 2173} +[2026-04-13 15:56:49] UCB=1.8557 mu=-0.1309 sigma=0.9933 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0046622999893220305, 'timesteps': 1198} +[2026-04-13 15:56:49] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0003951932722914093, 'timesteps': 1993, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:56:51] [AutoResearch] Launching trial 9: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0003951932722914093, 'timesteps': 1993, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:58:34] [AutoResearch] Trial 9 finished in 103.3s, returncode=0 +[2026-04-13 15:58:34] [AutoResearch] Trial 9: mean_reward=44.5266 std_reward=0.1726 +[2026-04-13 15:58:34] [AutoResearch] === Trial 9 Summary === +[2026-04-13 15:58:34] Total Phase 1 runs: 41 +[2026-04-13 15:58:34] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:58:34] Top 5: +[2026-04-13 15:58:34] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:58:34] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:58:34] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:58:34] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:58:34] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:58:36] +[AutoResearch] ========== Trial 10/50 ========== +[2026-04-13 15:58:36] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:58:36] UCB=3.2274 mu=2.4983 sigma=0.3646 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0017751713134174076, 'timesteps': 1004} +[2026-04-13 15:58:36] UCB=2.2756 mu=1.7072 sigma=0.2842 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0010234264691151955, 'timesteps': 1076} +[2026-04-13 15:58:36] UCB=2.2585 mu=2.1443 sigma=0.0571 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0006964361674886283, 'timesteps': 2693} +[2026-04-13 15:58:36] UCB=2.1748 mu=0.9093 sigma=0.6327 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.002907030660302143, 'timesteps': 1339} +[2026-04-13 15:58:36] UCB=2.1561 mu=1.2532 sigma=0.4515 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0032424956372853735, 'timesteps': 1140} +[2026-04-13 15:58:36] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0017751713134174076, 'timesteps': 1004, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:58:38] [AutoResearch] Launching trial 10: {'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0017751713134174076, 'timesteps': 1004, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:00:23] [AutoResearch] Trial 10 finished in 105.1s, returncode=0 +[2026-04-13 16:00:23] [AutoResearch] Trial 10: mean_reward=26.063 std_reward=0.1375 +[2026-04-13 16:00:23] [AutoResearch] === Trial 10 Summary === +[2026-04-13 16:00:23] Total Phase 1 runs: 42 +[2026-04-13 16:00:23] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:00:23] Top 5: +[2026-04-13 16:00:23] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:00:23] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:00:23] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:00:23] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:00:23] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} diff --git a/agent/outerloop-results/autoresearch_results_phase1.jsonl b/agent/outerloop-results/autoresearch_results_phase1.jsonl index 37d53ff..aca2feb 100644 --- a/agent/outerloop-results/autoresearch_results_phase1.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase1.jsonl @@ -38,3 +38,17 @@ {"trial": 38, "timestamp": "2026-04-13T15:27:52.121624", "params": {"n_steer": 9, "n_throttle": 3, "learning_rate": 0.0035795756473208304, "timesteps": 2168, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.658806324005127, "reward_hacking_suspected": false} {"trial": 39, "timestamp": "2026-04-13T15:27:58.884410", "params": {"n_steer": 6, "n_throttle": 4, "learning_rate": 0.0007519115144017143, "timesteps": 1229, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.7456912994384766, "reward_hacking_suspected": false} {"trial": 40, "timestamp": "2026-04-13T15:28:05.834236", "params": {"n_steer": 8, "n_throttle": 4, "learning_rate": 0.00011160573690096532, "timesteps": 3094, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.933994770050049, "reward_hacking_suspected": false} +{"trial": 41, "timestamp": "2026-04-13T15:28:13.907680", "params": {"n_steer": 9, "n_throttle": 2, "learning_rate": 0.003889704330887667, "timesteps": 2358, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.701258897781372, "reward_hacking_suspected": false} +{"trial": 42, "timestamp": "2026-04-13T15:28:20.639504", "params": {"n_steer": 8, "n_throttle": 4, "learning_rate": 0.0003019383611398774, "timesteps": 2781, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.715066432952881, "reward_hacking_suspected": false} +{"trial": 43, "timestamp": "2026-04-13T15:28:27.347975", "params": {"n_steer": 7, "n_throttle": 4, "learning_rate": 0.0011774218521194368, "timesteps": 1055, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.691986322402954, "reward_hacking_suspected": false} +{"trial": 44, "timestamp": "2026-04-13T15:28:34.070660", "params": {"n_steer": 9, "n_throttle": 2, "learning_rate": 0.0035907044857161814, "timesteps": 2398, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.704636335372925, "reward_hacking_suspected": false} +{"trial": 1, "timestamp": "2026-04-13T15:34:50.594520", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.00027249752441387215, "timesteps": 2768, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 619.9873, "std_reward": 0.5713, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0001/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 208.49696159362793, "reward_hacking_suspected": false} +{"trial": 2, "timestamp": "2026-04-13T15:36:43.419690", "params": {"n_steer": 6, "n_throttle": 4, "learning_rate": 0.0010178217900925352, "timesteps": 1138, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 43.2368, "std_reward": 0.0435, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0002/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 108.80680227279663, "reward_hacking_suspected": false} +{"trial": 3, "timestamp": "2026-04-13T15:40:05.932802", "params": {"n_steer": 8, "n_throttle": 4, "learning_rate": 0.00017414812807443384, "timesteps": 2214, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 333.5673, "std_reward": 0.1409, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0003/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 198.4959580898285, "reward_hacking_suspected": false} +{"trial": 4, "timestamp": "2026-04-13T15:44:34.239964", "params": {"n_steer": 9, "n_throttle": 2, "learning_rate": 0.0037042058707837115, "timesteps": 2319, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 14.9952, "std_reward": 0.0012, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0004/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 264.2855544090271, "reward_hacking_suspected": false} +{"trial": 5, "timestamp": "2026-04-13T15:48:04.624121", "params": {"n_steer": 5, "n_throttle": 4, "learning_rate": 0.0006774569893590574, "timesteps": 2849, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 1032.0966, "std_reward": 2.1093, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0005/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 206.3696632385254, "reward_hacking_suspected": false} +{"trial": 6, "timestamp": "2026-04-13T15:51:25.235546", "params": {"n_steer": 6, "n_throttle": 3, "learning_rate": 0.0008165309928824906, "timesteps": 2250, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 572.0601, "std_reward": 3.1979, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 196.59648370742798, "reward_hacking_suspected": false} +{"trial": 7, "timestamp": "2026-04-13T15:53:13.479125", "params": {"n_steer": 3, "n_throttle": 5, "learning_rate": 0.0020283343280176574, "timesteps": 1060, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 31.9123, "std_reward": 0.1159, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0007/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 104.22811317443848, "reward_hacking_suspected": false} +{"trial": 8, "timestamp": "2026-04-13T15:56:47.314108", "params": {"n_steer": 9, "n_throttle": 3, "learning_rate": 0.0038245903103972813, "timesteps": 2401, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 14.5596, "std_reward": 0.0173, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0008/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 209.817458152771, "reward_hacking_suspected": false} +{"trial": 9, "timestamp": "2026-04-13T15:58:34.660642", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.0003951932722914093, "timesteps": 1993, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 44.5266, "std_reward": 0.1726, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0009/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 103.33265256881714, "reward_hacking_suspected": false} +{"trial": 10, "timestamp": "2026-04-13T16:00:23.744147", "params": {"n_steer": 8, "n_throttle": 3, "learning_rate": 0.0017751713134174076, "timesteps": 1004, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 26.063, "std_reward": 0.1375, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0010/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 105.06883907318115, "reward_hacking_suspected": false}