From 29a45e017bc775df0bf3307e12442b704986cace Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Mon, 13 Apr 2026 16:38:17 -0400 Subject: [PATCH] autoresearch: phase1 trial 20 results Agent: pi Tests: N/A Tests-Added: 0 TypeScript: N/A --- .../autoresearch_phase1_log.txt | 211 ++++++++++++++++++ .../autoresearch_results_phase1.jsonl | 10 + 2 files changed, 221 insertions(+) diff --git a/agent/outerloop-results/autoresearch_phase1_log.txt b/agent/outerloop-results/autoresearch_phase1_log.txt index e2cfa3a..38c7141 100644 --- a/agent/outerloop-results/autoresearch_phase1_log.txt +++ b/agent/outerloop-results/autoresearch_phase1_log.txt @@ -1136,3 +1136,214 @@ [2026-04-13 16:00:23] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 16:00:23] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 16:00:23] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:00:25] [AutoResearch] Git push complete after trial 10 +[2026-04-13 16:00:27] +[AutoResearch] ========== Trial 11/50 ========== +[2026-04-13 16:00:27] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:00:27] UCB=3.0451 mu=1.7238 sigma=0.6607 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.00261843600079989, 'timesteps': 1081} +[2026-04-13 16:00:27] UCB=2.6034 mu=2.2509 sigma=0.1762 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005990972802344085, 'timesteps': 2774} +[2026-04-13 16:00:27] UCB=2.3252 mu=2.2068 sigma=0.0592 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.000721376156088404, 'timesteps': 2907} +[2026-04-13 16:00:27] UCB=2.2116 mu=1.4670 sigma=0.3723 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0011709545997933251, 'timesteps': 1321} +[2026-04-13 16:00:27] UCB=2.1998 mu=1.1176 sigma=0.5411 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0026805657038851353, 'timesteps': 1209} +[2026-04-13 16:00:27] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.00261843600079989, 'timesteps': 1081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:00:29] [AutoResearch] Launching trial 11: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.00261843600079989, 'timesteps': 1081, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:02:12] [AutoResearch] Trial 11 finished in 102.6s, returncode=0 +[2026-04-13 16:02:12] [AutoResearch] Trial 11: mean_reward=15.0063 std_reward=0.0168 +[2026-04-13 16:02:12] [AutoResearch] === Trial 11 Summary === +[2026-04-13 16:02:12] Total Phase 1 runs: 43 +[2026-04-13 16:02:12] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:02:12] Top 5: +[2026-04-13 16:02:12] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:02:12] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:02:12] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:02:12] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:02:12] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:02:14] +[AutoResearch] ========== Trial 12/50 ========== +[2026-04-13 16:02:14] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:02:14] UCB=2.0321 mu=1.9000 sigma=0.0661 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.000494038295411516, 'timesteps': 2549} +[2026-04-13 16:02:14] UCB=1.9393 mu=1.5188 sigma=0.2103 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0014740735858409435, 'timesteps': 1216} +[2026-04-13 16:02:14] UCB=1.8736 mu=1.4721 sigma=0.2007 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00043141485613891153, 'timesteps': 2832} +[2026-04-13 16:02:14] UCB=1.8682 mu=1.0345 sigma=0.4169 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0006118476939445131, 'timesteps': 3068} +[2026-04-13 16:02:14] UCB=1.8532 mu=-0.1166 sigma=0.9849 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.0023880940961788946, 'timesteps': 4789} +[2026-04-13 16:02:14] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.000494038295411516, 'timesteps': 2549, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:02:16] [AutoResearch] Launching trial 12: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.000494038295411516, 'timesteps': 2549, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:05:50] [AutoResearch] Trial 12 finished in 214.4s, returncode=0 +[2026-04-13 16:05:50] [AutoResearch] Trial 12: mean_reward=743.5528 std_reward=3.1861 +[2026-04-13 16:05:50] [AutoResearch] === Trial 12 Summary === +[2026-04-13 16:05:50] Total Phase 1 runs: 44 +[2026-04-13 16:05:50] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:05:50] Top 5: +[2026-04-13 16:05:50] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:05:50] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:05:50] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:05:50] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:05:50] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:05:52] +[AutoResearch] ========== Trial 13/50 ========== +[2026-04-13 16:05:52] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:05:52] UCB=2.2973 mu=2.1304 sigma=0.0834 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006080707920424922, 'timesteps': 3082} +[2026-04-13 16:05:52] UCB=1.8440 mu=-0.1447 sigma=0.9943 params={'n_steer': 4, 'n_throttle': 5, 'learning_rate': 0.004963788338941324, 'timesteps': 1577} +[2026-04-13 16:05:52] UCB=1.8349 mu=-0.1535 sigma=0.9942 params={'n_steer': 4, 'n_throttle': 5, 'learning_rate': 0.004895602208151989, 'timesteps': 2098} +[2026-04-13 16:05:52] UCB=1.8171 mu=-0.1611 sigma=0.9891 params={'n_steer': 7, 'n_throttle': 5, 'learning_rate': 0.004911350917760141, 'timesteps': 1288} +[2026-04-13 16:05:52] UCB=1.8039 mu=-0.1505 sigma=0.9772 params={'n_steer': 4, 'n_throttle': 5, 'learning_rate': 0.0048778108965635185, 'timesteps': 3392} +[2026-04-13 16:05:52] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006080707920424922, 'timesteps': 3082, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:05:54] [AutoResearch] Launching trial 13: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0006080707920424922, 'timesteps': 3082, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:09:20] [AutoResearch] Trial 13 finished in 205.8s, returncode=0 +[2026-04-13 16:09:20] [AutoResearch] Trial 13: mean_reward=471.0407 std_reward=7.8649 +[2026-04-13 16:09:20] [AutoResearch] === Trial 13 Summary === +[2026-04-13 16:09:20] Total Phase 1 runs: 45 +[2026-04-13 16:09:20] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:09:20] Top 5: +[2026-04-13 16:09:20] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:09:20] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:09:20] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:09:20] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:09:20] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:09:22] +[AutoResearch] ========== Trial 14/50 ========== +[2026-04-13 16:09:22] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:09:22] UCB=3.6895 mu=2.3211 sigma=0.6842 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0016216488882950194, 'timesteps': 1028} +[2026-04-13 16:09:22] UCB=3.5839 mu=2.1682 sigma=0.7079 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.002622019549469901, 'timesteps': 1018} +[2026-04-13 16:09:22] UCB=3.1895 mu=2.4656 sigma=0.3619 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0021469951486806586, 'timesteps': 1073} +[2026-04-13 16:09:22] UCB=2.9809 mu=1.1456 sigma=0.9176 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00354615869547646, 'timesteps': 1296} +[2026-04-13 16:09:22] UCB=2.9648 mu=2.3325 sigma=0.3161 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003130709905464761, 'timesteps': 3373} +[2026-04-13 16:09:22] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0016216488882950194, 'timesteps': 1028, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:09:24] [AutoResearch] Launching trial 14: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0016216488882950194, 'timesteps': 1028, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:11:08] [AutoResearch] Trial 14 finished in 104.0s, returncode=0 +[2026-04-13 16:11:08] [AutoResearch] Trial 14: mean_reward=15.1063 std_reward=0.0076 +[2026-04-13 16:11:08] [AutoResearch] === Trial 14 Summary === +[2026-04-13 16:11:08] Total Phase 1 runs: 46 +[2026-04-13 16:11:08] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:11:08] Top 5: +[2026-04-13 16:11:08] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:11:08] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:11:08] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:11:08] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:11:08] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:11:10] +[AutoResearch] ========== Trial 15/50 ========== +[2026-04-13 16:11:10] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:11:10] UCB=2.3880 mu=1.3579 sigma=0.5150 params={'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.00031142012560796015, 'timesteps': 3450} +[2026-04-13 16:11:10] UCB=2.3861 mu=2.1519 sigma=0.1171 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.00043425304956394085, 'timesteps': 2955} +[2026-04-13 16:11:10] UCB=2.2442 mu=0.6357 sigma=0.8042 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 9.434990518937583e-05, 'timesteps': 4980} +[2026-04-13 16:11:10] UCB=2.1419 mu=1.1831 sigma=0.4794 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.0006021392192267739, 'timesteps': 3258} +[2026-04-13 16:11:10] UCB=2.1294 mu=0.1453 sigma=0.9921 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.004817619902542952, 'timesteps': 2247} +[2026-04-13 16:11:10] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.00031142012560796015, 'timesteps': 3450, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:11:12] [AutoResearch] Launching trial 15: {'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.00031142012560796015, 'timesteps': 3450, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:14:16] [AutoResearch] Trial 15 finished in 184.3s, returncode=0 +[2026-04-13 16:14:16] [AutoResearch] Trial 15: mean_reward=21.5609 std_reward=0.0472 +[2026-04-13 16:14:16] [AutoResearch] === Trial 15 Summary === +[2026-04-13 16:14:16] Total Phase 1 runs: 47 +[2026-04-13 16:14:16] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:14:16] Top 5: +[2026-04-13 16:14:16] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:14:16] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:14:16] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:14:16] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:14:16] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:14:18] +[AutoResearch] ========== Trial 16/50 ========== +[2026-04-13 16:14:18] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:14:18] UCB=2.8069 mu=2.0215 sigma=0.3927 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003241673094165768, 'timesteps': 3632} +[2026-04-13 16:14:18] UCB=2.3263 mu=1.8104 sigma=0.2580 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.00033084211302334245, 'timesteps': 2916} +[2026-04-13 16:14:18] UCB=2.2712 mu=1.3861 sigma=0.4426 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0006784421359377939, 'timesteps': 3060} +[2026-04-13 16:14:18] UCB=2.2307 mu=1.6077 sigma=0.3115 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0003912882062863388, 'timesteps': 3421} +[2026-04-13 16:14:18] UCB=2.1864 mu=0.2144 sigma=0.9860 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.004468930310099049, 'timesteps': 2507} +[2026-04-13 16:14:18] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003241673094165768, 'timesteps': 3632, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:14:20] [AutoResearch] Launching trial 16: {'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0003241673094165768, 'timesteps': 3632, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:17:44] [AutoResearch] Trial 16 finished in 203.5s, returncode=0 +[2026-04-13 16:17:44] [AutoResearch] Trial 16: mean_reward=501.8865 std_reward=1.0671 +[2026-04-13 16:17:44] [AutoResearch] === Trial 16 Summary === +[2026-04-13 16:17:44] Total Phase 1 runs: 48 +[2026-04-13 16:17:44] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:17:44] Top 5: +[2026-04-13 16:17:44] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:17:44] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:17:44] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:17:44] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:17:44] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:17:46] +[AutoResearch] ========== Trial 17/50 ========== +[2026-04-13 16:17:46] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:17:46] UCB=2.1466 mu=0.1634 sigma=0.9916 params={'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.004551208391752519, 'timesteps': 2467} +[2026-04-13 16:17:46] UCB=2.1356 mu=1.7494 sigma=0.1931 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005224077374076833, 'timesteps': 2982} +[2026-04-13 16:17:46] UCB=2.1196 mu=0.1238 sigma=0.9979 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.00487944066987397, 'timesteps': 2500} +[2026-04-13 16:17:46] UCB=2.0933 mu=0.1087 sigma=0.9923 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0046177294398087885, 'timesteps': 2764} +[2026-04-13 16:17:46] UCB=2.0828 mu=0.0933 sigma=0.9947 params={'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.004644956123956311, 'timesteps': 2890} +[2026-04-13 16:17:46] [AutoResearch] Proposed: {'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.004551208391752519, 'timesteps': 2467, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:17:48] [AutoResearch] Launching trial 17: {'n_steer': 3, 'n_throttle': 3, 'learning_rate': 0.004551208391752519, 'timesteps': 2467, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:21:19] [AutoResearch] Trial 17 finished in 210.9s, returncode=0 +[2026-04-13 16:21:19] [AutoResearch] Trial 17: mean_reward=204.9237 std_reward=0.8594 +[2026-04-13 16:21:19] [AutoResearch] === Trial 17 Summary === +[2026-04-13 16:21:19] Total Phase 1 runs: 49 +[2026-04-13 16:21:19] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:21:19] Top 5: +[2026-04-13 16:21:19] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:21:19] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:21:19] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:21:19] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:21:19] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:21:21] +[AutoResearch] ========== Trial 18/50 ========== +[2026-04-13 16:21:21] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:21:21] UCB=1.9736 mu=-0.0151 sigma=0.9943 params={'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.004635271824040741, 'timesteps': 4579} +[2026-04-13 16:21:21] UCB=1.9719 mu=-0.0100 sigma=0.9910 params={'n_steer': 4, 'n_throttle': 5, 'learning_rate': 0.004702506196963715, 'timesteps': 4515} +[2026-04-13 16:21:21] UCB=1.9680 mu=0.3559 sigma=0.8060 params={'n_steer': 5, 'n_throttle': 2, 'learning_rate': 0.00039611211457936087, 'timesteps': 4776} +[2026-04-13 16:21:21] UCB=1.9488 mu=-0.0208 sigma=0.9848 params={'n_steer': 4, 'n_throttle': 5, 'learning_rate': 0.004976994354630289, 'timesteps': 4295} +[2026-04-13 16:21:21] UCB=1.9237 mu=0.8638 sigma=0.5299 params={'n_steer': 5, 'n_throttle': 2, 'learning_rate': 5.318719404529583e-05, 'timesteps': 3383} +[2026-04-13 16:21:21] [AutoResearch] Proposed: {'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.004635271824040741, 'timesteps': 4579, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:21:23] [AutoResearch] Launching trial 18: {'n_steer': 3, 'n_throttle': 5, 'learning_rate': 0.004635271824040741, 'timesteps': 4579, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:27:21] [AutoResearch] Trial 18 finished in 357.9s, returncode=0 +[2026-04-13 16:27:21] [AutoResearch] Trial 18: mean_reward=15.1327 std_reward=0.0091 +[2026-04-13 16:27:21] [AutoResearch] === Trial 18 Summary === +[2026-04-13 16:27:21] Total Phase 1 runs: 50 +[2026-04-13 16:27:21] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:27:21] Top 5: +[2026-04-13 16:27:21] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:27:21] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:27:21] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:27:21] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:27:21] mean_reward=1032.0966 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0006774569893590574, 'timesteps': 2849, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:27:23] +[AutoResearch] ========== Trial 19/50 ========== +[2026-04-13 16:27:23] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:27:23] UCB=2.4117 mu=0.7315 sigma=0.8401 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954} +[2026-04-13 16:27:23] UCB=2.1418 mu=1.4716 sigma=0.3351 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0010210660206428537, 'timesteps': 2865} +[2026-04-13 16:27:23] UCB=2.0522 mu=1.8672 sigma=0.0925 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0005484988468040054, 'timesteps': 2664} +[2026-04-13 16:27:23] UCB=1.9170 mu=0.5165 sigma=0.7002 params={'n_steer': 5, 'n_throttle': 2, 'learning_rate': 0.0003221051258700323, 'timesteps': 4009} +[2026-04-13 16:27:23] UCB=1.8812 mu=0.6397 sigma=0.6208 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005319521788798213, 'timesteps': 3471} +[2026-04-13 16:27:23] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:27:25] [AutoResearch] Launching trial 19: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:32:33] [AutoResearch] Trial 19 finished in 307.9s, returncode=0 +[2026-04-13 16:32:33] [AutoResearch] Trial 19: mean_reward=2237.9305 std_reward=4.2059 +[2026-04-13 16:32:33] [AutoResearch] === Trial 19 Summary === +[2026-04-13 16:32:33] Total Phase 1 runs: 51 +[2026-04-13 16:32:33] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:32:33] Top 5: +[2026-04-13 16:32:33] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:32:33] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:32:33] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:32:33] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:32:33] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:32:35] +[AutoResearch] ========== Trial 20/50 ========== +[2026-04-13 16:32:35] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:32:35] UCB=3.4259 mu=1.8243 sigma=0.8008 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.002069915888771807, 'timesteps': 4960} +[2026-04-13 16:32:35] UCB=3.0743 mu=1.9887 sigma=0.5428 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0006141122334708564, 'timesteps': 4798} +[2026-04-13 16:32:35] UCB=2.9580 mu=1.7284 sigma=0.6148 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 8.750174245784358e-05, 'timesteps': 3752} +[2026-04-13 16:32:35] UCB=2.9385 mu=1.9153 sigma=0.5116 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0003012438414853693, 'timesteps': 4570} +[2026-04-13 16:32:35] UCB=2.8840 mu=1.3911 sigma=0.7465 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0009444412843513588, 'timesteps': 4795} +[2026-04-13 16:32:35] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.002069915888771807, 'timesteps': 4960, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:32:37] [AutoResearch] Launching trial 20: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.002069915888771807, 'timesteps': 4960, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:38:17] [AutoResearch] Trial 20 finished in 340.5s, returncode=0 +[2026-04-13 16:38:17] [AutoResearch] Trial 20: mean_reward=15.1108 std_reward=0.0091 +[2026-04-13 16:38:17] [AutoResearch] === Trial 20 Summary === +[2026-04-13 16:38:17] Total Phase 1 runs: 52 +[2026-04-13 16:38:17] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:38:17] Top 5: +[2026-04-13 16:38:17] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:38:17] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:38:17] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:38:17] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:38:17] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} diff --git a/agent/outerloop-results/autoresearch_results_phase1.jsonl b/agent/outerloop-results/autoresearch_results_phase1.jsonl index aca2feb..0905145 100644 --- a/agent/outerloop-results/autoresearch_results_phase1.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase1.jsonl @@ -52,3 +52,13 @@ {"trial": 8, "timestamp": "2026-04-13T15:56:47.314108", "params": {"n_steer": 9, "n_throttle": 3, "learning_rate": 0.0038245903103972813, "timesteps": 2401, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 14.5596, "std_reward": 0.0173, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0008/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 209.817458152771, "reward_hacking_suspected": false} {"trial": 9, "timestamp": "2026-04-13T15:58:34.660642", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.0003951932722914093, "timesteps": 1993, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 44.5266, "std_reward": 0.1726, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0009/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 103.33265256881714, "reward_hacking_suspected": false} {"trial": 10, "timestamp": "2026-04-13T16:00:23.744147", "params": {"n_steer": 8, "n_throttle": 3, "learning_rate": 0.0017751713134174076, "timesteps": 1004, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 26.063, "std_reward": 0.1375, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0010/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 105.06883907318115, "reward_hacking_suspected": false} +{"trial": 11, "timestamp": "2026-04-13T16:02:12.296020", "params": {"n_steer": 7, "n_throttle": 2, "learning_rate": 0.00261843600079989, "timesteps": 1081, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.0063, "std_reward": 0.0168, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0011/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 102.64761543273926, "reward_hacking_suspected": false} +{"trial": 12, "timestamp": "2026-04-13T16:05:50.731699", "params": {"n_steer": 6, "n_throttle": 3, "learning_rate": 0.000494038295411516, "timesteps": 2549, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 743.5528, "std_reward": 3.1861, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0012/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 214.4203441143036, "reward_hacking_suspected": false} +{"trial": 13, "timestamp": "2026-04-13T16:09:20.560339", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0006080707920424922, "timesteps": 3082, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 471.0407, "std_reward": 7.8649, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0013/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 205.80152201652527, "reward_hacking_suspected": false} +{"trial": 14, "timestamp": "2026-04-13T16:11:08.606428", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0016216488882950194, "timesteps": 1028, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.1063, "std_reward": 0.0076, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0014/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 104.02733898162842, "reward_hacking_suspected": false} +{"trial": 15, "timestamp": "2026-04-13T16:14:16.945352", "params": {"n_steer": 5, "n_throttle": 5, "learning_rate": 0.00031142012560796015, "timesteps": 3450, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 21.5609, "std_reward": 0.0472, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0015/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 184.323979139328, "reward_hacking_suspected": false} +{"trial": 16, "timestamp": "2026-04-13T16:17:44.428802", "params": {"n_steer": 5, "n_throttle": 4, "learning_rate": 0.0003241673094165768, "timesteps": 3632, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 501.8865, "std_reward": 1.0671, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0016/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 203.46585607528687, "reward_hacking_suspected": false} +{"trial": 17, "timestamp": "2026-04-13T16:21:19.305036", "params": {"n_steer": 3, "n_throttle": 3, "learning_rate": 0.004551208391752519, "timesteps": 2467, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 204.9237, "std_reward": 0.8594, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0017/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 210.85937237739563, "reward_hacking_suspected": false} +{"trial": 18, "timestamp": "2026-04-13T16:27:21.250692", "params": {"n_steer": 3, "n_throttle": 5, "learning_rate": 0.004635271824040741, "timesteps": 4579, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.1327, "std_reward": 0.0091, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0018/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 357.92918848991394, "reward_hacking_suspected": false} +{"trial": 19, "timestamp": "2026-04-13T16:32:33.152498", "params": {"n_steer": 6, "n_throttle": 2, "learning_rate": 0.0005660634897015402, "timesteps": 4954, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 2237.9305, "std_reward": 4.2059, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0019/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 307.8850815296173, "reward_hacking_suspected": false} +{"trial": 20, "timestamp": "2026-04-13T16:38:17.629499", "params": {"n_steer": 6, "n_throttle": 2, "learning_rate": 0.002069915888771807, "timesteps": 4960, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.1108, "std_reward": 0.0091, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0020/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 340.46046137809753, "reward_hacking_suspected": false}