From 87cff0c9b7d207ce79e1e5944bcb6a75fd8da0b8 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Mon, 13 Apr 2026 15:28:05 -0400 Subject: [PATCH] autoresearch: phase1 trial 40 results Agent: pi Tests: N/A Tests-Added: 0 TypeScript: N/A --- .../autoresearch_phase1_log.txt | 211 ++++++++++++++++++ .../autoresearch_results_phase1.jsonl | 10 + 2 files changed, 221 insertions(+) diff --git a/agent/outerloop-results/autoresearch_phase1_log.txt b/agent/outerloop-results/autoresearch_phase1_log.txt index dc7b3aa..b499ae9 100644 --- a/agent/outerloop-results/autoresearch_phase1_log.txt +++ b/agent/outerloop-results/autoresearch_phase1_log.txt @@ -613,3 +613,214 @@ [2026-04-13 15:13:21] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 15:13:21] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 15:13:21] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:13:22] [AutoResearch] Git push complete after trial 30 +[2026-04-13 15:13:24] +[AutoResearch] ========== Trial 31/50 ========== +[2026-04-13 15:13:24] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:13:24] UCB=5.5651 mu=4.3580 sigma=0.6036 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.004135349936389352, 'timesteps': 3206} +[2026-04-13 15:13:24] UCB=5.5642 mu=4.5363 sigma=0.5139 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0038596790201358227, 'timesteps': 2946} +[2026-04-13 15:13:24] UCB=4.4093 mu=2.5958 sigma=0.9067 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.004156951783506707, 'timesteps': 3826} +[2026-04-13 15:13:24] UCB=4.2822 mu=2.3905 sigma=0.9458 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.003842385514909252, 'timesteps': 4354} +[2026-04-13 15:13:24] UCB=4.2719 mu=2.3951 sigma=0.9384 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.004118857172828388, 'timesteps': 4203} +[2026-04-13 15:13:24] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.004135349936389352, 'timesteps': 3206, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:13:26] [AutoResearch] Launching trial 31: {'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.004135349936389352, 'timesteps': 3206, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:17:17] [AutoResearch] Trial 31 finished in 230.1s, returncode=0 +[2026-04-13 15:17:17] [AutoResearch] Trial 31: mean_reward=14.3463 std_reward=0.4624 +[2026-04-13 15:17:17] [AutoResearch] === Trial 31 Summary === +[2026-04-13 15:17:17] Total Phase 1 runs: 31 +[2026-04-13 15:17:17] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:17:17] Top 5: +[2026-04-13 15:17:17] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:17:17] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:17:17] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:17:17] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:17:17] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:17:19] +[AutoResearch] ========== Trial 32/50 ========== +[2026-04-13 15:17:19] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:17:19] UCB=3.7776 mu=2.6148 sigma=0.5814 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0033897596454698794, 'timesteps': 1993} +[2026-04-13 15:17:19] UCB=3.2663 mu=1.9851 sigma=0.6406 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.003975169633599844, 'timesteps': 2404} +[2026-04-13 15:17:19] UCB=3.1243 mu=2.2294 sigma=0.4474 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.001085936336153102, 'timesteps': 1347} +[2026-04-13 15:17:19] UCB=3.0807 mu=1.6082 sigma=0.7362 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00021511093369327893, 'timesteps': 3455} +[2026-04-13 15:17:19] UCB=3.0410 mu=1.3292 sigma=0.8559 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.0033704773533770626, 'timesteps': 2742} +[2026-04-13 15:17:19] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0033897596454698794, 'timesteps': 1993, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:17:21] [AutoResearch] Launching trial 32: {'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0033897596454698794, 'timesteps': 1993, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:19:14] [AutoResearch] Trial 32 finished in 112.9s, returncode=0 +[2026-04-13 15:19:14] [AutoResearch] Trial 32: mean_reward=15.5031 std_reward=0.0044 +[2026-04-13 15:19:14] [AutoResearch] === Trial 32 Summary === +[2026-04-13 15:19:14] Total Phase 1 runs: 32 +[2026-04-13 15:19:14] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:19:14] Top 5: +[2026-04-13 15:19:14] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:19:14] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:19:14] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:19:14] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:19:14] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:19:16] +[AutoResearch] ========== Trial 33/50 ========== +[2026-04-13 15:19:16] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:19:16] UCB=3.5850 mu=2.4733 sigma=0.5558 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0014086345870627532, 'timesteps': 1047} +[2026-04-13 15:19:16] UCB=3.4733 mu=2.0484 sigma=0.7124 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00021886708641480224, 'timesteps': 3412} +[2026-04-13 15:19:16] UCB=3.3215 mu=2.3680 sigma=0.4768 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0011490025067707607, 'timesteps': 1218} +[2026-04-13 15:19:16] UCB=2.9198 mu=1.7734 sigma=0.5732 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00018304261685525393, 'timesteps': 1998} +[2026-04-13 15:19:16] UCB=2.8965 mu=1.7977 sigma=0.5494 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.00025906248389237, 'timesteps': 2317} +[2026-04-13 15:19:16] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0014086345870627532, 'timesteps': 1047, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:19:18] [AutoResearch] Launching trial 33: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0014086345870627532, 'timesteps': 1047, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:18] [AutoResearch] Trial 33 TIMED OUT after 480.1s +[2026-04-13 15:27:18] [AutoResearch] Trial 33: mean_reward=None std_reward=None +[2026-04-13 15:27:18] [AutoResearch] === Trial 33 Summary === +[2026-04-13 15:27:18] Total Phase 1 runs: 32 +[2026-04-13 15:27:18] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:18] Top 5: +[2026-04-13 15:27:18] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:18] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:18] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:18] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:18] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:20] +[AutoResearch] ========== Trial 34/50 ========== +[2026-04-13 15:27:20] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:27:20] UCB=3.0602 mu=2.0221 sigma=0.5190 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0005797524390357458, 'timesteps': 1730} +[2026-04-13 15:27:20] UCB=2.9639 mu=2.3355 sigma=0.3142 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.003342940719681374, 'timesteps': 2781} +[2026-04-13 15:27:20] UCB=2.8402 mu=1.0555 sigma=0.8923 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.00011009164810198821, 'timesteps': 3597} +[2026-04-13 15:27:20] UCB=2.7623 mu=1.4832 sigma=0.6396 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.002448672401879597, 'timesteps': 1031} +[2026-04-13 15:27:20] UCB=2.6515 mu=1.5184 sigma=0.5666 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.003370243610453799, 'timesteps': 3185} +[2026-04-13 15:27:20] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0005797524390357458, 'timesteps': 1730, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:22] [AutoResearch] Launching trial 34: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0005797524390357458, 'timesteps': 1730, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:25] [AutoResearch] Trial 34 finished in 3.0s, returncode=100 +[2026-04-13 15:27:25] [AutoResearch] Trial 34: mean_reward=None std_reward=None +[2026-04-13 15:27:25] [AutoResearch] === Trial 34 Summary === +[2026-04-13 15:27:25] Total Phase 1 runs: 32 +[2026-04-13 15:27:25] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:25] Top 5: +[2026-04-13 15:27:25] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:25] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:25] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:25] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:25] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:27] +[AutoResearch] ========== Trial 35/50 ========== +[2026-04-13 15:27:27] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:27:27] UCB=3.8315 mu=3.2173 sigma=0.3071 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 5.189151979066741e-05, 'timesteps': 2800} +[2026-04-13 15:27:27] UCB=3.5173 mu=1.9176 sigma=0.7999 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0036174691379323937, 'timesteps': 2688} +[2026-04-13 15:27:27] UCB=3.4074 mu=1.8779 sigma=0.7648 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0035558136362367604, 'timesteps': 2380} +[2026-04-13 15:27:27] UCB=3.1016 mu=2.2958 sigma=0.4029 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0007703363956986522, 'timesteps': 1578} +[2026-04-13 15:27:27] UCB=3.0396 mu=2.8296 sigma=0.1050 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.00017347635729668062, 'timesteps': 2581} +[2026-04-13 15:27:27] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 3, 'learning_rate': 5.189151979066741e-05, 'timesteps': 2800, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:29] [AutoResearch] Launching trial 35: {'n_steer': 8, 'n_throttle': 3, 'learning_rate': 5.189151979066741e-05, 'timesteps': 2800, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:31] [AutoResearch] Trial 35 finished in 2.7s, returncode=100 +[2026-04-13 15:27:31] [AutoResearch] Trial 35: mean_reward=None std_reward=None +[2026-04-13 15:27:31] [AutoResearch] === Trial 35 Summary === +[2026-04-13 15:27:31] Total Phase 1 runs: 32 +[2026-04-13 15:27:31] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:31] Top 5: +[2026-04-13 15:27:31] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:31] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:31] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:31] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:31] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:33] +[AutoResearch] ========== Trial 36/50 ========== +[2026-04-13 15:27:33] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:27:33] UCB=3.1494 mu=1.9716 sigma=0.5889 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0017131753633976303, 'timesteps': 1286} +[2026-04-13 15:27:33] UCB=3.0055 mu=1.3764 sigma=0.8145 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.004273088506596801, 'timesteps': 2449} +[2026-04-13 15:27:33] UCB=2.9839 mu=1.9820 sigma=0.5010 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.001505487454944658, 'timesteps': 1324} +[2026-04-13 15:27:33] UCB=2.9113 mu=1.6855 sigma=0.6129 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.000403724803581608, 'timesteps': 2447} +[2026-04-13 15:27:33] UCB=2.8107 mu=1.5968 sigma=0.6069 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0017740090102008168, 'timesteps': 1057} +[2026-04-13 15:27:33] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0017131753633976303, 'timesteps': 1286, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:35] [AutoResearch] Launching trial 36: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0017131753633976303, 'timesteps': 1286, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:38] [AutoResearch] Trial 36 finished in 2.7s, returncode=100 +[2026-04-13 15:27:38] [AutoResearch] Trial 36: mean_reward=None std_reward=None +[2026-04-13 15:27:38] [AutoResearch] === Trial 36 Summary === +[2026-04-13 15:27:38] Total Phase 1 runs: 32 +[2026-04-13 15:27:38] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:38] Top 5: +[2026-04-13 15:27:38] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:38] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:38] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:38] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:38] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:40] +[AutoResearch] ========== Trial 37/50 ========== +[2026-04-13 15:27:40] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:27:40] UCB=3.8327 mu=2.3242 sigma=0.7543 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.00381491453203496, 'timesteps': 2570} +[2026-04-13 15:27:40] UCB=3.6164 mu=2.0347 sigma=0.7909 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0036154734154099826, 'timesteps': 2143} +[2026-04-13 15:27:40] UCB=3.5328 mu=2.3139 sigma=0.6095 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0019717822698033754, 'timesteps': 1026} +[2026-04-13 15:27:40] UCB=3.3397 mu=1.8546 sigma=0.7426 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00021835294506320483, 'timesteps': 3476} +[2026-04-13 15:27:40] UCB=3.1978 mu=1.8508 sigma=0.6735 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0022912708080479603, 'timesteps': 1083} +[2026-04-13 15:27:40] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.00381491453203496, 'timesteps': 2570, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:42] [AutoResearch] Launching trial 37: {'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.00381491453203496, 'timesteps': 2570, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:45] [AutoResearch] Trial 37 finished in 2.7s, returncode=100 +[2026-04-13 15:27:45] [AutoResearch] Trial 37: mean_reward=None std_reward=None +[2026-04-13 15:27:45] [AutoResearch] === Trial 37 Summary === +[2026-04-13 15:27:45] Total Phase 1 runs: 32 +[2026-04-13 15:27:45] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:45] Top 5: +[2026-04-13 15:27:45] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:45] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:45] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:45] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:45] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:47] +[AutoResearch] ========== Trial 38/50 ========== +[2026-04-13 15:27:47] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:27:47] UCB=3.3792 mu=2.6015 sigma=0.3889 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0035795756473208304, 'timesteps': 2168} +[2026-04-13 15:27:47] UCB=3.2370 mu=2.4236 sigma=0.4067 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.00133548686236209, 'timesteps': 1359} +[2026-04-13 15:27:47] UCB=3.1648 mu=1.7164 sigma=0.7242 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.00014234046680507736, 'timesteps': 3245} +[2026-04-13 15:27:47] UCB=3.0756 mu=2.1065 sigma=0.4846 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0006639757379204275, 'timesteps': 1652} +[2026-04-13 15:27:47] UCB=2.9504 mu=1.5700 sigma=0.6902 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0023215307871791192, 'timesteps': 1076} +[2026-04-13 15:27:47] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0035795756473208304, 'timesteps': 2168, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:49] [AutoResearch] Launching trial 38: {'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0035795756473208304, 'timesteps': 2168, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:52] [AutoResearch] Trial 38 finished in 2.7s, returncode=100 +[2026-04-13 15:27:52] [AutoResearch] Trial 38: mean_reward=None std_reward=None +[2026-04-13 15:27:52] [AutoResearch] === Trial 38 Summary === +[2026-04-13 15:27:52] Total Phase 1 runs: 32 +[2026-04-13 15:27:52] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:52] Top 5: +[2026-04-13 15:27:52] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:52] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:52] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:52] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:52] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:54] +[AutoResearch] ========== Trial 39/50 ========== +[2026-04-13 15:27:54] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:27:54] UCB=2.9376 mu=1.7644 sigma=0.5866 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0007519115144017143, 'timesteps': 1229} +[2026-04-13 15:27:54] UCB=2.9206 mu=1.6104 sigma=0.6551 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 8.74145742620092e-05, 'timesteps': 2464} +[2026-04-13 15:27:54] UCB=2.8799 mu=2.4729 sigma=0.2035 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0002020477741474642, 'timesteps': 2206} +[2026-04-13 15:27:54] UCB=2.8618 mu=1.4087 sigma=0.7265 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.003978320937123767, 'timesteps': 2257} +[2026-04-13 15:27:54] UCB=2.7374 mu=1.3210 sigma=0.7082 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0004460127152856436, 'timesteps': 2828} +[2026-04-13 15:27:54] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0007519115144017143, 'timesteps': 1229, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:56] [AutoResearch] Launching trial 39: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0007519115144017143, 'timesteps': 1229, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:58] [AutoResearch] Trial 39 finished in 2.7s, returncode=100 +[2026-04-13 15:27:58] [AutoResearch] Trial 39: mean_reward=None std_reward=None +[2026-04-13 15:27:58] [AutoResearch] === Trial 39 Summary === +[2026-04-13 15:27:58] Total Phase 1 runs: 32 +[2026-04-13 15:27:58] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:58] Top 5: +[2026-04-13 15:27:58] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:58] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:58] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:58] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:27:58] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:00] +[AutoResearch] ========== Trial 40/50 ========== +[2026-04-13 15:28:00] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 15:28:00] UCB=4.0033 mu=2.6495 sigma=0.6769 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00011160573690096532, 'timesteps': 3094} +[2026-04-13 15:28:00] UCB=3.5783 mu=2.7389 sigma=0.4197 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.003355215291754839, 'timesteps': 2513} +[2026-04-13 15:28:00] UCB=3.4820 mu=1.7536 sigma=0.8642 params={'n_steer': 8, 'n_throttle': 4, 'learning_rate': 8.1223641552854e-05, 'timesteps': 3316} +[2026-04-13 15:28:00] UCB=3.4324 mu=1.8215 sigma=0.8055 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.003472795016041811, 'timesteps': 3138} +[2026-04-13 15:28:00] UCB=3.1700 mu=2.2029 sigma=0.4836 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.000693815329398727, 'timesteps': 1830} +[2026-04-13 15:28:00] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00011160573690096532, 'timesteps': 3094, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:02] [AutoResearch] Launching trial 40: {'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.00011160573690096532, 'timesteps': 3094, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:05] [AutoResearch] Trial 40 finished in 2.9s, returncode=100 +[2026-04-13 15:28:05] [AutoResearch] Trial 40: mean_reward=None std_reward=None +[2026-04-13 15:28:05] [AutoResearch] === Trial 40 Summary === +[2026-04-13 15:28:05] Total Phase 1 runs: 32 +[2026-04-13 15:28:05] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:05] Top 5: +[2026-04-13 15:28:05] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:05] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:05] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:05] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 15:28:05] mean_reward=821.1389 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0012685117683148405, 'timesteps': 1382, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} diff --git a/agent/outerloop-results/autoresearch_results_phase1.jsonl b/agent/outerloop-results/autoresearch_results_phase1.jsonl index a4b551f..37d53ff 100644 --- a/agent/outerloop-results/autoresearch_results_phase1.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase1.jsonl @@ -28,3 +28,13 @@ {"trial": 28, "timestamp": "2026-04-13T15:08:07.609903", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 7.57033141698586e-05, "timesteps": 1795, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 82.7727, "std_reward": 0.8551, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0028/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 110.72014856338501, "reward_hacking_suspected": false} {"trial": 29, "timestamp": "2026-04-13T15:09:54.212428", "params": {"n_steer": 8, "n_throttle": 3, "learning_rate": 0.0038702768940272764, "timesteps": 1630, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.5211, "std_reward": 0.0294, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0029/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 102.58456087112427, "reward_hacking_suspected": false} {"trial": 30, "timestamp": "2026-04-13T15:13:21.531295", "params": {"n_steer": 6, "n_throttle": 3, "learning_rate": 6.732545053457424e-05, "timesteps": 2708, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 267.9527, "std_reward": 7.4167, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0030/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 203.30307364463806, "reward_hacking_suspected": false} +{"trial": 31, "timestamp": "2026-04-13T15:17:17.111252", "params": {"n_steer": 8, "n_throttle": 3, "learning_rate": 0.004135349936389352, "timesteps": 3206, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 14.3463, "std_reward": 0.4624, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0031/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 230.14550280570984, "reward_hacking_suspected": false} +{"trial": 32, "timestamp": "2026-04-13T15:19:14.079566", "params": {"n_steer": 9, "n_throttle": 4, "learning_rate": 0.0033897596454698794, "timesteps": 1993, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.5031, "std_reward": 0.0044, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0032/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 112.94606161117554, "reward_hacking_suspected": false} +{"trial": 33, "timestamp": "2026-04-13T15:27:18.187546", "params": {"n_steer": 7, "n_throttle": 4, "learning_rate": 0.0014086345870627532, "timesteps": 1047, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0033/model.zip", "champion": false, "run_status": "timeout", "elapsed_sec": 480.0911786556244, "reward_hacking_suspected": false} +{"trial": 34, "timestamp": "2026-04-13T15:27:25.243328", "params": {"n_steer": 7, "n_throttle": 4, "learning_rate": 0.0005797524390357458, "timesteps": 1730, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 3.040273427963257, "reward_hacking_suspected": false} +{"trial": 35, "timestamp": "2026-04-13T15:27:31.962348", "params": {"n_steer": 8, "n_throttle": 3, "learning_rate": 5.189151979066741e-05, "timesteps": 2800, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.704819440841675, "reward_hacking_suspected": false} +{"trial": 36, "timestamp": "2026-04-13T15:27:38.703239", "params": {"n_steer": 6, "n_throttle": 4, "learning_rate": 0.0017131753633976303, "timesteps": 1286, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.724773406982422, "reward_hacking_suspected": false} +{"trial": 37, "timestamp": "2026-04-13T15:27:45.446009", "params": {"n_steer": 9, "n_throttle": 2, "learning_rate": 0.00381491453203496, "timesteps": 2570, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.7279446125030518, "reward_hacking_suspected": false} +{"trial": 38, "timestamp": "2026-04-13T15:27:52.121624", "params": {"n_steer": 9, "n_throttle": 3, "learning_rate": 0.0035795756473208304, "timesteps": 2168, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.658806324005127, "reward_hacking_suspected": false} +{"trial": 39, "timestamp": "2026-04-13T15:27:58.884410", "params": {"n_steer": 6, "n_throttle": 4, "learning_rate": 0.0007519115144017143, "timesteps": 1229, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.7456912994384766, "reward_hacking_suspected": false} +{"trial": 40, "timestamp": "2026-04-13T15:28:05.834236", "params": {"n_steer": 8, "n_throttle": 4, "learning_rate": 0.00011160573690096532, "timesteps": 3094, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "error", "elapsed_sec": 2.933994770050049, "reward_hacking_suspected": false}