From ed65cf59976853161a22f928e22b68c049a74454 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Mon, 13 Apr 2026 17:28:19 -0400 Subject: [PATCH] autoresearch: phase1 trial 30 results Agent: pi Tests: N/A Tests-Added: 0 TypeScript: N/A --- .../autoresearch_phase1_log.txt | 211 ++++++++++++++++++ .../autoresearch_results_phase1.jsonl | 10 + 2 files changed, 221 insertions(+) diff --git a/agent/outerloop-results/autoresearch_phase1_log.txt b/agent/outerloop-results/autoresearch_phase1_log.txt index 38c7141..3aa53df 100644 --- a/agent/outerloop-results/autoresearch_phase1_log.txt +++ b/agent/outerloop-results/autoresearch_phase1_log.txt @@ -1347,3 +1347,214 @@ [2026-04-13 16:38:17] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 16:38:17] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} [2026-04-13 16:38:17] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:38:18] [AutoResearch] Git push complete after trial 20 +[2026-04-13 16:38:20] +[AutoResearch] ========== Trial 21/50 ========== +[2026-04-13 16:38:20] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:38:20] UCB=3.7556 mu=2.6923 sigma=0.5317 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.00016732476969720305, 'timesteps': 4545} +[2026-04-13 16:38:20] UCB=3.6554 mu=2.6237 sigma=0.5159 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.00027361330320505457, 'timesteps': 4426} +[2026-04-13 16:38:20] UCB=2.9292 mu=1.5395 sigma=0.6949 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0008890476874051958, 'timesteps': 4901} +[2026-04-13 16:38:20] UCB=2.7843 mu=1.6784 sigma=0.5529 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0009111341085199367, 'timesteps': 4229} +[2026-04-13 16:38:20] UCB=2.0854 mu=1.5744 sigma=0.2555 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.000791530228950698, 'timesteps': 2671} +[2026-04-13 16:38:20] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.00016732476969720305, 'timesteps': 4545, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:38:22] [AutoResearch] Launching trial 21: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.00016732476969720305, 'timesteps': 4545, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:43:14] [AutoResearch] Trial 21 finished in 291.9s, returncode=0 +[2026-04-13 16:43:14] [AutoResearch] Trial 21: mean_reward=712.0042 std_reward=90.943 +[2026-04-13 16:43:14] [AutoResearch] === Trial 21 Summary === +[2026-04-13 16:43:14] Total Phase 1 runs: 53 +[2026-04-13 16:43:14] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:43:14] Top 5: +[2026-04-13 16:43:14] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:43:14] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:43:14] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:43:14] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:43:14] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:43:16] +[AutoResearch] ========== Trial 22/50 ========== +[2026-04-13 16:43:16] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:43:16] UCB=3.3258 mu=3.0998 sigma=0.1130 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.00047237107706507965, 'timesteps': 4731} +[2026-04-13 16:43:16] UCB=2.0722 mu=1.3063 sigma=0.3830 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0011620798877489517, 'timesteps': 4700} +[2026-04-13 16:43:16] UCB=1.7872 mu=-0.1514 sigma=0.9693 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.0034249503127642264, 'timesteps': 4372} +[2026-04-13 16:43:16] UCB=1.7749 mu=-0.1723 sigma=0.9736 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.003194575957101677, 'timesteps': 4362} +[2026-04-13 16:43:16] UCB=1.7471 mu=-0.2101 sigma=0.9786 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.004701365865030676, 'timesteps': 1665} +[2026-04-13 16:43:16] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.00047237107706507965, 'timesteps': 4731, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:43:18] [AutoResearch] Launching trial 22: {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.00047237107706507965, 'timesteps': 4731, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:48:14] [AutoResearch] Trial 22 finished in 295.8s, returncode=0 +[2026-04-13 16:48:14] [AutoResearch] Trial 22: mean_reward=562.9132 std_reward=10.4819 +[2026-04-13 16:48:14] [AutoResearch] === Trial 22 Summary === +[2026-04-13 16:48:14] Total Phase 1 runs: 54 +[2026-04-13 16:48:14] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:48:14] Top 5: +[2026-04-13 16:48:14] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:48:14] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:48:14] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:48:14] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:48:14] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:48:16] +[AutoResearch] ========== Trial 23/50 ========== +[2026-04-13 16:48:16] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:48:16] UCB=5.7123 mu=4.0715 sigma=0.8204 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.000872444658512201, 'timesteps': 4959} +[2026-04-13 16:48:16] UCB=4.6632 mu=2.9971 sigma=0.8330 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0013578723525765764, 'timesteps': 4857} +[2026-04-13 16:48:16] UCB=3.8838 mu=2.3869 sigma=0.7485 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0009439538581721204, 'timesteps': 4720} +[2026-04-13 16:48:16] UCB=3.8689 mu=2.3330 sigma=0.7680 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0006494808486629405, 'timesteps': 4787} +[2026-04-13 16:48:16] UCB=3.6625 mu=3.3554 sigma=0.1536 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0004171158677596043, 'timesteps': 4760} +[2026-04-13 16:48:16] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.000872444658512201, 'timesteps': 4959, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:48:18] [AutoResearch] Launching trial 23: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.000872444658512201, 'timesteps': 4959, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:53:04] [AutoResearch] Trial 23 finished in 285.7s, returncode=0 +[2026-04-13 16:53:04] [AutoResearch] Trial 23: mean_reward=14.9815 std_reward=0.0093 +[2026-04-13 16:53:04] [AutoResearch] === Trial 23 Summary === +[2026-04-13 16:53:04] Total Phase 1 runs: 55 +[2026-04-13 16:53:04] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:53:04] Top 5: +[2026-04-13 16:53:04] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:53:04] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:53:04] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:53:04] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:53:04] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:53:06] +[AutoResearch] ========== Trial 24/50 ========== +[2026-04-13 16:53:06] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:53:06] UCB=3.4649 mu=3.1911 sigma=0.1369 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0001292488328476618, 'timesteps': 4749} +[2026-04-13 16:53:06] UCB=2.3675 mu=2.0286 sigma=0.1694 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.00035609774689880495, 'timesteps': 2952} +[2026-04-13 16:53:06] UCB=2.2707 mu=1.9059 sigma=0.1824 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0004136019141565726, 'timesteps': 2931} +[2026-04-13 16:53:06] UCB=2.1283 mu=1.6145 sigma=0.2569 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0019365337394847539, 'timesteps': 1025} +[2026-04-13 16:53:06] UCB=1.7996 mu=-0.1794 sigma=0.9895 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.004779978427179205, 'timesteps': 4249} +[2026-04-13 16:53:06] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0001292488328476618, 'timesteps': 4749, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:53:08] [AutoResearch] Launching trial 24: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0001292488328476618, 'timesteps': 4749, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:58:24] [AutoResearch] Trial 24 finished in 316.5s, returncode=0 +[2026-04-13 16:58:24] [AutoResearch] Trial 24: mean_reward=657.5063 std_reward=2.2574 +[2026-04-13 16:58:24] [AutoResearch] === Trial 24 Summary === +[2026-04-13 16:58:24] Total Phase 1 runs: 56 +[2026-04-13 16:58:24] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:58:24] Top 5: +[2026-04-13 16:58:24] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:58:24] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:58:24] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:58:24] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:58:24] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:58:26] +[AutoResearch] ========== Trial 25/50 ========== +[2026-04-13 16:58:26] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 16:58:26] UCB=4.2698 mu=3.4448 sigma=0.4125 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 6.342460411299125e-05, 'timesteps': 4090} +[2026-04-13 16:58:26] UCB=3.7333 mu=2.7882 sigma=0.4726 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0011263751132521438, 'timesteps': 4576} +[2026-04-13 16:58:26] UCB=3.4290 mu=2.8470 sigma=0.2910 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.00047455842448918, 'timesteps': 4720} +[2026-04-13 16:58:26] UCB=3.3911 mu=2.4851 sigma=0.4530 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0003300910432647304, 'timesteps': 2676} +[2026-04-13 16:58:26] UCB=3.3573 mu=1.8872 sigma=0.7351 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0015667124250089057, 'timesteps': 3815} +[2026-04-13 16:58:26] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 6.342460411299125e-05, 'timesteps': 4090, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 16:58:28] [AutoResearch] Launching trial 25: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 6.342460411299125e-05, 'timesteps': 4090, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:02:19] [AutoResearch] Trial 25 finished in 230.3s, returncode=0 +[2026-04-13 17:02:19] [AutoResearch] Trial 25: mean_reward=279.0168 std_reward=6.9418 +[2026-04-13 17:02:19] [AutoResearch] === Trial 25 Summary === +[2026-04-13 17:02:19] Total Phase 1 runs: 57 +[2026-04-13 17:02:19] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:02:19] Top 5: +[2026-04-13 17:02:19] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:02:19] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:02:19] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:02:19] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:02:19] mean_reward=1072.7063 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.00038717401417690916, 'timesteps': 2914, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:02:21] +[AutoResearch] ========== Trial 26/50 ========== +[2026-04-13 17:02:21] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 17:02:21] UCB=3.3528 mu=2.3124 sigma=0.5202 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898} +[2026-04-13 17:02:21] UCB=3.1485 mu=2.0065 sigma=0.5710 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003938802691041149, 'timesteps': 4868} +[2026-04-13 17:02:21] UCB=2.4341 mu=1.1732 sigma=0.6304 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0006992915187758438, 'timesteps': 4823} +[2026-04-13 17:02:21] UCB=2.2359 mu=1.3933 sigma=0.4213 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0009559040057827017, 'timesteps': 2924} +[2026-04-13 17:02:21] UCB=2.1994 mu=1.9371 sigma=0.1312 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0007187013270107417, 'timesteps': 2786} +[2026-04-13 17:02:21] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:02:23] [AutoResearch] Launching trial 26: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:07:50] [AutoResearch] Trial 26 finished in 327.3s, returncode=0 +[2026-04-13 17:07:50] [AutoResearch] Trial 26: mean_reward=2306.761 std_reward=6.7895 +[2026-04-13 17:07:50] [AutoResearch] === Trial 26 Summary === +[2026-04-13 17:07:50] Total Phase 1 runs: 58 +[2026-04-13 17:07:50] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:07:50] Top 5: +[2026-04-13 17:07:50] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:07:50] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:07:50] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:07:50] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:07:50] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:07:52] +[AutoResearch] ========== Trial 27/50 ========== +[2026-04-13 17:07:52] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 17:07:52] UCB=2.3063 mu=1.3113 sigma=0.4975 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0010435891469892742, 'timesteps': 4484} +[2026-04-13 17:07:52] UCB=2.1799 mu=0.7760 sigma=0.7020 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.0007345461142898144, 'timesteps': 4774} +[2026-04-13 17:07:52] UCB=2.0153 mu=0.4851 sigma=0.7651 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0018910995990971135, 'timesteps': 4919} +[2026-04-13 17:07:52] UCB=2.0019 mu=0.8923 sigma=0.5548 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0013556755233696148, 'timesteps': 4750} +[2026-04-13 17:07:52] UCB=1.8940 mu=1.3114 sigma=0.2913 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0002403184577692745, 'timesteps': 4370} +[2026-04-13 17:07:52] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0010435891469892742, 'timesteps': 4484, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:07:54] [AutoResearch] Launching trial 27: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0010435891469892742, 'timesteps': 4484, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:13:17] [AutoResearch] Trial 27 finished in 322.7s, returncode=0 +[2026-04-13 17:13:17] [AutoResearch] Trial 27: mean_reward=332.1491 std_reward=0.1125 +[2026-04-13 17:13:17] [AutoResearch] === Trial 27 Summary === +[2026-04-13 17:13:17] Total Phase 1 runs: 59 +[2026-04-13 17:13:17] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:13:17] Top 5: +[2026-04-13 17:13:17] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:13:17] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:13:17] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:13:17] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:13:17] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:13:19] +[AutoResearch] ========== Trial 28/50 ========== +[2026-04-13 17:13:19] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 17:13:19] UCB=3.2262 mu=2.8774 sigma=0.1744 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007283347926280964, 'timesteps': 4804} +[2026-04-13 17:13:19] UCB=2.5026 mu=2.0716 sigma=0.2155 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.000678144351086595, 'timesteps': 4957} +[2026-04-13 17:13:19] UCB=2.1611 mu=0.6361 sigma=0.7625 params={'n_steer': 3, 'n_throttle': 4, 'learning_rate': 0.00034031839912747184, 'timesteps': 4885} +[2026-04-13 17:13:19] UCB=1.9295 mu=1.6080 sigma=0.1608 params={'n_steer': 5, 'n_throttle': 4, 'learning_rate': 0.0005111405199774711, 'timesteps': 2981} +[2026-04-13 17:13:19] UCB=1.8912 mu=1.4029 sigma=0.2442 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006829056707945425, 'timesteps': 2729} +[2026-04-13 17:13:19] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007283347926280964, 'timesteps': 4804, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:13:21] [AutoResearch] Launching trial 28: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0007283347926280964, 'timesteps': 4804, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:18:46] [AutoResearch] Trial 28 finished in 325.3s, returncode=0 +[2026-04-13 17:18:46] [AutoResearch] Trial 28: mean_reward=1125.892 std_reward=9.7091 +[2026-04-13 17:18:46] [AutoResearch] === Trial 28 Summary === +[2026-04-13 17:18:46] Total Phase 1 runs: 60 +[2026-04-13 17:18:46] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:18:46] Top 5: +[2026-04-13 17:18:46] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:18:46] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:18:46] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:18:46] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:18:46] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:18:48] +[AutoResearch] ========== Trial 29/50 ========== +[2026-04-13 17:18:48] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 17:18:48] UCB=1.9565 mu=1.0416 sigma=0.4574 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0006213302398696545, 'timesteps': 3064} +[2026-04-13 17:18:48] UCB=1.7895 mu=-0.1956 sigma=0.9925 params={'n_steer': 8, 'n_throttle': 5, 'learning_rate': 0.00496713790460354, 'timesteps': 4807} +[2026-04-13 17:18:48] UCB=1.7703 mu=-0.2014 sigma=0.9858 params={'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.004942965581265541, 'timesteps': 2075} +[2026-04-13 17:18:48] UCB=1.7475 mu=-0.1993 sigma=0.9734 params={'n_steer': 5, 'n_throttle': 5, 'learning_rate': 0.004884055358157497, 'timesteps': 2512} +[2026-04-13 17:18:48] UCB=1.7288 mu=-0.2113 sigma=0.9701 params={'n_steer': 6, 'n_throttle': 5, 'learning_rate': 0.004639975825026214, 'timesteps': 2393} +[2026-04-13 17:18:48] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0006213302398696545, 'timesteps': 3064, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:18:50] [AutoResearch] Launching trial 29: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0006213302398696545, 'timesteps': 3064, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:22:42] [AutoResearch] Trial 29 finished in 231.5s, returncode=0 +[2026-04-13 17:22:42] [AutoResearch] Trial 29: mean_reward=35.5084 std_reward=0.0228 +[2026-04-13 17:22:42] [AutoResearch] === Trial 29 Summary === +[2026-04-13 17:22:42] Total Phase 1 runs: 61 +[2026-04-13 17:22:42] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:22:42] Top 5: +[2026-04-13 17:22:42] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:22:42] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:22:42] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:22:42] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:22:42] mean_reward=1157.0470 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.00037737321665256695, 'timesteps': 2717, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:22:44] +[AutoResearch] ========== Trial 30/50 ========== +[2026-04-13 17:22:44] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 17:22:44] UCB=4.8341 mu=4.5783 sigma=0.1279 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977} +[2026-04-13 17:22:44] UCB=2.4343 mu=2.1541 sigma=0.1401 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.00036335401272427295, 'timesteps': 4842} +[2026-04-13 17:22:44] UCB=1.9834 mu=1.5341 sigma=0.2247 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0004804115725768152, 'timesteps': 4817} +[2026-04-13 17:22:44] UCB=1.8265 mu=0.4840 sigma=0.6712 params={'n_steer': 5, 'n_throttle': 2, 'learning_rate': 0.0011405885967124738, 'timesteps': 1031} +[2026-04-13 17:22:44] UCB=1.7885 mu=-0.1747 sigma=0.9816 params={'n_steer': 9, 'n_throttle': 5, 'learning_rate': 0.004833150908658096, 'timesteps': 3968} +[2026-04-13 17:22:44] [AutoResearch] Proposed: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:22:46] [AutoResearch] Launching trial 30: {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:28:19] [AutoResearch] Trial 30 finished in 332.9s, returncode=0 +[2026-04-13 17:28:19] [AutoResearch] Trial 30: mean_reward=2286.9085 std_reward=1.508 +[2026-04-13 17:28:19] [AutoResearch] === Trial 30 Summary === +[2026-04-13 17:28:19] Total Phase 1 runs: 62 +[2026-04-13 17:28:19] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:28:19] Top 5: +[2026-04-13 17:28:19] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:28:19] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:28:19] mean_reward=2237.9305 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0005660634897015402, 'timesteps': 4954, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:28:19] mean_reward=1859.8470 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005669006119489946, 'timesteps': 2156, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 17:28:19] mean_reward=1389.3806 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0005504110507719487, 'timesteps': 2472, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} diff --git a/agent/outerloop-results/autoresearch_results_phase1.jsonl b/agent/outerloop-results/autoresearch_results_phase1.jsonl index 0905145..308668b 100644 --- a/agent/outerloop-results/autoresearch_results_phase1.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase1.jsonl @@ -62,3 +62,13 @@ {"trial": 18, "timestamp": "2026-04-13T16:27:21.250692", "params": {"n_steer": 3, "n_throttle": 5, "learning_rate": 0.004635271824040741, "timesteps": 4579, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.1327, "std_reward": 0.0091, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0018/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 357.92918848991394, "reward_hacking_suspected": false} {"trial": 19, "timestamp": "2026-04-13T16:32:33.152498", "params": {"n_steer": 6, "n_throttle": 2, "learning_rate": 0.0005660634897015402, "timesteps": 4954, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 2237.9305, "std_reward": 4.2059, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0019/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 307.8850815296173, "reward_hacking_suspected": false} {"trial": 20, "timestamp": "2026-04-13T16:38:17.629499", "params": {"n_steer": 6, "n_throttle": 2, "learning_rate": 0.002069915888771807, "timesteps": 4960, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.1108, "std_reward": 0.0091, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0020/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 340.46046137809753, "reward_hacking_suspected": false} +{"trial": 21, "timestamp": "2026-04-13T16:43:14.823248", "params": {"n_steer": 7, "n_throttle": 2, "learning_rate": 0.00016732476969720305, "timesteps": 4545, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 712.0042, "std_reward": 90.943, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0021/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 291.91160893440247, "reward_hacking_suspected": false} +{"trial": 22, "timestamp": "2026-04-13T16:48:14.654548", "params": {"n_steer": 6, "n_throttle": 2, "learning_rate": 0.00047237107706507965, "timesteps": 4731, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 562.9132, "std_reward": 10.4819, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0022/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 295.81456112861633, "reward_hacking_suspected": false} +{"trial": 23, "timestamp": "2026-04-13T16:53:04.338445", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.000872444658512201, "timesteps": 4959, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 14.9815, "std_reward": 0.0093, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0023/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 285.66902017593384, "reward_hacking_suspected": false} +{"trial": 24, "timestamp": "2026-04-13T16:58:24.857851", "params": {"n_steer": 7, "n_throttle": 2, "learning_rate": 0.0001292488328476618, "timesteps": 4749, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 657.5063, "std_reward": 2.2574, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0024/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 316.5029833316803, "reward_hacking_suspected": false} +{"trial": 25, "timestamp": "2026-04-13T17:02:19.194497", "params": {"n_steer": 8, "n_throttle": 2, "learning_rate": 6.342460411299125e-05, "timesteps": 4090, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 279.0168, "std_reward": 6.9418, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0025/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 230.31992149353027, "reward_hacking_suspected": false} +{"trial": 26, "timestamp": "2026-04-13T17:07:50.552977", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0004488352572615814, "timesteps": 4898, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 2306.761, "std_reward": 6.7895, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0026/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 327.3414671421051, "reward_hacking_suspected": false} +{"trial": 27, "timestamp": "2026-04-13T17:13:17.273453", "params": {"n_steer": 8, "n_throttle": 2, "learning_rate": 0.0010435891469892742, "timesteps": 4484, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 332.1491, "std_reward": 0.1125, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0027/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 322.70347714424133, "reward_hacking_suspected": false} +{"trial": 28, "timestamp": "2026-04-13T17:18:46.584035", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0007283347926280964, "timesteps": 4804, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 1125.892, "std_reward": 9.7091, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0028/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 325.2930808067322, "reward_hacking_suspected": false} +{"trial": 29, "timestamp": "2026-04-13T17:22:42.111254", "params": {"n_steer": 8, "n_throttle": 2, "learning_rate": 0.0006213302398696545, "timesteps": 3064, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 35.5084, "std_reward": 0.0228, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0029/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 231.50703597068787, "reward_hacking_suspected": false} +{"trial": 30, "timestamp": "2026-04-13T17:28:19.047146", "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0003386484278685721, "timesteps": 4977, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 2286.9085, "std_reward": 1.508, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0030/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 332.9170308113098, "reward_hacking_suspected": false}