From d25bc7100871c9c8a8d63ac28dc0784b850720fd Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Mon, 13 Apr 2026 13:11:06 -0400 Subject: [PATCH] autoresearch: phase1 trial 10 results Agent: pi Tests: N/A Tests-Added: 0 TypeScript: N/A --- agent/models/champion/manifest.json | 15 ++ .../autoresearch_phase1_log.txt | 196 ++++++++++++++++++ .../autoresearch_results_phase1.jsonl | 10 + 3 files changed, 221 insertions(+) create mode 100644 agent/models/champion/manifest.json create mode 100644 agent/outerloop-results/autoresearch_results_phase1.jsonl diff --git a/agent/models/champion/manifest.json b/agent/models/champion/manifest.json new file mode 100644 index 0000000..3fc7aec --- /dev/null +++ b/agent/models/champion/manifest.json @@ -0,0 +1,15 @@ +{ + "trial": 5, + "timestamp": "2026-04-13T12:45:43.093664", + "params": { + "n_steer": 7, + "n_throttle": 3, + "learning_rate": 0.0006801262090358742, + "timesteps": 4787, + "agent": "ppo", + "eval_episodes": 3, + "reward_shaping": true + }, + "mean_reward": 4582.7984, + "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip" +} \ No newline at end of file diff --git a/agent/outerloop-results/autoresearch_phase1_log.txt b/agent/outerloop-results/autoresearch_phase1_log.txt index d08fdb0..3c15f24 100644 --- a/agent/outerloop-results/autoresearch_phase1_log.txt +++ b/agent/outerloop-results/autoresearch_phase1_log.txt @@ -24,3 +24,199 @@ [2026-04-13 12:27:28] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} [2026-04-13 12:27:28] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} [2026-04-13 12:27:28] [AutoResearch] Only 1 results — using random proposal. +[2026-04-13 12:27:56] ============================================================ +[2026-04-13 12:27:56] [AutoResearch] Phase 1 — Real PPO Training + GP+UCB Optimization +[2026-04-13 12:27:56] [AutoResearch] Max trials: 50 | kappa: 2.0 | push every: 10 +[2026-04-13 12:27:56] [AutoResearch] Results: /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase1.jsonl +[2026-04-13 12:27:56] [AutoResearch] Champion: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion +[2026-04-13 12:27:56] ============================================================ +[2026-04-13 12:27:56] [AutoResearch] Loaded 0 existing Phase 1 results. +[2026-04-13 12:27:56] [AutoResearch] No champion yet. +[2026-04-13 12:27:56] +[AutoResearch] ========== Trial 1/50 ========== +[2026-04-13 12:27:56] [AutoResearch] Only 0 results — using random proposal. +[2026-04-13 12:27:56] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:27:58] [AutoResearch] Launching trial 1: {'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:31:19] [AutoResearch] Trial 1 finished in 200.9s, returncode=0 +[2026-04-13 12:31:19] [AutoResearch] Trial 1: mean_reward=270.5642 std_reward=0.1427 +[2026-04-13 12:31:19] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:31:19] [AutoResearch] === Trial 1 Summary === +[2026-04-13 12:31:19] Total Phase 1 runs: 1 +[2026-04-13 12:31:19] Champion: trial=1 mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:31:19] Top 5: +[2026-04-13 12:31:19] mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:31:21] +[AutoResearch] ========== Trial 2/50 ========== +[2026-04-13 12:31:21] [AutoResearch] Only 1 results — using random proposal. +[2026-04-13 12:31:21] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0021131130619403186, 'timesteps': 1622, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:31:23] [AutoResearch] Launching trial 2: {'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0021131130619403186, 'timesteps': 1622, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:33:09] [AutoResearch] Trial 2 finished in 106.4s, returncode=0 +[2026-04-13 12:33:09] [AutoResearch] Trial 2: mean_reward=15.537 std_reward=0.0257 +[2026-04-13 12:33:09] [AutoResearch] === Trial 2 Summary === +[2026-04-13 12:33:09] Total Phase 1 runs: 2 +[2026-04-13 12:33:09] Champion: trial=1 mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:33:09] Top 5: +[2026-04-13 12:33:09] mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:33:09] mean_reward=15.5370 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0021131130619403186, 'timesteps': 1622, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:33:11] +[AutoResearch] ========== Trial 3/50 ========== +[2026-04-13 12:33:11] [AutoResearch] Only 2 results — using random proposal. +[2026-04-13 12:33:11] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.002684399373384131, 'timesteps': 1755, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:33:13] [AutoResearch] Launching trial 3: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.002684399373384131, 'timesteps': 1755, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:34:55] [AutoResearch] Trial 3 finished in 101.3s, returncode=0 +[2026-04-13 12:34:55] [AutoResearch] Trial 3: mean_reward=15.0687 std_reward=0.0219 +[2026-04-13 12:34:55] [AutoResearch] === Trial 3 Summary === +[2026-04-13 12:34:55] Total Phase 1 runs: 3 +[2026-04-13 12:34:55] Champion: trial=1 mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:34:55] Top 5: +[2026-04-13 12:34:55] mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:34:55] mean_reward=15.5370 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0021131130619403186, 'timesteps': 1622, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:34:55] mean_reward=15.0687 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.002684399373384131, 'timesteps': 1755, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:34:57] +[AutoResearch] ========== Trial 4/50 ========== +[2026-04-13 12:34:57] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 12:34:57] UCB=2.4445 mu=0.7362 sigma=0.8541 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279} +[2026-04-13 12:34:57] UCB=2.4375 mu=0.9279 sigma=0.7548 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0005153901257954376, 'timesteps': 3901} +[2026-04-13 12:34:57] UCB=2.4265 mu=0.9512 sigma=0.7376 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.00041200493694593554, 'timesteps': 3979} +[2026-04-13 12:34:57] UCB=2.3771 mu=0.5079 sigma=0.9346 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.000984749088576101, 'timesteps': 4646} +[2026-04-13 12:34:57] UCB=2.3748 mu=0.6941 sigma=0.8403 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0010994124879848107, 'timesteps': 3297} +[2026-04-13 12:34:57] [AutoResearch] Proposed: {'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:34:59] [AutoResearch] Launching trial 4: {'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:39:56] [AutoResearch] Trial 4 finished in 297.3s, returncode=0 +[2026-04-13 12:39:56] [AutoResearch] Trial 4: mean_reward=627.6915 std_reward=2.3536 +[2026-04-13 12:39:56] [Champion] 🏆 NEW BEST! Trial 4: mean_reward=627.6915 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:39:56] [AutoResearch] === Trial 4 Summary === +[2026-04-13 12:39:56] Total Phase 1 runs: 4 +[2026-04-13 12:39:56] Champion: trial=4 mean_reward=627.6915 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:39:56] Top 5: +[2026-04-13 12:39:56] mean_reward=627.6915 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:39:56] mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:39:56] mean_reward=15.5370 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0021131130619403186, 'timesteps': 1622, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:39:56] mean_reward=15.0687 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.002684399373384131, 'timesteps': 1755, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:39:58] +[AutoResearch] ========== Trial 5/50 ========== +[2026-04-13 12:39:58] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 12:39:58] UCB=2.6036 mu=0.9596 sigma=0.8220 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787} +[2026-04-13 12:39:58] UCB=2.6006 mu=1.3117 sigma=0.6445 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0007973784600824807, 'timesteps': 4846} +[2026-04-13 12:39:58] UCB=2.5701 mu=1.4143 sigma=0.5779 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0014200634295944165, 'timesteps': 4764} +[2026-04-13 12:39:58] UCB=2.5683 mu=1.3628 sigma=0.6028 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0016809469142980496, 'timesteps': 4540} +[2026-04-13 12:39:58] UCB=2.5337 mu=1.3776 sigma=0.5780 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.0008758657417255942, 'timesteps': 4642} +[2026-04-13 12:39:58] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:40:00] [AutoResearch] Launching trial 5: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:45:42] [AutoResearch] Trial 5 finished in 342.4s, returncode=0 +[2026-04-13 12:45:42] [AutoResearch] Trial 5: mean_reward=4582.7984 std_reward=0.4849 +[2026-04-13 12:45:43] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:45:43] [AutoResearch] === Trial 5 Summary === +[2026-04-13 12:45:43] Total Phase 1 runs: 5 +[2026-04-13 12:45:43] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:45:43] Top 5: +[2026-04-13 12:45:43] mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:45:43] mean_reward=627.6915 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:45:43] mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:45:43] mean_reward=15.5370 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0021131130619403186, 'timesteps': 1622, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:45:43] mean_reward=15.0687 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.002684399373384131, 'timesteps': 1755, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:45:45] +[AutoResearch] ========== Trial 6/50 ========== +[2026-04-13 12:45:45] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 12:45:45] UCB=3.0560 mu=1.6412 sigma=0.7074 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005165618383365869, 'timesteps': 4929} +[2026-04-13 12:45:45] UCB=2.9399 mu=2.0094 sigma=0.4652 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0006569923016159191, 'timesteps': 4926} +[2026-04-13 12:45:45] UCB=2.9261 mu=1.2265 sigma=0.8498 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0011640583789804349, 'timesteps': 4808} +[2026-04-13 12:45:45] UCB=2.9161 mu=1.3094 sigma=0.8034 params={'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.000817395398052526, 'timesteps': 4272} +[2026-04-13 12:45:45] UCB=2.9056 mu=1.2637 sigma=0.8210 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0018216283484187063, 'timesteps': 4962} +[2026-04-13 12:45:45] [AutoResearch] Proposed: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005165618383365869, 'timesteps': 4929, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:45:47] [AutoResearch] Launching trial 6: {'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005165618383365869, 'timesteps': 4929, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:51:14] [AutoResearch] Trial 6 finished in 326.9s, returncode=0 +[2026-04-13 12:51:14] [AutoResearch] Trial 6: mean_reward=454.064 std_reward=2.7272 +[2026-04-13 12:51:14] [AutoResearch] === Trial 6 Summary === +[2026-04-13 12:51:14] Total Phase 1 runs: 6 +[2026-04-13 12:51:14] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:51:14] Top 5: +[2026-04-13 12:51:14] mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:51:14] mean_reward=627.6915 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:51:14] mean_reward=454.0640 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005165618383365869, 'timesteps': 4929, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:51:14] mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:51:14] mean_reward=15.5370 params={'n_steer': 6, 'n_throttle': 4, 'learning_rate': 0.0021131130619403186, 'timesteps': 1622, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:51:16] +[AutoResearch] ========== Trial 7/50 ========== +[2026-04-13 12:51:16] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 12:51:16] UCB=3.0459 mu=1.3501 sigma=0.8479 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.002103379184926248, 'timesteps': 4607} +[2026-04-13 12:51:16] UCB=3.0426 mu=1.8623 sigma=0.5901 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0014223856509359136, 'timesteps': 4157} +[2026-04-13 12:51:16] UCB=2.9241 mu=1.2240 sigma=0.8500 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0023860629604045636, 'timesteps': 4875} +[2026-04-13 12:51:16] UCB=2.8724 mu=1.3748 sigma=0.7488 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0003305272678070145, 'timesteps': 4528} +[2026-04-13 12:51:16] UCB=2.8000 mu=1.7430 sigma=0.5285 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0008378766646858452, 'timesteps': 4278} +[2026-04-13 12:51:16] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.002103379184926248, 'timesteps': 4607, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:51:18] [AutoResearch] Launching trial 7: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.002103379184926248, 'timesteps': 4607, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:56:20] [AutoResearch] Trial 7 finished in 302.9s, returncode=0 +[2026-04-13 12:56:20] [AutoResearch] Trial 7: mean_reward=199.5367 std_reward=2.4906 +[2026-04-13 12:56:20] [AutoResearch] === Trial 7 Summary === +[2026-04-13 12:56:20] Total Phase 1 runs: 7 +[2026-04-13 12:56:20] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:56:20] Top 5: +[2026-04-13 12:56:20] mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:56:20] mean_reward=627.6915 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:56:20] mean_reward=454.0640 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005165618383365869, 'timesteps': 4929, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:56:20] mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:56:20] mean_reward=199.5367 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.002103379184926248, 'timesteps': 4607, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:56:22] +[AutoResearch] ========== Trial 8/50 ========== +[2026-04-13 12:56:22] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 12:56:22] UCB=3.7155 mu=2.8123 sigma=0.4516 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0003097316245852375, 'timesteps': 4938} +[2026-04-13 12:56:22] UCB=3.6165 mu=2.1871 sigma=0.7147 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.00027774487279327734, 'timesteps': 4741} +[2026-04-13 12:56:22] UCB=3.5817 mu=2.3074 sigma=0.6372 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.00042186231182417467, 'timesteps': 4737} +[2026-04-13 12:56:22] UCB=3.1830 mu=1.7630 sigma=0.7100 params={'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0010392862598823948, 'timesteps': 4340} +[2026-04-13 12:56:22] UCB=2.9801 mu=2.1794 sigma=0.4004 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.001278006004760151, 'timesteps': 4950} +[2026-04-13 12:56:22] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0003097316245852375, 'timesteps': 4938, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 12:56:24] [AutoResearch] Launching trial 8: {'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0003097316245852375, 'timesteps': 4938, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:01:28] [AutoResearch] Trial 8 finished in 303.7s, returncode=0 +[2026-04-13 13:01:28] [AutoResearch] Trial 8: mean_reward=306.1739 std_reward=13.6044 +[2026-04-13 13:01:28] [AutoResearch] === Trial 8 Summary === +[2026-04-13 13:01:28] Total Phase 1 runs: 8 +[2026-04-13 13:01:28] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:01:28] Top 5: +[2026-04-13 13:01:28] mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:01:28] mean_reward=627.6915 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:01:28] mean_reward=454.0640 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005165618383365869, 'timesteps': 4929, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:01:28] mean_reward=306.1739 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0003097316245852375, 'timesteps': 4938, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:01:28] mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:01:30] +[AutoResearch] ========== Trial 9/50 ========== +[2026-04-13 13:01:30] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 13:01:30] UCB=5.5166 mu=4.1098 sigma=0.7034 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0014813539623020004, 'timesteps': 4054} +[2026-04-13 13:01:30] UCB=5.3730 mu=4.0927 sigma=0.6401 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.000972771458635757, 'timesteps': 3832} +[2026-04-13 13:01:30] UCB=5.0616 mu=3.3272 sigma=0.8672 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0021026667911721994, 'timesteps': 4268} +[2026-04-13 13:01:30] UCB=4.9195 mu=3.4509 sigma=0.7343 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0019399712695168441, 'timesteps': 4823} +[2026-04-13 13:01:30] UCB=4.7893 mu=3.5895 sigma=0.5999 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0013622938331829832, 'timesteps': 4756} +[2026-04-13 13:01:30] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0014813539623020004, 'timesteps': 4054, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:01:32] [AutoResearch] Launching trial 9: {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0014813539623020004, 'timesteps': 4054, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:05:16] [AutoResearch] Trial 9 finished in 223.5s, returncode=0 +[2026-04-13 13:05:16] [AutoResearch] Trial 9: mean_reward=15.5625 std_reward=0.0011 +[2026-04-13 13:05:16] [AutoResearch] === Trial 9 Summary === +[2026-04-13 13:05:16] Total Phase 1 runs: 9 +[2026-04-13 13:05:16] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:05:16] Top 5: +[2026-04-13 13:05:16] mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:05:16] mean_reward=627.6915 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:05:16] mean_reward=454.0640 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005165618383365869, 'timesteps': 4929, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:05:16] mean_reward=306.1739 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0003097316245852375, 'timesteps': 4938, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:05:16] mean_reward=270.5642 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.00014623033103219074, 'timesteps': 3139, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:05:18] +[AutoResearch] ========== Trial 10/50 ========== +[2026-04-13 13:05:18] [AutoResearch] GP UCB top-5 candidates: +[2026-04-13 13:05:18] UCB=3.2755 mu=2.1392 sigma=0.5681 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0010464507674264373, 'timesteps': 4450} +[2026-04-13 13:05:18] UCB=2.6299 mu=1.2561 sigma=0.6869 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0015652851488979931, 'timesteps': 4530} +[2026-04-13 13:05:18] UCB=2.4296 mu=1.2377 sigma=0.5960 params={'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0005256664341026775, 'timesteps': 4983} +[2026-04-13 13:05:18] UCB=2.3451 mu=0.9859 sigma=0.6796 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0005171395498457057, 'timesteps': 4013} +[2026-04-13 13:05:18] UCB=2.3171 mu=1.0474 sigma=0.6348 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0021625928193495006, 'timesteps': 4782} +[2026-04-13 13:05:18] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0010464507674264373, 'timesteps': 4450, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:05:20] [AutoResearch] Launching trial 10: {'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0010464507674264373, 'timesteps': 4450, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:11:06] [AutoResearch] Trial 10 finished in 346.0s, returncode=0 +[2026-04-13 13:11:06] [AutoResearch] Trial 10: mean_reward=682.7352 std_reward=420.9113 +[2026-04-13 13:11:06] [AutoResearch] === Trial 10 Summary === +[2026-04-13 13:11:06] Total Phase 1 runs: 10 +[2026-04-13 13:11:06] Champion: trial=5 mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:11:06] Top 5: +[2026-04-13 13:11:06] mean_reward=4582.7984 params={'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.0006801262090358742, 'timesteps': 4787, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:11:06] mean_reward=682.7352 params={'n_steer': 7, 'n_throttle': 2, 'learning_rate': 0.0010464507674264373, 'timesteps': 4450, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:11:06] mean_reward=627.6915 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0009549126527603771, 'timesteps': 4279, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:11:06] mean_reward=454.0640 params={'n_steer': 6, 'n_throttle': 3, 'learning_rate': 0.0005165618383365869, 'timesteps': 4929, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} +[2026-04-13 13:11:06] mean_reward=306.1739 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0003097316245852375, 'timesteps': 4938, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True} diff --git a/agent/outerloop-results/autoresearch_results_phase1.jsonl b/agent/outerloop-results/autoresearch_results_phase1.jsonl new file mode 100644 index 0000000..87b03d7 --- /dev/null +++ b/agent/outerloop-results/autoresearch_results_phase1.jsonl @@ -0,0 +1,10 @@ +{"trial": 1, "timestamp": "2026-04-13T12:31:19.488414", "params": {"n_steer": 9, "n_throttle": 3, "learning_rate": 0.00014623033103219074, "timesteps": 3139, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 270.5642, "std_reward": 0.1427, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0001/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 200.94285488128662, "reward_hacking_suspected": false} +{"trial": 2, "timestamp": "2026-04-13T12:33:09.872158", "params": {"n_steer": 6, "n_throttle": 4, "learning_rate": 0.0021131130619403186, "timesteps": 1622, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.537, "std_reward": 0.0257, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0002/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 106.36946535110474, "reward_hacking_suspected": false} +{"trial": 3, "timestamp": "2026-04-13T12:34:55.161415", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.002684399373384131, "timesteps": 1755, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.0687, "std_reward": 0.0219, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0003/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 101.27748799324036, "reward_hacking_suspected": false} +{"trial": 4, "timestamp": "2026-04-13T12:39:56.555101", "params": {"n_steer": 9, "n_throttle": 3, "learning_rate": 0.0009549126527603771, "timesteps": 4279, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 627.6915, "std_reward": 2.3536, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0004/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 297.3021275997162, "reward_hacking_suspected": false} +{"trial": 5, "timestamp": "2026-04-13T12:45:43.094106", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.0006801262090358742, "timesteps": 4787, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 4582.7984, "std_reward": 0.4849, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0005/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 342.3742210865021, "reward_hacking_suspected": false} +{"trial": 6, "timestamp": "2026-04-13T12:51:14.032008", "params": {"n_steer": 6, "n_throttle": 3, "learning_rate": 0.0005165618383365869, "timesteps": 4929, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 454.064, "std_reward": 2.7272, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 326.92552065849304, "reward_hacking_suspected": false} +{"trial": 7, "timestamp": "2026-04-13T12:56:20.922347", "params": {"n_steer": 7, "n_throttle": 4, "learning_rate": 0.002103379184926248, "timesteps": 4607, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 199.5367, "std_reward": 2.4906, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0007/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 302.873496055603, "reward_hacking_suspected": false} +{"trial": 8, "timestamp": "2026-04-13T13:01:28.616838", "params": {"n_steer": 8, "n_throttle": 3, "learning_rate": 0.0003097316245852375, "timesteps": 4938, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 306.1739, "std_reward": 13.6044, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0008/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 303.6810266971588, "reward_hacking_suspected": false} +{"trial": 9, "timestamp": "2026-04-13T13:05:16.112705", "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.0014813539623020004, "timesteps": 4054, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 15.5625, "std_reward": 0.0011, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0009/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 223.47979998588562, "reward_hacking_suspected": false} +{"trial": 10, "timestamp": "2026-04-13T13:11:06.106880", "params": {"n_steer": 7, "n_throttle": 2, "learning_rate": 0.0010464507674264373, "timesteps": 4450, "agent": "ppo", "eval_episodes": 3, "reward_shaping": true}, "mean_reward": 682.7352, "std_reward": 420.9113, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/trial-0010/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 345.9794178009033, "reward_hacking_suspected": false}