From 298cd1790a1089d27e16e51bc06ecc6c3553114b Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Tue, 14 Apr 2026 20:37:48 -0400 Subject: [PATCH] =?UTF-8?q?fix:=20LR=20override=20was=20not=20reaching=20t?= =?UTF-8?q?he=20optimizer=20=E2=80=94=20all=20trials=20ran=20at=200.000225?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PPO.load() restores the saved optimizer state (lr=0.000225 from Phase 2 champion). Setting model.learning_rate alone is insufficient: the optimizer's param_groups still hold the old value, and _update_learning_rate() does not read model.learning_rate — it applies model.lr_schedule, which load() rebuilds from the saved learning rate. Fix: after PPO.load(), explicitly set lr on every optimizer param_group: model.learning_rate = lr for pg in model.policy.optimizer.param_groups: pg['lr'] = lr NOTE(review): PPO.train() calls _update_learning_rate() before its gradient steps, so confirm the param_group value is not reset from model.lr_schedule on the first update; overriding model.lr_schedule as well (or passing custom_objects={'learning_rate': lr} to PPO.load()) may be required. Impact: all 8 previous Wave 3 trials actually trained at LR=0.000225 regardless of GP proposal. Results archived as: autoresearch_results_phase3_CONTAMINATED_wrong_lr.jsonl Phase 3 results cleared; autoresearch restarting from scratch. 
Agent: pi Tests: 83 passed Tests-Added: 0 TypeScript: N/A --- agent/multitrack_runner.py | 11 ++- .../autoresearch_phase2_log.txt | 13 +++ .../autoresearch_phase3_log.txt | 83 +++++++++++++++++++ .../autoresearch_results_phase3.jsonl | 5 -- ...results_phase3_CONTAMINATED_wrong_lr.jsonl | 8 ++ 5 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 agent/outerloop-results/autoresearch_results_phase3_CONTAMINATED_wrong_lr.jsonl diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py index f20bd54..bb821ce 100644 --- a/agent/multitrack_runner.py +++ b/agent/multitrack_runner.py @@ -202,9 +202,16 @@ def create_or_load_model(env, learning_rate, warm_start_path=None, seed=None): log(f'[W3 Runner] Loading warm-start model from {warm_start_path}') try: model = PPO.load(warm_start_path, env=env, device='auto') - # Override learning rate — we may be exploring different LRs + # Override learning rate — set BOTH the SB3 attribute AND the + # optimizer param groups. PPO.load() restores the saved optimizer + # state (lr=0.000225 from Phase 2), so just setting model.learning_rate + # is not enough — the optimizer ignores it until _update_learning_rate + # is called, and even then only if it reads the attribute correctly. model.learning_rate = learning_rate - log(f'[W3 Runner] ✅ Warm start loaded. LR overridden to {learning_rate:.6f}') + for pg in model.policy.optimizer.param_groups: + pg['lr'] = learning_rate + log(f'[W3 Runner] ✅ Warm start loaded. 
LR overridden to {learning_rate:.6f} ' + f'(set on model + {len(model.policy.optimizer.param_groups)} optimizer param group(s))') return model except Exception as e: log(f'[W3 Runner] ⚠️ Warm start failed ({e}), training from scratch.') diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt index be01753..7ae7878 100644 --- a/agent/outerloop-results/autoresearch_phase2_log.txt +++ b/agent/outerloop-results/autoresearch_phase2_log.txt @@ -554,3 +554,16 @@ [2026-04-14 13:47:13] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} [2026-04-14 13:47:13] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} [2026-04-14 13:47:13] [AutoResearch] Only 1 results — using random proposal. +[2026-04-14 20:37:35] [AutoResearch] GP UCB top-5 candidates: +[2026-04-14 20:37:35] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-14 20:37:35] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-14 20:37:35] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-14 20:37:35] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-14 20:37:35] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-14 20:37:35] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-14 20:37:35] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-14 20:37:35] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-14 20:37:35] [Champion] 🏆 NEW BEST! 
Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-14 20:37:35] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-14 20:37:35] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-14 20:37:35] [AutoResearch] Only 1 results — using random proposal. diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt index 819b70b..45d5da0 100644 --- a/agent/outerloop-results/autoresearch_phase3_log.txt +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -198,3 +198,86 @@ [2026-04-14 18:22:44] score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} [2026-04-14 18:22:44] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} [2026-04-14 18:22:44] score=14.61 params={'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000} +[2026-04-14 18:22:46] [Wave3] ✅ Git push complete after trial 5 +[2026-04-14 18:22:48] +[Wave3] ========== Trial 6/25 ========== +[2026-04-14 18:22:48] [Wave3] GP UCB top-5 proposals: +[2026-04-14 18:22:48] UCB=3.0500 mu=1.6257 σ=0.7122 params={'learning_rate': 0.0009335007740897103, 'steps_per_switch': 10939, 'total_timesteps': 37842} +[2026-04-14 18:22:48] UCB=3.0271 mu=1.6319 σ=0.6976 params={'learning_rate': 0.0009018551117370809, 'steps_per_switch': 10744, 'total_timesteps': 44030} +[2026-04-14 18:22:48] UCB=3.0176 mu=1.5762 σ=0.7207 params={'learning_rate': 0.000895413146419593, 'steps_per_switch': 10921, 'total_timesteps': 44444} +[2026-04-14 18:22:48] UCB=3.0100 mu=1.7115 σ=0.6493 params={'learning_rate': 0.0009264384396182288, 'steps_per_switch': 9176, 'total_timesteps': 53393} +[2026-04-14 18:22:48] UCB=2.9999 mu=1.4293 σ=0.7853 params={'learning_rate': 0.0009071884060283581, 'steps_per_switch': 11683, 'total_timesteps': 32934} +[2026-04-14 18:22:48] [Wave3] Proposed params: {'learning_rate': 
0.0009335007740897103, 'steps_per_switch': 10939, 'total_timesteps': 37842} +[2026-04-14 18:22:50] [Wave3] Launching trial 6: {'learning_rate': 0.0009335007740897103, 'steps_per_switch': 10939, 'total_timesteps': 37842} +[2026-04-14 18:22:50] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 37842 --steps-per-switch 10939 --learning-rate 0.0009335007740897103 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0006 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 18:54:45] [Wave3] Trial 6 finished in 1915.4s, rc=0 +[2026-04-14 18:54:45] [Wave3] Parsed: combined=136.0329 mini_monaco=136.0329 +[2026-04-14 18:54:45] [Wave3] ===== Trial 6 Summary ===== +[2026-04-14 18:54:45] GP data points : 6 +[2026-04-14 18:54:45] Wave3 Champion: trial=5 score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 18:54:45] Top 5: +[2026-04-14 18:54:45] score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 18:54:45] score=136.03 params={'learning_rate': 0.0009335007740897103, 'steps_per_switch': 10939, 'total_timesteps': 37842} +[2026-04-14 18:54:45] score=28.09 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 18:54:45] score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} +[2026-04-14 18:54:45] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 18:54:47] +[Wave3] ========== Trial 7/25 ========== +[2026-04-14 18:54:47] [Wave3] GP UCB top-5 proposals: +[2026-04-14 18:54:47] UCB=2.5695 mu=1.0434 σ=0.7630 params={'learning_rate': 0.0009571853094473745, 'steps_per_switch': 8279, 'total_timesteps': 65149} 
+[2026-04-14 18:54:47] UCB=2.4957 mu=0.8626 σ=0.8166 params={'learning_rate': 0.0008847315458885072, 'steps_per_switch': 7739, 'total_timesteps': 69794} +[2026-04-14 18:54:47] UCB=2.4939 mu=0.8390 σ=0.8274 params={'learning_rate': 0.0008771680507406297, 'steps_per_switch': 8334, 'total_timesteps': 71955} +[2026-04-14 18:54:47] UCB=2.4561 mu=0.9048 σ=0.7756 params={'learning_rate': 0.0008638688741688625, 'steps_per_switch': 9406, 'total_timesteps': 69536} +[2026-04-14 18:54:47] UCB=2.4282 mu=0.7057 σ=0.8612 params={'learning_rate': 0.0008410317919632058, 'steps_per_switch': 8157, 'total_timesteps': 74924} +[2026-04-14 18:54:47] [Wave3] Proposed params: {'learning_rate': 0.0009571853094473745, 'steps_per_switch': 8279, 'total_timesteps': 65149} +[2026-04-14 18:54:49] [Wave3] Launching trial 7: {'learning_rate': 0.0009571853094473745, 'steps_per_switch': 8279, 'total_timesteps': 65149} +[2026-04-14 18:54:49] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 65149 --steps-per-switch 8279 --learning-rate 0.0009571853094473745 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0007 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 19:58:11] [Wave3] Trial 7 finished in 3801.5s, rc=0 +[2026-04-14 19:58:11] [Wave3] Parsed: combined=92.5138 mini_monaco=92.5138 +[2026-04-14 19:58:11] [Wave3] ===== Trial 7 Summary ===== +[2026-04-14 19:58:11] GP data points : 7 +[2026-04-14 19:58:11] Wave3 Champion: trial=5 score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 19:58:11] Top 5: +[2026-04-14 19:58:11] score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 19:58:11] score=136.03 params={'learning_rate': 0.0009335007740897103, 'steps_per_switch': 10939, 'total_timesteps': 
37842} +[2026-04-14 19:58:11] score=92.51 params={'learning_rate': 0.0009571853094473745, 'steps_per_switch': 8279, 'total_timesteps': 65149} +[2026-04-14 19:58:11] score=28.09 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 19:58:11] score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} +[2026-04-14 19:58:13] +[Wave3] ========== Trial 8/25 ========== +[2026-04-14 19:58:13] [Wave3] GP UCB top-5 proposals: +[2026-04-14 19:58:13] UCB=2.2244 mu=0.8924 σ=0.6660 params={'learning_rate': 0.0007166192676486139, 'steps_per_switch': 12263, 'total_timesteps': 34585} +[2026-04-14 19:58:13] UCB=2.1788 mu=0.7741 σ=0.7023 params={'learning_rate': 0.00070734628199524, 'steps_per_switch': 12454, 'total_timesteps': 39980} +[2026-04-14 19:58:13] UCB=2.1650 mu=0.7614 σ=0.7018 params={'learning_rate': 0.0007126709588457712, 'steps_per_switch': 12502, 'total_timesteps': 41585} +[2026-04-14 19:58:13] UCB=2.1630 mu=0.7027 σ=0.7301 params={'learning_rate': 0.0006962524520444611, 'steps_per_switch': 12551, 'total_timesteps': 41731} +[2026-04-14 19:58:13] UCB=2.1228 mu=1.4002 σ=0.3613 params={'learning_rate': 0.0009722139517771988, 'steps_per_switch': 7979, 'total_timesteps': 36859} +[2026-04-14 19:58:13] [Wave3] Proposed params: {'learning_rate': 0.0007166192676486139, 'steps_per_switch': 12263, 'total_timesteps': 34585} +[2026-04-14 19:58:15] [Wave3] Launching trial 8: {'learning_rate': 0.0007166192676486139, 'steps_per_switch': 12263, 'total_timesteps': 34585} +[2026-04-14 19:58:15] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 34585 --steps-per-switch 12263 --learning-rate 0.0007166192676486139 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0008 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip 
+[2026-04-14 20:23:11] [Wave3] Trial 8 finished in 1495.6s, rc=0 +[2026-04-14 20:23:11] [Wave3] Parsed: combined=108.3697 mini_monaco=108.3697 +[2026-04-14 20:23:11] [Wave3] ===== Trial 8 Summary ===== +[2026-04-14 20:23:11] GP data points : 8 +[2026-04-14 20:23:11] Wave3 Champion: trial=5 score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 20:23:11] Top 5: +[2026-04-14 20:23:11] score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 20:23:11] score=136.03 params={'learning_rate': 0.0009335007740897103, 'steps_per_switch': 10939, 'total_timesteps': 37842} +[2026-04-14 20:23:11] score=108.37 params={'learning_rate': 0.0007166192676486139, 'steps_per_switch': 12263, 'total_timesteps': 34585} +[2026-04-14 20:23:11] score=92.51 params={'learning_rate': 0.0009571853094473745, 'steps_per_switch': 8279, 'total_timesteps': 65149} +[2026-04-14 20:23:11] score=28.09 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 20:23:13] +[Wave3] ========== Trial 9/25 ========== +[2026-04-14 20:23:13] [Wave3] GP UCB top-5 proposals: +[2026-04-14 20:23:13] UCB=2.1683 mu=1.5130 σ=0.3277 params={'learning_rate': 0.0009737883579372665, 'steps_per_switch': 8510, 'total_timesteps': 30923} +[2026-04-14 20:23:13] UCB=1.9762 mu=-0.0238 σ=1.0000 params={'learning_rate': 0.0009868738288938224, 'steps_per_switch': 13481, 'total_timesteps': 149987} +[2026-04-14 20:23:13] UCB=1.9705 mu=-0.0295 σ=1.0000 params={'learning_rate': 0.0008613697541025559, 'steps_per_switch': 14439, 'total_timesteps': 144933} +[2026-04-14 20:23:13] UCB=1.9690 mu=-0.0310 σ=1.0000 params={'learning_rate': 0.0008421371980014351, 'steps_per_switch': 14662, 'total_timesteps': 143365} +[2026-04-14 20:23:13] UCB=1.9688 mu=-0.0317 σ=1.0003 params={'learning_rate': 0.0006318576844029589, 'steps_per_switch': 2183, 
'total_timesteps': 145839} +[2026-04-14 20:23:13] [Wave3] Proposed params: {'learning_rate': 0.0009737883579372665, 'steps_per_switch': 8510, 'total_timesteps': 30923} +[2026-04-14 20:23:15] [Wave3] Launching trial 9: {'learning_rate': 0.0009737883579372665, 'steps_per_switch': 8510, 'total_timesteps': 30923} +[2026-04-14 20:23:15] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 30923 --steps-per-switch 8510 --learning-rate 0.0009737883579372665 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0009 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 20:37:40] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 20:37:40] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 20:37:40] [Wave3] Only 0 results — using random proposal. +[2026-04-14 20:37:40] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 20:37:40] [Champion] 🏆 NEW BEST! 
Trial 1: score=2000.00 (mini_monaco=2000.0) params={} diff --git a/agent/outerloop-results/autoresearch_results_phase3.jsonl b/agent/outerloop-results/autoresearch_results_phase3.jsonl index 1b2f6f3..e69de29 100644 --- a/agent/outerloop-results/autoresearch_results_phase3.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase3.jsonl @@ -1,5 +0,0 @@ -{"trial": 1, "timestamp": "2026-04-14T14:34:25.739734", "params": {"learning_rate": 0.000225, "steps_per_switch": 5000, "total_timesteps": 45000}, "combined_test_score": 24.7695, "mini_monaco_reward": 24.7695, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2808.679010629654} -{"trial": 2, "timestamp": "2026-04-14T16:33:15.893806", "params": {"learning_rate": 0.000225, "steps_per_switch": 10000, "total_timesteps": 90000}, "combined_test_score": 14.61, "mini_monaco_reward": 14.61, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0002/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4797.378093481064} -{"trial": 3, "timestamp": "2026-04-14T17:07:09.327044", "params": {"learning_rate": 0.0004302041414294587, "steps_per_switch": 6993, "total_timesteps": 31040}, "combined_test_score": 27.6387, "mini_monaco_reward": 27.6387, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0003/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2028.9967634677887} -{"trial": 4, "timestamp": "2026-04-14T17:52:56.249319", "params": {"learning_rate": 0.0006723430224246657, "steps_per_switch": 2400, "total_timesteps": 31110}, "combined_test_score": 28.0934, "mini_monaco_reward": 28.0934, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0004/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2742.52951335907} -{"trial": 5, "timestamp": "2026-04-14T18:22:44.803514", "params": 
{"learning_rate": 0.0008293130840877947, "steps_per_switch": 7847, "total_timesteps": 31625}, "combined_test_score": 137.5814, "mini_monaco_reward": 137.5814, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0005/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 1784.4159185886383} diff --git a/agent/outerloop-results/autoresearch_results_phase3_CONTAMINATED_wrong_lr.jsonl b/agent/outerloop-results/autoresearch_results_phase3_CONTAMINATED_wrong_lr.jsonl new file mode 100644 index 0000000..39defd6 --- /dev/null +++ b/agent/outerloop-results/autoresearch_results_phase3_CONTAMINATED_wrong_lr.jsonl @@ -0,0 +1,8 @@ +{"trial": 1, "timestamp": "2026-04-14T14:34:25.739734", "params": {"learning_rate": 0.000225, "steps_per_switch": 5000, "total_timesteps": 45000}, "combined_test_score": 24.7695, "mini_monaco_reward": 24.7695, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2808.679010629654} +{"trial": 2, "timestamp": "2026-04-14T16:33:15.893806", "params": {"learning_rate": 0.000225, "steps_per_switch": 10000, "total_timesteps": 90000}, "combined_test_score": 14.61, "mini_monaco_reward": 14.61, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0002/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4797.378093481064} +{"trial": 3, "timestamp": "2026-04-14T17:07:09.327044", "params": {"learning_rate": 0.0004302041414294587, "steps_per_switch": 6993, "total_timesteps": 31040}, "combined_test_score": 27.6387, "mini_monaco_reward": 27.6387, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0003/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2028.9967634677887} +{"trial": 4, "timestamp": "2026-04-14T17:52:56.249319", "params": {"learning_rate": 0.0006723430224246657, "steps_per_switch": 2400, "total_timesteps": 
31110}, "combined_test_score": 28.0934, "mini_monaco_reward": 28.0934, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0004/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2742.52951335907} +{"trial": 5, "timestamp": "2026-04-14T18:22:44.803514", "params": {"learning_rate": 0.0008293130840877947, "steps_per_switch": 7847, "total_timesteps": 31625}, "combined_test_score": 137.5814, "mini_monaco_reward": 137.5814, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0005/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 1784.4159185886383} +{"trial": 6, "timestamp": "2026-04-14T18:54:45.786650", "params": {"learning_rate": 0.0009335007740897103, "steps_per_switch": 10939, "total_timesteps": 37842}, "combined_test_score": 136.0329, "mini_monaco_reward": 136.0329, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1915.403836965561} +{"trial": 7, "timestamp": "2026-04-14T19:58:11.347899", "params": {"learning_rate": 0.0009571853094473745, "steps_per_switch": 8279, "total_timesteps": 65149}, "combined_test_score": 92.5138, "mini_monaco_reward": 92.5138, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0007/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3801.5387511253357} +{"trial": 8, "timestamp": "2026-04-14T20:23:11.017295", "params": {"learning_rate": 0.0007166192676486139, "steps_per_switch": 12263, "total_timesteps": 34585}, "combined_test_score": 108.3697, "mini_monaco_reward": 108.3697, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0008/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 1495.6480858325958}