diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py index f6a8b86..3680a8f 100644 --- a/agent/multitrack_runner.py +++ b/agent/multitrack_runner.py @@ -290,7 +290,8 @@ def create_or_load_model(env, learning_rate, warm_start_path=None, seed=None): # ---- Training loop ---- -def train_multitrack(model, first_env, total_timesteps, steps_per_switch): +def train_multitrack(model, first_env, total_timesteps, steps_per_switch, + save_dir=None): """ Train PPO across training tracks by round-robin switching every steps_per_switch steps. @@ -542,6 +543,7 @@ def main(): model, env, total_timesteps=args.total_timesteps, steps_per_switch=args.steps_per_switch, + save_dir=save_dir, ) except Exception as e: log(f'[W3 Runner] ❌ Training failed: {e}') diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt index ae8c5b9..1e9b0d8 100644 --- a/agent/outerloop-results/autoresearch_phase2_log.txt +++ b/agent/outerloop-results/autoresearch_phase2_log.txt @@ -697,3 +697,16 @@ [2026-04-15 22:26:26] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} [2026-04-15 22:26:26] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} [2026-04-15 22:26:26] [AutoResearch] Only 1 results — using random proposal. +[2026-04-15 22:47:03] [AutoResearch] GP UCB top-5 candidates: +[2026-04-15 22:47:03] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-15 22:47:03] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-15 22:47:03] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-15 22:47:03] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-15 22:47:03] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-15 22:47:03] [AutoResearch] Only 1 results — using random proposal. diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt index 7bfc5ce..3ac581f 100644 --- a/agent/outerloop-results/autoresearch_phase3_log.txt +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -380,3 +380,8 @@ [2026-04-15 22:26:39] [Wave3] Only 0 results — using random proposal. [2026-04-15 22:26:39] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} [2026-04-15 22:26:39] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} +[2026-04-15 22:47:16] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-15 22:47:16] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-15 22:47:16] [Wave3] Only 0 results — using random proposal. +[2026-04-15 22:47:16] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-15 22:47:16] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} diff --git a/agent/outerloop-results/autoresearch_phase4_log.txt b/agent/outerloop-results/autoresearch_phase4_log.txt index 54ff63d..1903e9e 100644 --- a/agent/outerloop-results/autoresearch_phase4_log.txt +++ b/agent/outerloop-results/autoresearch_phase4_log.txt @@ -364,3 +364,27 @@ [2026-04-15 22:26:54] [Wave4] Proposed params: {'learning_rate': 0.0009434282949002715, 'steps_per_switch': 14966, 'total_timesteps': 83094} [2026-04-15 22:26:56] [Wave4] Launching trial 6: {'learning_rate': 0.0009434282949002715, 'steps_per_switch': 14966, 'total_timesteps': 83094} [2026-04-15 22:26:56] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 83094 --steps-per-switch 14966 --learning-rate 0.0009434282949002715 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006 +[2026-04-15 22:38:49] [Wave4] Trial 6 finished in 712.6s, rc=101 +[2026-04-15 22:38:49] [Wave4] Parsed: combined=None mini_monaco=None +[2026-04-15 22:38:49] [Wave4] ⚠️ No test score parsed — defaulting to 0.0 +[2026-04-15 22:38:49] [Wave4] combined_test_score=0 — excluded from GP (crash/timeout). +[2026-04-15 22:38:49] [Wave4] ===== Trial 6 Summary ===== +[2026-04-15 22:38:49] GP data points : 5 +[2026-04-15 22:38:49] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 22:38:49] Top 5: +[2026-04-15 22:38:49] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000} +[2026-04-15 22:38:49] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-15 22:38:49] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 22:38:49] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 22:38:49] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878} +[2026-04-15 22:38:51] +[Wave4] ========== Trial 7/25 ========== +[2026-04-15 22:38:51] [Wave4] GP UCB top-5 proposals: +[2026-04-15 22:38:51] UCB=2.8131 mu=1.4171 σ=0.6980 params={'learning_rate': 0.0004346662414818233, 'steps_per_switch': 14860, 'total_timesteps': 83818} +[2026-04-15 22:38:51] UCB=2.7278 mu=1.2363 σ=0.7458 params={'learning_rate': 0.00020210801580883826, 'steps_per_switch': 14145, 'total_timesteps': 88060} +[2026-04-15 22:38:51] UCB=2.7159 mu=1.0082 σ=0.8538 params={'learning_rate': 0.00012352374581522935, 'steps_per_switch': 14555, 'total_timesteps': 84897} +[2026-04-15 22:38:51] UCB=2.6982 mu=1.4815 σ=0.6084 params={'learning_rate': 0.0010645009917759616, 'steps_per_switch': 14511, 'total_timesteps': 89107} +[2026-04-15 22:38:51] UCB=2.6948 mu=1.1351 σ=0.7798 params={'learning_rate': 0.0012021332143154963, 'steps_per_switch': 14377, 'total_timesteps': 87121} +[2026-04-15 22:38:51] [Wave4] Proposed params: {'learning_rate': 0.0004346662414818233, 'steps_per_switch': 14860, 'total_timesteps': 83818} +[2026-04-15 22:38:53] [Wave4] Launching trial 7: {'learning_rate': 0.0004346662414818233, 'steps_per_switch': 14860, 'total_timesteps': 83818} +[2026-04-15 22:38:53] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 83818 --steps-per-switch 14860 --learning-rate 0.0004346662414818233 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0007 diff --git a/agent/outerloop-results/autoresearch_results_phase4.jsonl b/agent/outerloop-results/autoresearch_results_phase4.jsonl index 3834922..5cd802d 100644 --- a/agent/outerloop-results/autoresearch_results_phase4.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase4.jsonl @@ -3,3 +3,4 @@ {"trial": 5, "timestamp": "2026-04-15T17:08:50.252022", "params": {"learning_rate": 0.0007517877668650138, "steps_per_switch": 9368, "total_timesteps": 104878}, "combined_test_score": 31.73, "mini_monaco_reward": 31.73, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0005/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5348.823204040527} {"trial": 6, "timestamp": "2026-04-15T18:19:30.998330", "params": {"learning_rate": 0.0016223486895735558, "steps_per_switch": 5524, "total_timesteps": 79532}, "combined_test_score": 176.6721, "mini_monaco_reward": 176.6721, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4235.310993909836} {"trial": 0, "timestamp": "2026-04-15T03:15:46", "params": {"learning_rate": 0.0006852550685205609, "steps_per_switch": 17499, "total_timesteps": 90000}, "combined_test_score": 1943.1, "mini_monaco_reward": 1943.1, "model_path": "agent/models/wave4-trial-0003/model.zip", "champion": true, "run_status": "ok_pre_patch", "elapsed_sec": 5766.0, "note": "Pre-exploit-patch overnight run. Model on disk. steps_per_switch capped for GP."} +{"trial": 6, "timestamp": "2026-04-15T22:38:49.383770", "params": {"learning_rate": 0.0009434282949002715, "steps_per_switch": 14966, "total_timesteps": 83094}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006/model.zip", "champion": false, "run_status": "error_rc101", "elapsed_sec": 712.563179731369}