diff --git a/agent/outerloop-results/autoresearch_phase4_log.txt b/agent/outerloop-results/autoresearch_phase4_log.txt index 091e285..c7df2fa 100644 --- a/agent/outerloop-results/autoresearch_phase4_log.txt +++ b/agent/outerloop-results/autoresearch_phase4_log.txt @@ -149,3 +149,82 @@ [2026-04-15 09:06:26] [Wave4] Proposed params: {'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} [2026-04-15 09:06:28] [Wave4] Launching trial 1: {'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} [2026-04-15 09:06:28] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 80000 --steps-per-switch 6000 --learning-rate 0.0003 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0001 +[2026-04-15 09:17:28] ================================================================= +[2026-04-15 09:17:28] [Wave4] Multi-Track Autoresearch — GP+UCB Generalization Search +[2026-04-15 09:17:28] [Wave4] Training tracks : generated_track, mountain_track (no generated_road, no warm-start) +[2026-04-15 09:17:28] [Wave4] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition) +[2026-04-15 09:17:28] [Wave4] Max trials : 25 | kappa=2.0 | push every 5 +[2026-04-15 09:17:28] [Wave4] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase4.jsonl +[2026-04-15 09:17:28] [Wave4] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-champion +[2026-04-15 09:17:28] [Wave4] Warm start : NONE (training from scratch each trial) +[2026-04-15 09:17:28] ================================================================= +[2026-04-15 09:17:28] [Wave4] Loaded 0 existing Phase 3 results. +[2026-04-15 09:17:28] [Wave4] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 09:17:28] [Wave4] Starting from trial 1. +[2026-04-15 09:17:28] +[Wave4] ========== Trial 1/25 ========== +[2026-04-15 09:17:28] [Wave4] Seed trial 1/2: using hardcoded params. +[2026-04-15 09:17:28] [Wave4] Proposed params: {'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 09:17:30] [Wave4] Launching trial 1: {'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 09:17:30] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 80000 --steps-per-switch 6000 --learning-rate 0.0003 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0001 +[2026-04-15 10:33:27] [Wave4] Trial 1 finished in 4557.0s, rc=0 +[2026-04-15 10:33:27] [Wave4] Parsed: combined=42.2964 mini_monaco=42.2964 +[2026-04-15 10:33:27] [Wave4] ===== Trial 1 Summary ===== +[2026-04-15 10:33:27] GP data points : 1 +[2026-04-15 10:33:27] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 10:33:27] Top 5: +[2026-04-15 10:33:27] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 10:33:29] +[Wave4] ========== Trial 2/25 ========== +[2026-04-15 10:33:29] [Wave4] Seed trial 2/2: using hardcoded params. +[2026-04-15 10:33:29] [Wave4] Proposed params: {'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 10:33:31] [Wave4] Launching trial 2: {'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 10:33:31] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 80000 --steps-per-switch 6000 --learning-rate 0.001 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0002 +[2026-04-15 11:39:05] [Wave4] Trial 2 finished in 3934.0s, rc=0 +[2026-04-15 11:39:05] [Wave4] Parsed: combined=93.3894 mini_monaco=93.3894 +[2026-04-15 11:39:05] [Wave4] ===== Trial 2 Summary ===== +[2026-04-15 11:39:05] GP data points : 2 +[2026-04-15 11:39:05] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 11:39:05] Top 5: +[2026-04-15 11:39:05] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 11:39:05] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 11:39:07] +[Wave4] ========== Trial 3/25 ========== +[2026-04-15 11:39:07] [Wave4] Only 2 results — using random proposal. +[2026-04-15 11:39:07] [Wave4] Proposed params: {'learning_rate': 0.0008162408849407889, 'steps_per_switch': 8441, 'total_timesteps': 140634} +[2026-04-15 11:39:09] [Wave4] Launching trial 3: {'learning_rate': 0.0008162408849407889, 'steps_per_switch': 8441, 'total_timesteps': 140634} +[2026-04-15 11:39:09] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 140634 --steps-per-switch 8441 --learning-rate 0.0008162408849407889 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0003 +[2026-04-15 13:39:32] [Wave4] Trial 3 TIMED OUT — killing runner. +[2026-04-15 13:39:32] [Wave4] Trial 3 finished in 7222.7s, rc=-9 +[2026-04-15 13:39:32] [Wave4] Parsed: combined=None mini_monaco=None +[2026-04-15 13:39:32] [Wave4] ⚠️ No test score parsed — defaulting to 0.0 +[2026-04-15 13:39:32] [Wave4] combined_test_score=0 — excluded from GP (crash/timeout). +[2026-04-15 13:39:32] [Wave4] ===== Trial 3 Summary ===== +[2026-04-15 13:39:32] GP data points : 2 +[2026-04-15 13:39:32] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 13:39:32] Top 5: +[2026-04-15 13:39:32] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 13:39:32] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 13:39:34] +[Wave4] ========== Trial 4/25 ========== +[2026-04-15 13:39:34] [Wave4] Only 2 results — using random proposal. +[2026-04-15 13:39:34] [Wave4] Proposed params: {'learning_rate': 0.00020853884350577402, 'steps_per_switch': 19927, 'total_timesteps': 138928} +[2026-04-15 13:39:36] [Wave4] Launching trial 4: {'learning_rate': 0.00020853884350577402, 'steps_per_switch': 19927, 'total_timesteps': 138928} +[2026-04-15 13:39:36] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 138928 --steps-per-switch 19927 --learning-rate 0.00020853884350577402 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0004 +[2026-04-15 15:39:37] [Wave4] Trial 4 TIMED OUT — killing runner. +[2026-04-15 15:39:37] [Wave4] Trial 4 finished in 7200.7s, rc=-9 +[2026-04-15 15:39:37] [Wave4] Parsed: combined=None mini_monaco=None +[2026-04-15 15:39:37] [Wave4] ⚠️ No test score parsed — defaulting to 0.0 +[2026-04-15 15:39:37] [Wave4] combined_test_score=0 — excluded from GP (crash/timeout). +[2026-04-15 15:39:37] [Wave4] ===== Trial 4 Summary ===== +[2026-04-15 15:39:37] GP data points : 2 +[2026-04-15 15:39:37] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 15:39:37] Top 5: +[2026-04-15 15:39:37] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 15:39:37] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 15:39:39] +[Wave4] ========== Trial 5/25 ========== +[2026-04-15 15:39:39] [Wave4] Only 2 results — using random proposal. +[2026-04-15 15:39:39] [Wave4] Proposed params: {'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878} +[2026-04-15 15:39:41] [Wave4] Launching trial 5: {'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878} +[2026-04-15 15:39:41] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 104878 --steps-per-switch 9368 --learning-rate 0.0007517877668650138 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0005 diff --git a/agent/outerloop-results/autoresearch_results_phase4.jsonl b/agent/outerloop-results/autoresearch_results_phase4.jsonl index e69de29..6880260 100644 --- a/agent/outerloop-results/autoresearch_results_phase4.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase4.jsonl @@ -0,0 +1,4 @@ +{"trial": 1, "timestamp": "2026-04-15T10:33:27.772991", "params": {"learning_rate": 0.0003, "steps_per_switch": 6000, "total_timesteps": 80000}, "combined_test_score": 42.2964, "mini_monaco_reward": 42.2964, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0001/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4557.003196716309} +{"trial": 2, "timestamp": "2026-04-15T11:39:05.855178", "params": {"learning_rate": 0.001, "steps_per_switch": 6000, "total_timesteps": 80000}, "combined_test_score": 93.3894, "mini_monaco_reward": 93.3894, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0002/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3934.0482540130615} +{"trial": 3, "timestamp": "2026-04-15T13:39:32.676397", "params": {"learning_rate": 0.0008162408849407889, "steps_per_switch": 8441, "total_timesteps": 140634}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0003/model.zip", "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7222.726921319962} +{"trial": 4, "timestamp": "2026-04-15T15:39:37.385294", "params": {"learning_rate": 0.00020853884350577402, "steps_per_switch": 19927, "total_timesteps": 138928}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7200.657395839691} diff --git a/agent/wave4_controller.py b/agent/wave4_controller.py index ed21bbb..6ea8688 100644 --- a/agent/wave4_controller.py +++ b/agent/wave4_controller.py @@ -64,11 +64,12 @@ os.makedirs(CHAMPION_DIR, exist_ok=True) # ---- Hyperparameter search space ---- # Wider LR range for scratch training (no warm-start prior to anchor it). -# More total_timesteps needed — fresh model requires more steps to converge. +# total_timesteps capped at 120k: at ~20 steps/sec, 120k = 100 min, well +# within the 2-hour JOB_TIMEOUT. Trials 3 and 4 timed out at 140k steps. PARAM_SPACE = { 'learning_rate': {'type': 'float', 'min': 1e-4, 'max': 2e-3}, 'steps_per_switch': {'type': 'int', 'min': 3000, 'max': 20000}, - 'total_timesteps': {'type': 'int', 'min': 60000, 'max': 250000}, + 'total_timesteps': {'type': 'int', 'min': 60000, 'max': 120000}, } PARAM_KEYS = list(PARAM_SPACE.keys())