From e61ebc5b386c5124b26a3d2ee63f497a5bb380f7 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Wed, 15 Apr 2026 21:54:50 -0400 Subject: [PATCH] fix: prevent trial timeouts losing all data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three changes: 1. Lower total_timesteps cap: 120k → 90k Actual throughput is 16 steps/sec (not 20 as estimated). 120k steps = 126 min training + 9 min overhead = 135 min > 2hr limit. 90k steps = 94 min + 8 min overhead = 102 min, safely within limit. 2. Per-segment checkpoint saves in multitrack_runner model.save() called after every segment so the latest weights are always on disk. If the runner is killed (timeout/crash/Ctrl+C), training data is never completely lost. 3. Timeout rescue eval in wave4_controller If JOB_TIMEOUT fires and a checkpoint exists, immediately runs a quick mini_monaco eval on the checkpoint so the trial still produces a GP data point despite the timeout. Agent: pi Tests: 102 passed Tests-Added: 0 TypeScript: N/A --- agent/multitrack_runner.py | 11 ++++ .../autoresearch_phase2_log.txt | 13 +++++ .../autoresearch_phase3_log.txt | 5 ++ .../autoresearch_phase4_log.txt | 57 +++++++++++++++++++ .../autoresearch_results_phase4.jsonl | 2 + agent/wave4_controller.py | 34 +++++++++-- 6 files changed, 117 insertions(+), 5 deletions(-) diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py index 4c3f472..f6a8b86 100644 --- a/agent/multitrack_runner.py +++ b/agent/multitrack_runner.py @@ -332,6 +332,17 @@ def train_multitrack(model, first_env, total_timesteps, steps_per_switch): ) steps_done += segment_steps + # --- Checkpoint after every segment --- + # If the trial is killed (timeout/crash) the latest model is always + # on disk so results are never completely lost. 
+ if save_dir: + try: + os.makedirs(save_dir, exist_ok=True) + model.save(os.path.join(save_dir, 'model')) + log(f'[W3 Runner] Checkpoint saved at step {steps_done:,}') + except Exception as e: + log(f'[W3 Runner] WARNING: checkpoint save failed: {e}') + # Quick segment reward estimate — one deterministic episode, # capped at MAX_EVAL_STEPS to prevent non-terminating episodes # (e.g. car driving forever on wide generated_track) inflating the metric. diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt index c27af4d..22de6b3 100644 --- a/agent/outerloop-results/autoresearch_phase2_log.txt +++ b/agent/outerloop-results/autoresearch_phase2_log.txt @@ -671,3 +671,16 @@ [2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} [2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} [2026-04-15 09:16:53] [AutoResearch] Only 1 results — using random proposal. +[2026-04-15 21:54:16] [AutoResearch] GP UCB top-5 candidates: +[2026-04-15 21:54:16] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-15 21:54:16] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-15 21:54:16] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-15 21:54:16] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-15 21:54:16] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! 
Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-15 21:54:16] [AutoResearch] Only 1 results — using random proposal. diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt index 322d545..2e057ba 100644 --- a/agent/outerloop-results/autoresearch_phase3_log.txt +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -370,3 +370,8 @@ [2026-04-15 09:17:10] [Wave3] Only 0 results — using random proposal. [2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} [2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} +[2026-04-15 21:54:37] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-15 21:54:37] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-15 21:54:37] [Wave3] Only 0 results — using random proposal. +[2026-04-15 21:54:37] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-15 21:54:37] [Champion] 🏆 NEW BEST! 
Trial 1: score=2000.00 (mini_monaco=2000.0) params={} diff --git a/agent/outerloop-results/autoresearch_phase4_log.txt b/agent/outerloop-results/autoresearch_phase4_log.txt index de88691..8fad94b 100644 --- a/agent/outerloop-results/autoresearch_phase4_log.txt +++ b/agent/outerloop-results/autoresearch_phase4_log.txt @@ -237,3 +237,60 @@ [2026-04-15 17:08:50] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} [2026-04-15 17:08:50] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} [2026-04-15 17:08:50] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878} +[2026-04-15 17:08:51] [Wave4] ✅ Git push complete after trial 5 +[2026-04-15 17:08:53] +[Wave4] ========== Trial 6/25 ========== +[2026-04-15 17:08:53] [Wave4] GP UCB top-5 proposals: +[2026-04-15 17:08:53] UCB=2.9352 mu=1.3419 σ=0.7966 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-15 17:08:53] UCB=2.8442 mu=1.0827 σ=0.8808 params={'learning_rate': 0.0017357820530198068, 'steps_per_switch': 5009, 'total_timesteps': 87890} +[2026-04-15 17:08:53] UCB=2.8258 mu=1.0947 σ=0.8655 params={'learning_rate': 0.001668571948240882, 'steps_per_switch': 4814, 'total_timesteps': 101589} +[2026-04-15 17:08:53] UCB=2.8168 mu=0.9976 σ=0.9096 params={'learning_rate': 0.001788300003253932, 'steps_per_switch': 4215, 'total_timesteps': 80751} +[2026-04-15 17:08:53] UCB=2.8147 mu=1.5860 σ=0.6144 params={'learning_rate': 0.0012975326127189415, 'steps_per_switch': 3961, 'total_timesteps': 100527} +[2026-04-15 17:08:53] [Wave4] Proposed params: {'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-15 17:08:55] [Wave4] Launching trial 6: {'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-15 17:08:55] [Wave4] Command: python3 
/home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 79532 --steps-per-switch 5524 --learning-rate 0.0016223486895735558 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006 +[2026-04-15 18:19:30] [Wave4] Trial 6 finished in 4235.3s, rc=0 +[2026-04-15 18:19:30] [Wave4] Parsed: combined=176.6721 mini_monaco=176.6721 +[2026-04-15 18:19:31] [Wave4] ===== Trial 6 Summary ===== +[2026-04-15 18:19:31] GP data points : 4 +[2026-04-15 18:19:31] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 18:19:31] Top 5: +[2026-04-15 18:19:31] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-15 18:19:31] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 18:19:31] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 18:19:31] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878} +[2026-04-15 18:19:33] +[Wave4] ========== Trial 7/25 ========== +[2026-04-15 18:19:33] [Wave4] GP UCB top-5 proposals: +[2026-04-15 18:19:33] UCB=2.5520 mu=1.1319 σ=0.7101 params={'learning_rate': 0.001779556625962812, 'steps_per_switch': 4226, 'total_timesteps': 123651} +[2026-04-15 18:19:33] UCB=2.5115 mu=0.7590 σ=0.8763 params={'learning_rate': 0.0017725367196782225, 'steps_per_switch': 3941, 'total_timesteps': 145375} +[2026-04-15 18:19:33] UCB=2.4988 mu=1.1176 σ=0.6906 params={'learning_rate': 0.0018568268138302447, 'steps_per_switch': 6910, 'total_timesteps': 119000} +[2026-04-15 18:19:33] UCB=2.4973 mu=0.8571 σ=0.8201 params={'learning_rate': 0.0019597767383017994, 'steps_per_switch': 9245, 'total_timesteps': 113699} +[2026-04-15 18:19:33] UCB=2.4874 mu=1.2498 
σ=0.6188 params={'learning_rate': 0.001739834862935009, 'steps_per_switch': 4764, 'total_timesteps': 117423} +[2026-04-15 18:19:33] [Wave4] Proposed params: {'learning_rate': 0.001779556625962812, 'steps_per_switch': 4226, 'total_timesteps': 123651} +[2026-04-15 18:19:35] [Wave4] Launching trial 7: {'learning_rate': 0.001779556625962812, 'steps_per_switch': 4226, 'total_timesteps': 123651} +[2026-04-15 18:19:35] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 123651 --steps-per-switch 4226 --learning-rate 0.001779556625962812 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0007 +[2026-04-15 20:19:38] [Wave4] Trial 7 TIMED OUT — killing runner. +[2026-04-15 20:19:38] [Wave4] Trial 7 finished in 7203.4s, rc=-9 +[2026-04-15 20:19:38] [Wave4] Parsed: combined=None mini_monaco=None +[2026-04-15 20:19:38] [Wave4] ⚠️ No test score parsed — defaulting to 0.0 +[2026-04-15 20:19:38] [Wave4] combined_test_score=0 — excluded from GP (crash/timeout). 
+[2026-04-15 20:19:38] [Wave4] ===== Trial 7 Summary ===== +[2026-04-15 20:19:38] GP data points : 4 +[2026-04-15 20:19:38] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 20:19:38] Top 5: +[2026-04-15 20:19:38] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-15 20:19:38] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 20:19:38] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 20:19:38] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878} +[2026-04-15 20:19:40] +[Wave4] ========== Trial 8/25 ========== +[2026-04-15 20:19:40] [Wave4] GP UCB top-5 proposals: +[2026-04-15 20:19:40] UCB=2.5481 mu=1.2081 σ=0.6700 params={'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996} +[2026-04-15 20:19:40] UCB=2.5357 mu=0.8887 σ=0.8235 params={'learning_rate': 0.001901474046587741, 'steps_per_switch': 5568, 'total_timesteps': 136179} +[2026-04-15 20:19:40] UCB=2.5188 mu=1.1408 σ=0.6890 params={'learning_rate': 0.0018359364507444984, 'steps_per_switch': 5746, 'total_timesteps': 122356} +[2026-04-15 20:19:40] UCB=2.4994 mu=0.7140 σ=0.8927 params={'learning_rate': 0.0019730433529852004, 'steps_per_switch': 4578, 'total_timesteps': 144668} +[2026-04-15 20:19:40] UCB=2.4934 mu=1.4169 σ=0.5382 params={'learning_rate': 0.0018644207617691767, 'steps_per_switch': 3071, 'total_timesteps': 75515} +[2026-04-15 20:19:40] [Wave4] Proposed params: {'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996} +[2026-04-15 20:19:42] [Wave4] Launching trial 8: {'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996} +[2026-04-15 
20:19:42] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 110996 --steps-per-switch 4185 --learning-rate 0.0019521225364821895 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0008 diff --git a/agent/outerloop-results/autoresearch_results_phase4.jsonl b/agent/outerloop-results/autoresearch_results_phase4.jsonl index 10522b7..213e0c1 100644 --- a/agent/outerloop-results/autoresearch_results_phase4.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase4.jsonl @@ -3,3 +3,5 @@ {"trial": 3, "timestamp": "2026-04-15T13:39:32.676397", "params": {"learning_rate": 0.0008162408849407889, "steps_per_switch": 8441, "total_timesteps": 140634}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0003/model.zip", "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7222.726921319962} {"trial": 4, "timestamp": "2026-04-15T15:39:37.385294", "params": {"learning_rate": 0.00020853884350577402, "steps_per_switch": 19927, "total_timesteps": 138928}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7200.657395839691} {"trial": 5, "timestamp": "2026-04-15T17:08:50.252022", "params": {"learning_rate": 0.0007517877668650138, "steps_per_switch": 9368, "total_timesteps": 104878}, "combined_test_score": 31.73, "mini_monaco_reward": 31.73, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0005/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5348.823204040527} +{"trial": 6, "timestamp": "2026-04-15T18:19:30.998330", "params": {"learning_rate": 0.0016223486895735558, "steps_per_switch": 5524, "total_timesteps": 79532}, "combined_test_score": 176.6721, "mini_monaco_reward": 176.6721, "model_path": 
"/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4235.310993909836} +{"trial": 7, "timestamp": "2026-04-15T20:19:38.497310", "params": {"learning_rate": 0.001779556625962812, "steps_per_switch": 4226, "total_timesteps": 123651}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7203.425666332245} diff --git a/agent/wave4_controller.py b/agent/wave4_controller.py index 6ea8688..6f5cfbe 100644 --- a/agent/wave4_controller.py +++ b/agent/wave4_controller.py @@ -63,13 +63,14 @@ os.makedirs(MODELS_DIR, exist_ok=True) os.makedirs(CHAMPION_DIR, exist_ok=True) # ---- Hyperparameter search space ---- -# Wider LR range for scratch training (no warm-start prior to anchor it). -# total_timesteps capped at 120k: at ~20 steps/sec, 120k = 100 min, well -# within the 2-hour JOB_TIMEOUT. Trials 3 and 4 timed out at 140k steps. +# total_timesteps capped at 90k. Actual throughput is ~16 steps/sec +# (mountain_track physics is heavier than estimated): +# 90k steps = 94 min training + ~8 min overhead = ~102 min < 2hr limit +# Previous cap of 120k caused timeouts (120k = 126 min training alone). PARAM_SPACE = { 'learning_rate': {'type': 'float', 'min': 1e-4, 'max': 2e-3}, - 'steps_per_switch': {'type': 'int', 'min': 3000, 'max': 20000}, - 'total_timesteps': {'type': 'int', 'min': 60000, 'max': 120000}, + 'steps_per_switch': {'type': 'int', 'min': 3000, 'max': 15000}, + 'total_timesteps': {'type': 'int', 'min': 60000, 'max': 90000}, } PARAM_KEYS = list(PARAM_SPACE.keys()) @@ -366,6 +367,29 @@ def launch_trial(params, trial_num): proc.kill() log(f'[Wave4] Trial {trial_num} TIMED OUT — killing runner.') output_lines.append(f'[TIMEOUT after {JOB_TIMEOUT}s]') + # Rescue: if a checkpoint was saved mid-training, run a quick + # mini_monaco eval so the trial produces SOME GP data. 
+ ckpt = os.path.join(save_dir, 'model.zip') + if os.path.exists(ckpt): + log(f'[Wave4] Timeout rescue: checkpoint found, running mini_monaco eval...') + rescue_cmd = [ + 'python3', RUNNER, + '--total-timesteps', '1', # skip training + '--steps-per-switch', '1', + '--learning-rate', str(float(params['learning_rate'])), + '--eval-episodes', '3', + '--save-dir', save_dir, + '--warm-start', ckpt, + ] + try: + r = subprocess.run(rescue_cmd, capture_output=True, + text=True, timeout=600) + rescue_out = r.stdout + '\n' + r.stderr + print(rescue_out[-2000:], flush=True) + output_lines.append(rescue_out) + log(f'[Wave4] Rescue eval complete.') + except Exception as re: + log(f'[Wave4] Rescue eval failed: {re}') break proc.wait()