From e61ebc5b386c5124b26a3d2ee63f497a5bb380f7 Mon Sep 17 00:00:00 2001
From: Paul Huliganga <paje0101@gmail.com>
Date: Wed, 15 Apr 2026 21:54:50 -0400
Subject: [PATCH] fix: prevent trial timeouts losing all data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three changes:

1. Lower total_timesteps cap: 120k → 90k
   Actual throughput is 16 steps/sec (not 20 as estimated).
   120k steps = 126 min training + 9 min overhead = 135 min > 2hr limit.
   90k steps = 94 min + 8 min overhead = 102 min, safely within limit.

2. Per-segment checkpoint saves in multitrack_runner
   model.save() called after every segment so the latest weights are
   always on disk.  If the runner is killed (timeout/crash/Ctrl+C),
   the weights from the last completed segment are still available.

3. Timeout rescue eval in wave4_controller
   If JOB_TIMEOUT fires and a checkpoint exists, immediately runs a
   quick mini_monaco eval on the checkpoint so the trial still produces
   a GP data point despite the timeout.

Agent: pi
Tests: 102 passed
Tests-Added: 0
TypeScript: N/A
---
 agent/multitrack_runner.py                    | 11 ++++
 .../autoresearch_phase2_log.txt               | 13 +++++
 .../autoresearch_phase3_log.txt               |  5 ++
 .../autoresearch_phase4_log.txt               | 57 +++++++++++++++++++
 .../autoresearch_results_phase4.jsonl         |  2 +
 agent/wave4_controller.py                     | 34 +++++++++--
 6 files changed, 117 insertions(+), 5 deletions(-)

diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py
index 4c3f472..f6a8b86 100644
--- a/agent/multitrack_runner.py
+++ b/agent/multitrack_runner.py
@@ -332,6 +332,17 @@ def train_multitrack(model, first_env, total_timesteps, steps_per_switch):
         )
         steps_done += segment_steps
 
+        # --- Checkpoint after every segment ---
+        # If the trial is killed (timeout/crash) the latest model is always
+        # on disk so results are never completely lost.
+        if save_dir:
+            try:
+                os.makedirs(save_dir, exist_ok=True)
+                model.save(os.path.join(save_dir, 'model'))
+                log(f'[W3 Runner] Checkpoint saved at step {steps_done:,}')
+            except Exception as e:
+                log(f'[W3 Runner] WARNING: checkpoint save failed: {e}')
+
         # Quick segment reward estimate — one deterministic episode,
         # capped at MAX_EVAL_STEPS to prevent non-terminating episodes
         # (e.g. car driving forever on wide generated_track) inflating the metric.
diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt
index c27af4d..22de6b3 100644
--- a/agent/outerloop-results/autoresearch_phase2_log.txt
+++ b/agent/outerloop-results/autoresearch_phase2_log.txt
@@ -671,3 +671,16 @@
 [2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
 [2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
 [2026-04-15 09:16:53] [AutoResearch] Only 1 results — using random proposal.
+[2026-04-15 21:54:16] [AutoResearch] GP UCB top-5 candidates:
+[2026-04-15 21:54:16]   UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
+[2026-04-15 21:54:16]   UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
+[2026-04-15 21:54:16]   UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
+[2026-04-15 21:54:16]   UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
+[2026-04-15 21:54:16]   UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
+[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
+[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
+[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
+[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
+[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
+[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
+[2026-04-15 21:54:16] [AutoResearch] Only 1 results — using random proposal.
diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt
index 322d545..2e057ba 100644
--- a/agent/outerloop-results/autoresearch_phase3_log.txt
+++ b/agent/outerloop-results/autoresearch_phase3_log.txt
@@ -370,3 +370,8 @@
 [2026-04-15 09:17:10] [Wave3] Only 0 results — using random proposal.
 [2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
 [2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
+[2026-04-15 21:54:37] [Wave3] Seed trial 1/2: using hardcoded params.
+[2026-04-15 21:54:37] [Wave3] Seed trial 2/2: using hardcoded params.
+[2026-04-15 21:54:37] [Wave3] Only 0 results — using random proposal.
+[2026-04-15 21:54:37] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
+[2026-04-15 21:54:37] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
diff --git a/agent/outerloop-results/autoresearch_phase4_log.txt b/agent/outerloop-results/autoresearch_phase4_log.txt
index de88691..8fad94b 100644
--- a/agent/outerloop-results/autoresearch_phase4_log.txt
+++ b/agent/outerloop-results/autoresearch_phase4_log.txt
@@ -237,3 +237,60 @@
 [2026-04-15 17:08:50]     score=93.39  params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000}
 [2026-04-15 17:08:50]     score=42.30  params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
 [2026-04-15 17:08:50]     score=31.73  params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878}
+[2026-04-15 17:08:51] [Wave4] ✅ Git push complete after trial 5
+[2026-04-15 17:08:53] 
+[Wave4] ========== Trial 6/25 ==========
+[2026-04-15 17:08:53] [Wave4] GP UCB top-5 proposals:
+[2026-04-15 17:08:53]   UCB=2.9352 mu=1.3419 σ=0.7966 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
+[2026-04-15 17:08:53]   UCB=2.8442 mu=1.0827 σ=0.8808 params={'learning_rate': 0.0017357820530198068, 'steps_per_switch': 5009, 'total_timesteps': 87890}
+[2026-04-15 17:08:53]   UCB=2.8258 mu=1.0947 σ=0.8655 params={'learning_rate': 0.001668571948240882, 'steps_per_switch': 4814, 'total_timesteps': 101589}
+[2026-04-15 17:08:53]   UCB=2.8168 mu=0.9976 σ=0.9096 params={'learning_rate': 0.001788300003253932, 'steps_per_switch': 4215, 'total_timesteps': 80751}
+[2026-04-15 17:08:53]   UCB=2.8147 mu=1.5860 σ=0.6144 params={'learning_rate': 0.0012975326127189415, 'steps_per_switch': 3961, 'total_timesteps': 100527}
+[2026-04-15 17:08:53] [Wave4] Proposed params: {'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
+[2026-04-15 17:08:55] [Wave4] Launching trial 6: {'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
+[2026-04-15 17:08:55] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 79532 --steps-per-switch 5524 --learning-rate 0.0016223486895735558 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006
+[2026-04-15 18:19:30] [Wave4] Trial 6 finished in 4235.3s, rc=0
+[2026-04-15 18:19:30] [Wave4] Parsed: combined=176.6721 mini_monaco=176.6721
+[2026-04-15 18:19:31] [Wave4] ===== Trial 6 Summary =====
+[2026-04-15 18:19:31]   GP data points : 4
+[2026-04-15 18:19:31]   Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
+[2026-04-15 18:19:31]   Top 5:
+[2026-04-15 18:19:31]     score=176.67  params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
+[2026-04-15 18:19:31]     score=93.39  params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000}
+[2026-04-15 18:19:31]     score=42.30  params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
+[2026-04-15 18:19:31]     score=31.73  params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878}
+[2026-04-15 18:19:33] 
+[Wave4] ========== Trial 7/25 ==========
+[2026-04-15 18:19:33] [Wave4] GP UCB top-5 proposals:
+[2026-04-15 18:19:33]   UCB=2.5520 mu=1.1319 σ=0.7101 params={'learning_rate': 0.001779556625962812, 'steps_per_switch': 4226, 'total_timesteps': 123651}
+[2026-04-15 18:19:33]   UCB=2.5115 mu=0.7590 σ=0.8763 params={'learning_rate': 0.0017725367196782225, 'steps_per_switch': 3941, 'total_timesteps': 145375}
+[2026-04-15 18:19:33]   UCB=2.4988 mu=1.1176 σ=0.6906 params={'learning_rate': 0.0018568268138302447, 'steps_per_switch': 6910, 'total_timesteps': 119000}
+[2026-04-15 18:19:33]   UCB=2.4973 mu=0.8571 σ=0.8201 params={'learning_rate': 0.0019597767383017994, 'steps_per_switch': 9245, 'total_timesteps': 113699}
+[2026-04-15 18:19:33]   UCB=2.4874 mu=1.2498 σ=0.6188 params={'learning_rate': 0.001739834862935009, 'steps_per_switch': 4764, 'total_timesteps': 117423}
+[2026-04-15 18:19:33] [Wave4] Proposed params: {'learning_rate': 0.001779556625962812, 'steps_per_switch': 4226, 'total_timesteps': 123651}
+[2026-04-15 18:19:35] [Wave4] Launching trial 7: {'learning_rate': 0.001779556625962812, 'steps_per_switch': 4226, 'total_timesteps': 123651}
+[2026-04-15 18:19:35] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 123651 --steps-per-switch 4226 --learning-rate 0.001779556625962812 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0007
+[2026-04-15 20:19:38] [Wave4] Trial 7 TIMED OUT — killing runner.
+[2026-04-15 20:19:38] [Wave4] Trial 7 finished in 7203.4s, rc=-9
+[2026-04-15 20:19:38] [Wave4] Parsed: combined=None mini_monaco=None
+[2026-04-15 20:19:38] [Wave4] ⚠️  No test score parsed — defaulting to 0.0
+[2026-04-15 20:19:38] [Wave4] combined_test_score=0 — excluded from GP (crash/timeout).
+[2026-04-15 20:19:38] [Wave4] ===== Trial 7 Summary =====
+[2026-04-15 20:19:38]   GP data points : 4
+[2026-04-15 20:19:38]   Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
+[2026-04-15 20:19:38]   Top 5:
+[2026-04-15 20:19:38]     score=176.67  params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
+[2026-04-15 20:19:38]     score=93.39  params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000}
+[2026-04-15 20:19:38]     score=42.30  params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
+[2026-04-15 20:19:38]     score=31.73  params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878}
+[2026-04-15 20:19:40] 
+[Wave4] ========== Trial 8/25 ==========
+[2026-04-15 20:19:40] [Wave4] GP UCB top-5 proposals:
+[2026-04-15 20:19:40]   UCB=2.5481 mu=1.2081 σ=0.6700 params={'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996}
+[2026-04-15 20:19:40]   UCB=2.5357 mu=0.8887 σ=0.8235 params={'learning_rate': 0.001901474046587741, 'steps_per_switch': 5568, 'total_timesteps': 136179}
+[2026-04-15 20:19:40]   UCB=2.5188 mu=1.1408 σ=0.6890 params={'learning_rate': 0.0018359364507444984, 'steps_per_switch': 5746, 'total_timesteps': 122356}
+[2026-04-15 20:19:40]   UCB=2.4994 mu=0.7140 σ=0.8927 params={'learning_rate': 0.0019730433529852004, 'steps_per_switch': 4578, 'total_timesteps': 144668}
+[2026-04-15 20:19:40]   UCB=2.4934 mu=1.4169 σ=0.5382 params={'learning_rate': 0.0018644207617691767, 'steps_per_switch': 3071, 'total_timesteps': 75515}
+[2026-04-15 20:19:40] [Wave4] Proposed params: {'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996}
+[2026-04-15 20:19:42] [Wave4] Launching trial 8: {'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996}
+[2026-04-15 20:19:42] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 110996 --steps-per-switch 4185 --learning-rate 0.0019521225364821895 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0008
diff --git a/agent/outerloop-results/autoresearch_results_phase4.jsonl b/agent/outerloop-results/autoresearch_results_phase4.jsonl
index 10522b7..213e0c1 100644
--- a/agent/outerloop-results/autoresearch_results_phase4.jsonl
+++ b/agent/outerloop-results/autoresearch_results_phase4.jsonl
@@ -3,3 +3,5 @@
 {"trial": 3, "timestamp": "2026-04-15T13:39:32.676397", "params": {"learning_rate": 0.0008162408849407889, "steps_per_switch": 8441, "total_timesteps": 140634}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0003/model.zip", "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7222.726921319962}
 {"trial": 4, "timestamp": "2026-04-15T15:39:37.385294", "params": {"learning_rate": 0.00020853884350577402, "steps_per_switch": 19927, "total_timesteps": 138928}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7200.657395839691}
 {"trial": 5, "timestamp": "2026-04-15T17:08:50.252022", "params": {"learning_rate": 0.0007517877668650138, "steps_per_switch": 9368, "total_timesteps": 104878}, "combined_test_score": 31.73, "mini_monaco_reward": 31.73, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0005/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5348.823204040527}
+{"trial": 6, "timestamp": "2026-04-15T18:19:30.998330", "params": {"learning_rate": 0.0016223486895735558, "steps_per_switch": 5524, "total_timesteps": 79532}, "combined_test_score": 176.6721, "mini_monaco_reward": 176.6721, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4235.310993909836}
+{"trial": 7, "timestamp": "2026-04-15T20:19:38.497310", "params": {"learning_rate": 0.001779556625962812, "steps_per_switch": 4226, "total_timesteps": 123651}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7203.425666332245}
diff --git a/agent/wave4_controller.py b/agent/wave4_controller.py
index 6ea8688..6f5cfbe 100644
--- a/agent/wave4_controller.py
+++ b/agent/wave4_controller.py
@@ -63,13 +63,14 @@ os.makedirs(MODELS_DIR, exist_ok=True)
 os.makedirs(CHAMPION_DIR, exist_ok=True)
 
 # ---- Hyperparameter search space ----
-# Wider LR range for scratch training (no warm-start prior to anchor it).
-# total_timesteps capped at 120k: at ~20 steps/sec, 120k = 100 min, well
-# within the 2-hour JOB_TIMEOUT.  Trials 3 and 4 timed out at 140k steps.
+# total_timesteps capped at 90k.  Actual throughput is ~16 steps/sec
+# (mountain_track physics is heavier than estimated):
+#   90k steps = 94 min training + ~8 min overhead = ~102 min < 2hr limit
+# Previous cap of 120k caused timeouts (120k = 126 min training alone).
 PARAM_SPACE = {
     'learning_rate':    {'type': 'float', 'min': 1e-4,  'max': 2e-3},
-    'steps_per_switch': {'type': 'int',   'min': 3000,  'max': 20000},
-    'total_timesteps':  {'type': 'int',   'min': 60000, 'max': 120000},
+    'steps_per_switch': {'type': 'int',   'min': 3000,  'max': 15000},
+    'total_timesteps':  {'type': 'int',   'min': 60000, 'max': 90000},
 }
 PARAM_KEYS = list(PARAM_SPACE.keys())
 
@@ -366,6 +367,29 @@ def launch_trial(params, trial_num):
                 proc.kill()
                 log(f'[Wave4] Trial {trial_num} TIMED OUT — killing runner.')
                 output_lines.append(f'[TIMEOUT after {JOB_TIMEOUT}s]')
+                # Rescue: if a checkpoint was saved mid-training, run a quick
+                # mini_monaco eval so the trial produces SOME GP data.
+                ckpt = os.path.join(save_dir, 'model.zip')
+                if os.path.exists(ckpt):
+                    log(f'[Wave4] Timeout rescue: checkpoint found, running mini_monaco eval...')
+                    rescue_cmd = [
+                        'python3', RUNNER,
+                        '--total-timesteps', '1',   # skip training
+                        '--steps-per-switch', '1',
+                        '--learning-rate', str(float(params['learning_rate'])),
+                        '--eval-episodes', '3',
+                        '--save-dir', save_dir,
+                        '--warm-start', ckpt,
+                    ]
+                    try:
+                        r = subprocess.run(rescue_cmd, capture_output=True,
+                                           text=True, timeout=600)
+                        rescue_out = r.stdout + '\n' + r.stderr
+                        print(rescue_out[-2000:], flush=True)
+                        output_lines.append(rescue_out)
+                        log(f'[Wave4] Rescue eval complete.')
+                    except Exception as re:
+                        log(f'[Wave4] Rescue eval failed: {re}')
                 break
 
         proc.wait()