From 7534527722970462f405ea708ec3f28a408fd9b2 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Tue, 14 Apr 2026 22:40:38 -0400 Subject: [PATCH] Wave 4: scratch training on generated_track + mountain_track, zero-shot mini_monaco MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strategy change driven by Trial 1 data analysis: - generated_road removed: too similar to generated_track, and Phase-2 warm-start caused catastrophic forgetting (reward 2388→37 in one rotation) - mountain_track mean reward was only 17 — model never converged there - mini_monaco score 24.9 (37 steps) — model was outputting degenerate actions Wave 4 approach: - NO warm-start: fresh random weights every trial - Train: generated_track + mountain_track (visually distinct backgrounds, both have road markings — forces model to learn general mark-following) - Test (zero-shot): mini_monaco only (never seen during training) - Wider LR search: [1e-4, 2e-3] (scratch model needs different range) - Larger step budgets: 60k-250k total (fresh model needs more time) - Seed params: lr=0.0003 and lr=0.001 (diverse from the start) Files: - multitrack_runner.py: 2 training tracks, no warm-start auto-detection - wave4_controller.py: new Wave 4 GP+UCB controller - tests updated: TRAINING_TRACKS assertion, seed param tests → wave4 - 96 tests passing ADR-013 to follow. 
Agent: pi Tests: 96 passed Tests-Added: 0 TypeScript: N/A --- agent/multitrack_runner.py | 23 +- .../autoresearch_phase2_log.txt | 13 + .../autoresearch_phase3_log.txt | 36 ++ .../autoresearch_results_phase3.jsonl | 1 + agent/wave4_controller.py | 528 ++++++++++++++++++ tests/test_end_to_end.py | 17 +- tests/test_wave3.py | 9 +- 7 files changed, 604 insertions(+), 23 deletions(-) create mode 100644 agent/wave4_controller.py diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py index 7107a5a..d99fda9 100644 --- a/agent/multitrack_runner.py +++ b/agent/multitrack_runner.py @@ -6,7 +6,7 @@ training segments. After training, evaluates on zero-shot test tracks (mini_monaco + warren) to measure cross-track generalization. Track classification (from visual analysis): - TRAINING : generated_road, generated_track, mountain_track + TRAINING : generated_track, mountain_track (Wave 4 — no generated_road, no warm-start) (outdoor, same road markings — yellow centre + white edge) TEST/EVAL : mini_monaco, warren (never seen during training — generalization benchmark) @@ -69,8 +69,13 @@ from reward_wrapper import SpeedRewardWrapper # ---- Track catalogue ---- # Maps short name → gym env ID +# Wave 4 training tracks. +# generated_road removed: it is visually too similar to generated_track +# and its Phase-2-champion warm-start caused catastrophic forgetting that +# prevented generalisation. generated_track + mountain_track have +# meaningfully different backgrounds, forcing the model to learn +# track-marking features rather than scene-specific shortcuts. 
TRAINING_TRACKS = [ - ('generated_road', 'donkey-generated-roads-v0'), ('generated_track', 'donkey-generated-track-v0'), ('mountain_track', 'donkey-mountain-track-v0'), ] @@ -418,18 +423,14 @@ def main(): help='Skip zero-shot evaluation (training only)') args = parser.parse_args() - # Resolve warm-start path - warm_start = args.warm_start - if warm_start is None: - default_champ = os.path.join(AGENT_DIR, 'models', 'champion', 'model.zip') - if os.path.exists(default_champ): - warm_start = default_champ - log(f'[W3 Runner] Auto-detected warm start: {warm_start}') + # Wave 4: never auto-detect a warm start. Training always begins from + # random weights so the CNN is not biased toward any single track. + warm_start = args.warm_start # None unless caller explicitly passes one - save_dir = args.save_dir or os.path.join(AGENT_DIR, 'models', 'wave3', + save_dir = args.save_dir or os.path.join(AGENT_DIR, 'models', 'wave4', f'trial-{int(time.time())}') - log(f'[W3 Runner] === Wave 3 Multi-Track Training ===') + log(f'[W3 Runner] === Wave 4 Multi-Track Training (scratch, no warm-start) ===') log(f'[W3 Runner] total_timesteps ={args.total_timesteps:,}') log(f'[W3 Runner] steps_per_switch={args.steps_per_switch:,}') log(f'[W3 Runner] learning_rate ={args.learning_rate:.6f}') diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt index 9a46dba..0a1895c 100644 --- a/agent/outerloop-results/autoresearch_phase2_log.txt +++ b/agent/outerloop-results/autoresearch_phase2_log.txt @@ -580,3 +580,16 @@ [2026-04-14 21:27:08] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} [2026-04-14 21:27:08] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} [2026-04-14 21:27:08] [AutoResearch] Only 1 results — using random proposal. 
+[2026-04-14 22:40:11] [AutoResearch] GP UCB top-5 candidates: +[2026-04-14 22:40:11] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-14 22:40:11] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-14 22:40:11] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-14 22:40:11] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-14 22:40:11] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-14 22:40:11] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-14 22:40:11] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-14 22:40:11] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-14 22:40:11] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-14 22:40:11] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-14 22:40:11] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-14 22:40:11] [AutoResearch] Only 1 results — using random proposal. diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt index 0d350df..41ccf64 100644 --- a/agent/outerloop-results/autoresearch_phase3_log.txt +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -304,3 +304,39 @@ [2026-04-14 21:27:21] [Wave3] Only 0 results — using random proposal. [2026-04-14 21:27:21] [Champion] 🏆 NEW BEST! 
Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} [2026-04-14 21:27:21] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} +[2026-04-14 21:27:52] ================================================================= +[2026-04-14 21:27:52] [Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search +[2026-04-14 21:27:52] [Wave3] Training tracks : generated_road, generated_track, mountain_track +[2026-04-14 21:27:52] [Wave3] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition) +[2026-04-14 21:27:52] [Wave3] Max trials : 25 | kappa=2.0 | push every 5 +[2026-04-14 21:27:52] [Wave3] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase3.jsonl +[2026-04-14 21:27:52] [Wave3] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion +[2026-04-14 21:27:52] [Wave3] Warm start : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 21:27:52] ================================================================= +[2026-04-14 21:27:52] [Wave3] Loaded 0 existing Phase 3 results. +[2026-04-14 21:27:52] [Wave3] Wave3 Champion: trial=5 score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 21:27:52] [Wave3] Starting from trial 1. +[2026-04-14 21:27:52] +[Wave3] ========== Trial 1/25 ========== +[2026-04-14 21:27:52] [Wave3] Seed trial 1/2: using hardcoded params. 
+[2026-04-14 21:27:52] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 21:27:54] [Wave3] Launching trial 1: {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 21:27:54] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 45000 --steps-per-switch 5000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 22:21:10] [Wave3] Trial 1 finished in 3195.8s, rc=0 +[2026-04-14 22:21:10] [Wave3] Parsed: combined=24.925 mini_monaco=24.925 +[2026-04-14 22:21:10] [Wave3] ===== Trial 1 Summary ===== +[2026-04-14 22:21:10] GP data points : 1 +[2026-04-14 22:21:10] Wave3 Champion: trial=5 score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 22:21:10] Top 5: +[2026-04-14 22:21:10] score=24.93 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 22:21:12] +[Wave3] ========== Trial 2/25 ========== +[2026-04-14 22:21:12] [Wave3] Seed trial 2/2: using hardcoded params. 
+[2026-04-14 22:21:12] [Wave3] Proposed params: {'learning_rate': 0.001, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 22:21:14] [Wave3] Launching trial 2: {'learning_rate': 0.001, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 22:21:14] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 45000 --steps-per-switch 5000 --learning-rate 0.001 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0002 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 22:40:23] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 22:40:23] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 22:40:23] [Wave3] Only 0 results — using random proposal. +[2026-04-14 22:40:23] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 22:40:23] [Champion] 🏆 NEW BEST! 
Trial 1: score=2000.00 (mini_monaco=2000.0) params={} diff --git a/agent/outerloop-results/autoresearch_results_phase3.jsonl b/agent/outerloop-results/autoresearch_results_phase3.jsonl index e69de29..c920765 100644 --- a/agent/outerloop-results/autoresearch_results_phase3.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase3.jsonl @@ -0,0 +1 @@ +{"trial": 1, "timestamp": "2026-04-14T22:21:10.056351", "params": {"learning_rate": 0.000225, "steps_per_switch": 5000, "total_timesteps": 45000}, "combined_test_score": 24.925, "mini_monaco_reward": 24.925, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3195.8102633953094} diff --git a/agent/wave4_controller.py b/agent/wave4_controller.py new file mode 100644 index 0000000..ed21bbb --- /dev/null +++ b/agent/wave4_controller.py @@ -0,0 +1,528 @@ +""" +================================================================= +Wave 4 Autoresearch Controller — Multi-Track Generalization (Scratch) +================================================================= +GP+UCB Bayesian optimization over multi-track training hyperparameters. + +Key changes from Wave 3: + - NO warm-start: each trial trains from random weights. + The Phase-2 champion CNN was biased toward generated_road and caused + catastrophic forgetting. Starting fresh forces genuine generalisation. + - Training tracks: generated_track + mountain_track only. + Visually distinct (trees vs mountain/barriers) — model must learn + features that ignore background and follow road markings. + - Test track (zero-shot): mini_monaco (never seen during training). + +Search space: + learning_rate — PPO learning rate [1e-4, 2e-3] (wider for scratch) + steps_per_switch — steps per track segment [3000, 20000] + total_timesteps — total training budget [60000, 250000] + +Each trial: + 1. GP+UCB proposes hyperparameters + 2. 
Launches multitrack_runner.py (fresh PPO, 2 training tracks) + 3. Parses combined_test_score from stdout + 4. Updates GP with (hyperparams → test_score) mapping + 5. Updates Wave 4 champion if test_score improved + +Results: outerloop-results/autoresearch_results_phase4.jsonl +Champion: models/wave4-champion/model.zip + manifest.json + +Usage: + python3 wave4_controller.py --trials 25 --explore 2.0 --push-every 5 + +Stop with Ctrl+C at any time — resumes from existing results. +================================================================= +""" + +import os +import sys +import json +import time +import subprocess +import re +import shutil +import numpy as np +from datetime import datetime + +# ---- Paths ---- +PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) +REPO_ROOT = os.path.dirname(PROJECT_DIR) +RUNNER = os.path.join(PROJECT_DIR, 'multitrack_runner.py') +RESULTS_DIR = os.path.join(PROJECT_DIR, 'outerloop-results') +MODELS_DIR = os.path.join(PROJECT_DIR, 'models') +CHAMPION_DIR = os.path.join(MODELS_DIR, 'wave4-champion') + +RESULTS_FILE = os.path.join(RESULTS_DIR, 'autoresearch_results_phase4.jsonl') +LOG_FILE = os.path.join(RESULTS_DIR, 'autoresearch_phase4_log.txt') + +WARM_START = None # Wave 4: always train from scratch (no Phase 2 champion warm-start) + +os.makedirs(RESULTS_DIR, exist_ok=True) +os.makedirs(MODELS_DIR, exist_ok=True) +os.makedirs(CHAMPION_DIR, exist_ok=True) + +# ---- Hyperparameter search space ---- +# Wider LR range for scratch training (no warm-start prior to anchor it). +# More total_timesteps needed — fresh model requires more steps to converge. 
+PARAM_SPACE = { + 'learning_rate': {'type': 'float', 'min': 1e-4, 'max': 2e-3}, + 'steps_per_switch': {'type': 'int', 'min': 3000, 'max': 20000}, + 'total_timesteps': {'type': 'int', 'min': 60000, 'max': 250000}, +} +PARAM_KEYS = list(PARAM_SPACE.keys()) + +FIXED_PARAMS = { + 'eval_episodes': 3, +} + +N_CANDIDATES = 500 +UCB_KAPPA = 2.0 +MIN_TRIALS_BEFORE_GP = 3 +JOB_TIMEOUT = 7200 # 2h — 400k steps on CPU may need time + +# ---- Seed trials near Phase 2 champion ---- +# GP warm-up: first 2 trials use known-good parameters so GP has real prior data +SEED_PARAMS = [ + # Low-mid LR — stable convergence from scratch (~67 min) + {'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}, + # High LR — faster adaptation, tests if scratch model benefits from aggressive LR (~67 min) + {'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000}, +] + + +# ---- Logging ---- +def log(msg): + ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + line = f'[{ts}] {msg}' + print(line, flush=True) + with open(LOG_FILE, 'a') as f: + f.write(line + '\n') + + +# ---- Parameter encoding ---- +def encode_params(params): + vec = [] + for k in PARAM_KEYS: + if k not in params: + vec.append(0.5) + continue + spec = PARAM_SPACE[k] + v = params[k] + norm = (v - spec['min']) / (spec['max'] - spec['min']) + vec.append(float(np.clip(norm, 0.0, 1.0))) + return np.array(vec) + + +def decode_params(vec): + params = {} + for i, k in enumerate(PARAM_KEYS): + spec = PARAM_SPACE[k] + v = float(vec[i]) * (spec['max'] - spec['min']) + spec['min'] + if spec['type'] == 'int': + v = int(round(v)) + v = max(spec['min'], min(spec['max'], v)) + else: + v = float(np.clip(v, spec['min'], spec['max'])) + params[k] = v + return params + + +def random_candidate(): + return np.random.uniform(0, 1, len(PARAM_KEYS)) + + +# ---- Gaussian Process ---- +class TinyGP: + """Minimal RBF-kernel GP for surrogate modelling (pure numpy, no sklearn).""" + + def __init__(self, 
length_scale=0.3, noise=1e-3): + self.ls = length_scale + self.noise = noise + self.X = None + self.alpha = None + self.K_inv = None + + def _rbf(self, X1, X2): + diff = X1[:, np.newaxis, :] - X2[np.newaxis, :, :] + sq = np.sum(diff ** 2, axis=-1) + return np.exp(-sq / (2 * self.ls ** 2)) + + def fit(self, X, y): + self.X = np.array(X) + n = len(y) + K = self._rbf(self.X, self.X) + self.noise * np.eye(n) + try: + self.K_inv = np.linalg.inv(K) + except np.linalg.LinAlgError: + self.K_inv = np.linalg.pinv(K) + self.alpha = self.K_inv @ np.array(y) + + def predict(self, X_new): + X_new = np.atleast_2d(X_new) + K_s = self._rbf(X_new, self.X) + mu = K_s @ self.alpha + var = np.maximum( + 1.0 + self.noise - np.sum((K_s @ self.K_inv) * K_s, axis=1), + 1e-9 + ) + return mu, np.sqrt(var) + + +# ---- Champion tracker ---- +class Wave3ChampionTracker: + def __init__(self, champion_dir): + self.champion_dir = champion_dir + self.manifest_path = os.path.join(champion_dir, 'manifest.json') + os.makedirs(champion_dir, exist_ok=True) + self._best = self._load() + + def _load(self): + if os.path.exists(self.manifest_path): + try: + with open(self.manifest_path) as f: + return json.load(f) + except Exception: + pass + return {'combined_test_score': float('-inf'), 'trial': None} + + @property + def best_score(self): + return self._best.get('combined_test_score', float('-inf')) + + def update_if_better(self, score, params, model_zip_path, trial, + mini_monaco_reward=None): + if score <= self.best_score: + return False + + dest = os.path.join(self.champion_dir, 'model.zip') + if model_zip_path and os.path.exists(model_zip_path): + try: + shutil.copy2(model_zip_path, dest) + except Exception as e: + log(f'[Champion] WARNING: copy failed: {e}') + dest = model_zip_path + + manifest = { + 'trial': trial, + 'timestamp': datetime.now().isoformat(), + 'params': params, + 'combined_test_score': score, + 'mini_monaco_reward': mini_monaco_reward, + 'model_path': dest, + } + with 
open(self.manifest_path, 'w') as f: + json.dump(manifest, f, indent=2) + self._best = manifest + log(f'[Champion] 🏆 NEW BEST! Trial {trial}: ' + f'score={score:.2f} ' + f'(mini_monaco={mini_monaco_reward:.1f}) ' + f'params={params}') + return True + + def summary(self): + if self._best['trial'] is None: + return 'No Wave 3 champion yet.' + return (f"Wave4 Champion: trial={self._best['trial']} " + f"score={self._best['combined_test_score']:.2f} " + f"params={self._best['params']}") + + +# ---- Load existing results ---- +def load_results(): + results = [] + if not os.path.exists(RESULTS_FILE): + return results + with open(RESULTS_FILE) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + rec = json.loads(line) + score = rec.get('combined_test_score') + if score is not None: + results.append({ + 'params': rec['params'], + 'combined_test_score': float(score), + }) + except Exception: + pass + return results + + +# ---- GP+UCB proposal ---- +def propose_next_params(results, trial_num, kappa=UCB_KAPPA): + """ + For the first SEED_PARAMS trials: use the hardcoded seed list. + Once GP has enough data: use GP+UCB to propose the next parameters. 
+ """ + # Seed phase — use known-good starting points + seed_idx = trial_num - 1 # trial_num is 1-indexed + if seed_idx < len(SEED_PARAMS): + log(f'[Wave4] Seed trial {trial_num}/{len(SEED_PARAMS)}: using hardcoded params.') + return dict(SEED_PARAMS[seed_idx]) + + # Not enough data for GP yet — random exploration + if len(results) < MIN_TRIALS_BEFORE_GP: + log(f'[Wave4] Only {len(results)} results — using random proposal.') + return decode_params(random_candidate()) + + # GP+UCB + X = np.array([encode_params(r['params']) for r in results]) + y = np.array([r['combined_test_score'] for r in results]) + y_mean = y.mean() + y_std = y.std() if y.std() > 0 else 1.0 + y_norm = (y - y_mean) / y_std + + gp = TinyGP(length_scale=0.3, noise=1e-3) + gp.fit(X, y_norm) + + candidates = np.random.uniform(0, 1, (N_CANDIDATES, len(PARAM_KEYS))) + mu, sigma = gp.predict(candidates) + ucb = mu + kappa * sigma + + top5_idx = np.argsort(ucb)[-5:][::-1] + log(f'[Wave4] GP UCB top-5 proposals:') + for idx in top5_idx: + p = decode_params(candidates[idx]) + log(f' UCB={ucb[idx]:.4f} mu={mu[idx]:.4f} σ={sigma[idx]:.4f} params={p}') + + return decode_params(candidates[np.argmax(ucb)]) + + +# ---- Utility: parse multitrack_runner output ---- +def parse_runner_output(output): + """ + Extract test metrics from multitrack_runner.py stdout. 
+ + Looks for: + [W3 Runner][TEST] combined_test_score= + [W3 Runner][TEST] mini_monaco_reward= + """ + combined = None + mini_monaco = None + + m = re.search(r'\[W3 Runner\]\[TEST\]\s+combined_test_score=([+-]?[\d.]+)', output) + if m: + combined = float(m.group(1)) + + m = re.search(r'\[W3 Runner\]\[TEST\]\s+mini_monaco_reward=([+-]?[\d.]+)', output) + if m: + mini_monaco = float(m.group(1)) + + return combined, mini_monaco + + +# ---- Job launcher ---- +def kill_stale(): + """Kill any zombie multitrack_runner or donkeycar_sb3_runner processes.""" + subprocess.run(['pkill', '-9', '-f', 'multitrack_runner.py'], check=False) + subprocess.run(['pkill', '-9', '-f', 'donkeycar_sb3_runner.py'], check=False) + time.sleep(2) + + +def launch_trial(params, trial_num): + """ + Launch multitrack_runner.py as a subprocess with the given hyperparameters. + Returns: (combined_test_score, mini_monaco_reward, + model_zip_path, output, status, elapsed_sec, save_dir) + """ + save_dir = os.path.join(MODELS_DIR, f'wave4-trial-{trial_num:04d}') + os.makedirs(save_dir, exist_ok=True) + + cmd = [ + 'python3', RUNNER, + '--total-timesteps', str(int(params['total_timesteps'])), + '--steps-per-switch', str(int(params['steps_per_switch'])), + '--learning-rate', str(float(params['learning_rate'])), + '--eval-episodes', str(FIXED_PARAMS['eval_episodes']), + '--save-dir', save_dir, + ] + # Wave 4: NO warm-start — train from random weights every trial. 
+ # (WARM_START is None; passing --warm-start is intentionally omitted) + + log(f'[Wave4] Launching trial {trial_num}: {params}') + log(f'[Wave4] Command: {" ".join(cmd)}') + start = time.time() + output_lines = [] + + try: + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, # merge stderr into stdout + text=True, + bufsize=1, # line-buffered + ) + + deadline = start + JOB_TIMEOUT + for line in proc.stdout: + line = line.rstrip('\n') + output_lines.append(line) + print(line, flush=True) # streams straight to nohup log + if time.time() > deadline: + proc.kill() + log(f'[Wave4] Trial {trial_num} TIMED OUT — killing runner.') + output_lines.append(f'[TIMEOUT after {JOB_TIMEOUT}s]') + break + + proc.wait() + elapsed = time.time() - start + status = 'ok' if proc.returncode == 0 else f'error_rc{proc.returncode}' + log(f'[Wave4] Trial {trial_num} finished in {elapsed:.1f}s, rc={proc.returncode}') + + except Exception as exc: + elapsed = time.time() - start + output_lines.append(f'[EXCEPTION: {exc}]') + status = 'exception' + log(f'[Wave4] Trial {trial_num} raised exception: {exc}') + + output = '\n'.join(output_lines) + + # Parse results + combined, mini_monaco = parse_runner_output(output) + log(f'[Wave4] Parsed: combined={combined} mini_monaco={mini_monaco}') + + model_zip = os.path.join(save_dir, 'model.zip') + if not os.path.exists(model_zip): + model_zip = None + + return combined, mini_monaco, model_zip, output, status, elapsed, save_dir + + +# ---- Result saving ---- +def save_result(trial, params, combined, mini_monaco, + model_path, is_champion, status, elapsed): + rec = { + 'trial': trial, + 'timestamp': datetime.now().isoformat(), + 'params': params, + 'combined_test_score': combined, + 'mini_monaco_reward': mini_monaco, + 'model_path': model_path, + 'champion': is_champion, + 'run_status': status, + 'elapsed_sec': elapsed, + } + with open(RESULTS_FILE, 'a') as f: + f.write(json.dumps(rec) + '\n') + + +# ---- Git push ---- 
+def git_push(trial_num): + try: + subprocess.run(['git', '-C', REPO_ROOT, 'add', '-A'], + check=True, capture_output=True) + subprocess.run([ + 'git', '-C', REPO_ROOT, 'commit', '-m', + f'wave4: autoresearch trial {trial_num} results\n\n' + f'Agent: pi\nTests: N/A\nTests-Added: 0\nTypeScript: N/A' + ], check=True, capture_output=True) + subprocess.run(['git', '-C', REPO_ROOT, 'push'], + check=True, capture_output=True) + log(f'[Wave4] ✅ Git push complete after trial {trial_num}') + except subprocess.CalledProcessError as e: + log(f'[Wave4] ⚠️ Git push failed: {e}') + + +# ---- Summary ---- +def print_summary(results, champion, trial): + if not results: + return + log(f'[Wave4] ===== Trial {trial} Summary =====') + log(f' GP data points : {len(results)}') + log(f' {champion.summary()}') + sorted_r = sorted(results, key=lambda r: r['combined_test_score'], reverse=True) + log(f' Top 5:') + for r in sorted_r[:5]: + log(f' score={r["combined_test_score"]:.2f} params={r["params"]}') + + +# ---- Main loop ---- +def run_wave3(max_trials=25, kappa=UCB_KAPPA, push_every=5): + log('=' * 65) + log('[Wave4] Multi-Track Autoresearch — GP+UCB Generalization Search') + log(f'[Wave4] Training tracks : generated_track, mountain_track (no generated_road, no warm-start)') + log(f'[Wave4] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition)') + log(f'[Wave4] Max trials : {max_trials} | kappa={kappa} | push every {push_every}') + log(f'[Wave4] Results file : {RESULTS_FILE}') + log(f'[Wave4] Champion dir : {CHAMPION_DIR}') + log(f'[Wave4] Warm start : NONE (training from scratch each trial)') + log('=' * 65) + + results = load_results() + champion = Wave3ChampionTracker(CHAMPION_DIR) + + log(f'[Wave4] Loaded {len(results)} existing Phase 4 results.') + log(f'[Wave4] {champion.summary()}') + + # Determine starting trial number (resume from existing results) + start_trial = len(results) + 1 + log(f'[Wave4] Starting from trial {start_trial}.') + + for trial in 
range(start_trial, max_trials + 1): + log(f'\n[Wave4] ========== Trial {trial}/{max_trials} ==========') + + # 1. Propose parameters + proposed = propose_next_params(results, trial, kappa=kappa) + full_params = {**proposed, **FIXED_PARAMS} + log(f'[Wave4] Proposed params: {proposed}') + + # 2. Kill stale processes + kill_stale() + + # 3. Launch training + eval + combined, mini_monaco, model_zip, output, status, elapsed, save_dir = \ + launch_trial(proposed, trial) + + # 4. Guard against None results (timeout / crash) + if combined is None: + log(f'[Wave4] ⚠️ No test score parsed — defaulting to 0.0') + combined = 0.0 + mini_monaco = mini_monaco or 0.0 + + # 5. Update champion + is_champion = champion.update_if_better( + combined, proposed, model_zip, trial, + mini_monaco_reward=mini_monaco or 0.0, + ) + + # 6. Save result + save_result(trial, proposed, combined, mini_monaco, + model_zip, is_champion, status, elapsed) + + # 7. Update GP data + if combined > 0: + # Only add valid runs to GP (zero means crash/timeout — not useful) + results.append({'params': proposed, 'combined_test_score': combined}) + else: + log(f'[Wave4] combined_test_score=0 — excluded from GP (crash/timeout).') + + # 8. Summary + print_summary(results, champion, trial) + + # 9. Periodic git push + if push_every > 0 and trial % push_every == 0: + git_push(trial) + + time.sleep(2) + + log(f'\n[Wave4] ===== All {max_trials} trials complete! 
=====') + print_summary(results, champion, trial=max_trials) + git_push(max_trials) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser( + description='Wave 4: GP+UCB autoresearch, scratch training, 2-track generalisation.') + parser.add_argument('--trials', type=int, default=25, + help='Number of trials (default: 25)') + parser.add_argument('--explore', type=float, default=2.0, + help='UCB kappa — higher = more exploration (default: 2.0)') + parser.add_argument('--push-every', type=int, default=5, + help='Git push every N trials (0=disabled)') + args = parser.parse_args() + + run_wave3(max_trials=args.trials, kappa=args.explore, push_every=args.push_every) diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index fd614cc..5cdd222 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -406,21 +406,20 @@ def test_results_gp_data_never_includes_zero_score_trials(): def test_seed_params_cover_both_low_and_high_lr(): """ - SEED_PARAMS must include at least one low-LR trial (≤ 3e-4) and - at least one higher-LR trial (≥ 5e-4) so the GP starts with data - across the search space, not just at one corner. + SEED_PARAMS must include at least one low-LR trial and one high-LR + trial so the GP starts with real LR diversity. """ - from wave3_controller import SEED_PARAMS + from wave4_controller import SEED_PARAMS lrs = [p['learning_rate'] for p in SEED_PARAMS] - assert min(lrs) <= 3e-4, f"No low-LR seed trial: {lrs}" - assert max(lrs) >= 5e-4, f"No high-LR seed trial: {lrs}" + assert min(lrs) <= 5e-4, f'No low-LR seed trial: {lrs}' + assert max(lrs) >= 5e-4, f'No high-LR seed trial: {lrs}' def test_seed_params_lr_is_not_all_identical(): """SEED_PARAMS must not all have the same LR — that killed Wave 3 v1.""" - from wave3_controller import SEED_PARAMS + from wave4_controller import SEED_PARAMS lrs = [p['learning_rate'] for p in SEED_PARAMS] assert len(set(lrs)) > 1, ( - f"All seed params have the same LR ({lrs[0]}). 
" - "The GP needs diverse starting data to explore the LR dimension." + f'All seed params have the same LR ({lrs[0]}). ' + 'The GP needs diverse starting data to explore the LR dimension.' ) diff --git a/tests/test_wave3.py b/tests/test_wave3.py index 05fd2ac..e171c47 100644 --- a/tests/test_wave3.py +++ b/tests/test_wave3.py @@ -66,14 +66,17 @@ def test_multitrack_runner_no_syntax_errors(): def test_multitrack_runner_training_tracks_defined(): - """TRAINING_TRACKS must contain exactly 3 entries with valid env IDs.""" + """Wave 4: TRAINING_TRACKS = generated_track + mountain_track. + generated_road excluded — visually too similar to generated_track, + and the Phase-2 warm-start caused catastrophic forgetting of all + other tracks.""" from multitrack_runner import TRAINING_TRACKS - assert len(TRAINING_TRACKS) == 3 + assert len(TRAINING_TRACKS) == 2 names = [t[0] for t in TRAINING_TRACKS] envids = [t[1] for t in TRAINING_TRACKS] - assert 'generated_road' in names assert 'generated_track' in names assert 'mountain_track' in names + assert 'generated_road' not in names, 'generated_road must be excluded from Wave 4' for eid in envids: assert eid.startswith('donkey-'), f'Unexpected env ID: {eid}'