diff --git a/DECISIONS.md b/DECISIONS.md index b523348..66ae767 100644 --- a/DECISIONS.md +++ b/DECISIONS.md @@ -190,3 +190,69 @@ def test_runner_exits_cleanly(mock_make): mock_make.return_value = mock_env # ... test runner ``` + +--- + +## ADR-010: Warren is an Outdoor/Road Track — Include in Generalization Benchmark + +**Date:** 2026-04-12 +**Status:** Accepted + +**Context:** Warren (UCSD Warren Track v1.0) is under a tent but has proper road geometry: +white lane lines, yellow centre dashes, orange traffic cones. Unlike purely indoor tracks +(Robo Racing League, Waveshare, Circuit Launch, Warehouse) which use a carpet/hard floor +as the road surface with painted lines, Warren has an actual grass+painted-road layout +with genuine road markings. + +**Decision:** Warren is classified as a "pseudo-outdoor" track — visually similar to +outdoor road tracks despite being sheltered. It is included in the zero-shot test set +(alongside mini_monaco) rather than the indoor-skip category. + +**Consequence:** The Wave 3 generalization benchmark = 2 held-out tracks: +mini_monaco (outdoor trees + fence) + warren (pseudo-outdoor tent + road markings). + +--- + +## ADR-011: Wave 3 Zero-Shot Generalization — Test Tracks Never Used in Training + +**Date:** 2026-04-12 +**Status:** Accepted + +**Context:** Visual overfitting confirmed — Phase 2 champion drives only the track it was +trained on (generated_road). CNN learned background-specific features (desert horizon, +sky colour) rather than road-invariant features (lane markings, road edges). + +**Decision:** Wave 3 uses a strict train/test split: +- **Training tracks:** generated_road, generated_track, mountain_track +- **Test tracks (zero-shot only):** mini_monaco, warren +- **Optimisation target:** `combined_test_score = mini_monaco_mean_reward + warren_mean_reward` + (the GP ONLY sees test-track performance — training performance is not the objective) + +**Rationale:** This mirrors established domain generalisation practice. 
If we train the GP +on training reward, we could find hyperparams that overfit the training tracks while still +failing the test tracks. Only test performance correctly measures generalisation. + +**Consequence:** Zero-shot evaluation happens at the end of every trial. If a trial crashes +both test tracks, score=0. GP learns that those hyperparameters don't generalise. + +--- + +## ADR-012: Warm-Start from Phase 2 Champion for Wave 3 + +**Date:** 2026-04-12 +**Status:** Accepted + +**Context:** Training PPO from scratch across 3 tracks would require ~500k+ timesteps to +reach a competent policy. Phase 2 champion (Trial 20) already drives generated_road well. + +**Decision:** All Wave 3 trials warm-start from `models/champion/model.zip` (Phase 2 +champion). `PPO.load(path, env=new_env)` loads weights; `model.learning_rate` is then +overridden with the GP-proposed learning rate. Falls back to fresh PPO if load fails. + +**Rationale:** The champion already knows how to follow a road. Warm-starting means Wave 3 +only needs to teach *generalisation* — learning to apply the same skill to new visual +inputs. This is far more efficient than teaching driving from scratch. + +**Risk:** If the champion's policy is over-specialised (e.g., relies on very specific pixel +features of desert background), warm-starting could hinder generalisation. This is why the +GP tunes learning_rate — a higher LR will more aggressively overwrite specialised features. diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py new file mode 100644 index 0000000..9f5eea6 --- /dev/null +++ b/agent/multitrack_runner.py @@ -0,0 +1,495 @@ +""" +Wave 3 Multi-Track Runner +========================= +Trains PPO across multiple DonkeyCar tracks by round-robin switching between +training segments. After training, evaluates on zero-shot test tracks +(mini_monaco + warren) to measure cross-track generalization. 
+ +Track classification (from visual analysis): + TRAINING : generated_road, generated_track, mountain_track + (outdoor, same road markings — yellow centre + white edge) + TEST/EVAL : mini_monaco, warren + (never seen during training — generalization benchmark) + SKIPPED : warehouse, robo_racing_league, waveshare, circuit_launch + (fully indoor — different domain entirely) + avc_sparkfun (outdoor but orange markings — too different) + +Track switching strategy: + Close env → send_exit_scene_raw() → wait 4s → gym.make(next_track) + This avoids the double-connect issue in switch_track() when an env is + already open on the current track. + +Key invariants (ADR-005, ADR-006): + - model is always defined before model.save() + - env.close() + time.sleep(2) before every track switch + - Results appended to JSONL, never overwritten + +Output lines parsed by wave3_controller.py: + [W3 Runner][TRAIN] track= segment_reward= + [W3 Runner][TEST] track= mean_reward= mean_steps= + [W3 Runner][TEST] combined_test_score= + +Usage: + python3 multitrack_runner.py \\ + --total-timesteps 200000 \\ + --steps-per-switch 10000 \\ + --learning-rate 0.000225 \\ + --warm-start models/champion/model.zip \\ + --save-dir models/wave3/trial-0001 \\ + --eval-episodes 3 + +Exit codes: + 0 — success, model saved, evaluation complete + 100 — failed to connect to simulator on initial track + 101 — training error + 102 — evaluation error +""" + +import argparse +import os +import sys +import time +import json +import numpy as np +from datetime import datetime + +import gymnasium as gym +import gym_donkeycar +from stable_baselines3 import PPO +from stable_baselines3.common.evaluation import evaluate_policy +from stable_baselines3.common.callbacks import BaseCallback + +# ---- Project paths ---- +AGENT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, AGENT_DIR) + +from donkeycar_sb3_runner import ThrottleClampWrapper, SimHealthCallback +from reward_wrapper import SpeedRewardWrapper 
+from track_switcher import send_exit_scene_raw + +# ---- Track catalogue ---- +# Maps short name → gym env ID +TRAINING_TRACKS = [ + ('generated_road', 'donkey-generated-roads-v0'), + ('generated_track', 'donkey-generated-track-v0'), + ('mountain_track', 'donkey-mountain-track-v0'), +] + +TEST_TRACKS = [ + ('mini_monaco', 'donkey-minimonaco-track-v0'), + ('warren', 'donkey-warren-track-v0'), +] + +# How many steps to sample before deciding the segment reward (shorter than segment) +SEGMENT_EVAL_STEPS = 500 +EXIT_SCENE_WAIT = 4.0 # seconds after exit_scene for sim to reach menu +THROTTLE_MIN = 0.2 # minimum throttle (prevents stationary car) +SPEED_SCALE = 0.1 # SpeedRewardWrapper coefficient + + +# ---- Logging ---- +def log(msg): + ts = datetime.now().strftime('%H:%M:%S') + print(f'[{ts}] {msg}', flush=True) + + +# ---- Health check callback ---- +class HealthCheckCallback(BaseCallback): + """Stops training early if sim is stuck or frozen.""" + def __init__(self, max_stuck_steps=150, min_speed=0.02): + super().__init__(verbose=0) + self.health = SimHealthCallback(max_stuck_steps=max_stuck_steps, min_speed=min_speed) + + def _on_step(self): + infos = self.locals.get('infos', [{}]) + obs = self.locals.get('new_obs', None) + info = infos[0] if infos else {} + obs_arr = obs[0] if obs is not None and len(obs) > 0 else None + healthy = self.health.on_step(obs_arr, None, None, info) + if not healthy: + log('[W3 Runner][HEALTH] Sim stuck/frozen — stopping segment early.') + return False + return True + + +# ---- Environment factory ---- +def wrap_env(raw_env): + """Apply standard wrappers: throttle clamp + speed reward shaping.""" + env = ThrottleClampWrapper(raw_env, throttle_min=THROTTLE_MIN) + env = SpeedRewardWrapper(env, speed_scale=SPEED_SCALE) + return env + + +# ---- Track switching ---- +def close_and_switch(current_env, next_env_id, verbose=True): + """ + Cleanly close current env and connect to next track. + + 1. env.close() + sleep(2) [ADR-006] + 2. 
def create_or_load_model(env, learning_rate, warm_start_path=None, seed=None):
    """
    Return a PPO model bound to ``env``: warm-started if possible, else fresh.

    Args:
        env: Wrapped environment the model is attached to.
        learning_rate: Learning rate to train with. It is applied to both the
            warm-started and the fresh model.
        warm_start_path: Optional path to a saved PPO ``.zip``. When the path
            is missing, or loading raises for any reason, a fresh model is
            created instead.
        seed: Random seed, used only when a fresh model is created.

    Returns:
        A ``PPO`` instance attached to ``env``.
    """
    if warm_start_path:
        if os.path.exists(warm_start_path):
            log(f'[W3 Runner] Loading warm-start model from {warm_start_path}')
            try:
                # The learning rate must be injected at load time. PPO.load()
                # builds model.lr_schedule from the *saved* learning_rate, and
                # the optimiser is driven by lr_schedule, so assigning
                # model.learning_rate afterwards has no effect on training.
                model = PPO.load(
                    warm_start_path,
                    env=env,
                    device='auto',
                    custom_objects={
                        'learning_rate': learning_rate,
                        'lr_schedule': lambda _progress: learning_rate,
                    },
                )
                # Keep the public attribute in sync with the schedule.
                model.learning_rate = learning_rate
                log(f'[W3 Runner] ✅ Warm start loaded. LR overridden to {learning_rate:.6f}')
                return model
            except Exception as e:
                # Deliberate best-effort: any load problem falls back to a
                # fresh model rather than aborting the trial.
                log(f'[W3 Runner] ⚠️ Warm start failed ({e}), training from scratch.')
        else:
            # Previously this case fell through silently.
            log(f'[W3 Runner] ⚠️ Warm-start path not found: {warm_start_path}, '
                f'training from scratch.')

    log(f'[W3 Runner] Creating fresh PPO model (lr={learning_rate:.6f})')
    model = PPO(
        'CnnPolicy',
        env,
        learning_rate=learning_rate,
        verbose=0,
        seed=seed,
    )
    return model
+ + Args: + model: PPO model (already set to first_env) + first_env: The first wrapped training env (already connected) + total_timesteps: Total training budget across all tracks + steps_per_switch: Steps per track segment before switching + + Returns: + env: The last env used (caller must close it) + segment_rewards: List of (track_name, reward) for each completed segment + """ + env = first_env + steps_done = 0 + track_idx = 0 # Start on generated_road (first in TRAINING_TRACKS) + segment_rewards = [] + health_cb = HealthCheckCallback() + + log(f'[W3 Runner] Starting multi-track training:') + log(f' Total timesteps : {total_timesteps:,}') + log(f' Steps per switch: {steps_per_switch:,}') + log(f' Training tracks : {[t[0] for t in TRAINING_TRACKS]}') + log(f' Rotations : ~{total_timesteps // (steps_per_switch * len(TRAINING_TRACKS))} full cycles') + + while steps_done < total_timesteps: + track_name, track_env_id = TRAINING_TRACKS[track_idx] + segment_steps = min(steps_per_switch, total_timesteps - steps_done) + + log(f'\n[W3 Runner] === Segment: {track_name} | ' + f'{steps_done:,}/{total_timesteps:,} steps done | ' + f'segment={segment_steps:,} steps ===') + + # Train segment + model.learn( + total_timesteps=segment_steps, + reset_num_timesteps=False, # Continuous timestep counter across segments + callback=health_cb, + ) + steps_done += segment_steps + + # Quick segment reward estimate (run one short episode deterministically) + try: + seg_reward, _ = evaluate_policy( + model, env, + n_eval_episodes=1, + deterministic=True, + return_episode_rewards=False, + warn=False, + ) + log(f'[W3 Runner][TRAIN] track={track_name} segment_reward={seg_reward:.2f}') + segment_rewards.append((track_name, float(seg_reward))) + except Exception as e: + log(f'[W3 Runner][TRAIN] Segment eval failed: {e}') + segment_rewards.append((track_name, 0.0)) + + if steps_done >= total_timesteps: + break + + # Switch to next training track + next_track_idx = (track_idx + 1) % 
def _run_eval_episode(model, env, max_steps):
    """Roll out one deterministic episode; return (total_reward, steps)."""
    obs, info = env.reset()
    total_reward = 0.0
    steps = 0
    done = False

    while not done and steps < max_steps:
        action, _ = model.predict(obs, deterministic=True)
        result = env.step(action)
        # Accept both the 5-tuple step API and the older 4-tuple one.
        if len(result) == 5:
            obs, reward, terminated, truncated, info = result
            done = terminated or truncated
        else:
            obs, reward, done, info = result
        total_reward += reward
        steps += 1

    return total_reward, steps


def evaluate_test_tracks(model, current_env, eval_episodes,
                         max_episode_steps=2000, drove_far_steps=200):
    """
    Evaluate the trained model on each test track (zero-shot generalization).

    For every entry in TEST_TRACKS the current env is closed, the test track
    is opened via close_and_switch(), and ``eval_episodes`` deterministic
    episodes are run. A track that cannot be opened is recorded with zero
    reward/steps plus an ``'error'`` string, and evaluation moves on.

    Args:
        model: Trained model exposing ``predict(obs, deterministic=True)``.
        current_env: Env that is open when evaluation starts (closed on the
            first switch).
        eval_episodes: Episodes to run per test track. With a value <= 0 no
            episodes are run and the track scores 0.0 instead of NaN.
        max_episode_steps: Hard cap on steps per episode.
        drove_far_steps: A track counts as "driven" when mean steps exceed
            this value.

    Returns:
        test_results: dict of {track_name: {'mean_reward': float,
            'mean_steps': float, 'drove_far': bool}}; failed tracks carry
            'error' instead of 'drove_far'.
        combined_test_score: sum of mean_rewards across test tracks.
        env: the last env that was successfully opened (or ``current_env``
            if no switch succeeded); the caller is responsible for closing it.
    """
    log('\n[W3 Runner] ===== ZERO-SHOT EVALUATION on TEST tracks =====')
    log(f' Test tracks : {[t[0] for t in TEST_TRACKS]}')
    log(f' Eval episodes : {eval_episodes}')

    test_results = {}
    env = current_env

    for track_name, track_env_id in TEST_TRACKS:
        log(f'\n[W3 Runner] Switching to TEST track: {track_name}')
        try:
            env = close_and_switch(env, track_env_id)
        except Exception as e:
            log(f'[W3 Runner] ❌ Cannot connect to test track {track_name}: {e}')
            test_results[track_name] = {'mean_reward': 0.0, 'mean_steps': 0.0, 'error': str(e)}
            continue

        # Episodes are run manually (not via evaluate_policy) so that the
        # step count of each episode can be recorded as well.
        all_rewards = []
        all_steps = []

        for ep in range(eval_episodes):
            total_reward, steps = _run_eval_episode(model, env, max_episode_steps)
            all_rewards.append(total_reward)
            all_steps.append(steps)
            log(f'[W3 Runner] {track_name} ep{ep+1}: reward={total_reward:.1f} steps={steps}')
            time.sleep(0.5)

        # np.mean([]) is NaN; an empty run must score 0.0 so the combined
        # score stays a finite number.
        mean_reward = float(np.mean(all_rewards)) if all_rewards else 0.0
        mean_steps = float(np.mean(all_steps)) if all_steps else 0.0
        drove_far = mean_steps > drove_far_steps

        test_results[track_name] = {
            'mean_reward': mean_reward,
            'mean_steps': mean_steps,
            'drove_far': drove_far,
        }
        verdict = '✅ DRIVES' if drove_far else '❌ CRASHES'
        log(f'[W3 Runner][TEST] track={track_name} mean_reward={mean_reward:.2f} '
            f'mean_steps={mean_steps:.1f} {verdict}')

    # Combined score = sum of mean_rewards on test tracks
    combined = sum(r['mean_reward'] for r in test_results.values())
    log(f'\n[W3 Runner][TEST] combined_test_score={combined:.4f}')
    log(f'[W3 Runner][TEST] mini_monaco_reward='
        f'{test_results.get("mini_monaco", {}).get("mean_reward", 0.0):.4f}')
    log(f'[W3 Runner][TEST] warren_reward='
        f'{test_results.get("warren", {}).get("mean_reward", 0.0):.4f}')

    return test_results, combined, env
+ log(f'[W3 Runner] learning_rate ={args.learning_rate:.6f}') + log(f'[W3 Runner] warm_start ={warm_start}') + log(f'[W3 Runner] save_dir ={save_dir}') + log(f'[W3 Runner] eval_episodes ={args.eval_episodes}') + + # ---- 1. Connect to first training track ---- + first_track_name, first_env_id = TRAINING_TRACKS[0] + log(f'\n[W3 Runner] Starting on first training track: {first_track_name} ({first_env_id})') + + env = None + try: + # Always send exit_scene first to ensure sim is at main menu + log(f'[W3 Runner] Sending exit_scene to clean sim state...') + send_exit_scene_raw() + time.sleep(EXIT_SCENE_WAIT) + + raw_env = gym.make(first_env_id) + env = wrap_env(raw_env) + log(f'[W3 Runner] ✅ Connected to {first_env_id}') + except Exception as e: + log(f'[W3 Runner] ❌ Failed to connect to first training track: {e}') + sys.exit(100) + + # ---- 2. Create or load model ---- + model = None + try: + model = create_or_load_model(env, args.learning_rate, warm_start, args.seed) + except Exception as e: + log(f'[W3 Runner] ❌ Model creation failed: {e}') + try: + env.close() + time.sleep(2) + except Exception: + pass + sys.exit(101) + + # ---- 3. Multi-track training ---- + try: + env, segment_rewards = train_multitrack( + model, env, + total_timesteps=args.total_timesteps, + steps_per_switch=args.steps_per_switch, + ) + except Exception as e: + log(f'[W3 Runner] ❌ Training failed: {e}') + try: + env.close() + time.sleep(2) + except Exception: + pass + sys.exit(101) + + # ---- 4. Save model ---- + # ADR-005: model is always defined before model.save() + try: + os.makedirs(save_dir, exist_ok=True) + save_path = os.path.join(save_dir, 'model') + model.save(save_path) + log(f'[W3 Runner] ✅ Model saved to {save_path}.zip') + except Exception as e: + log(f'[W3 Runner] ⚠️ Model save failed: {e}') + + # ---- 5. 
Zero-shot evaluation on test tracks ---- + combined_test_score = 0.0 + test_results = {} + + if not args.skip_eval: + try: + test_results, combined_test_score, env = evaluate_test_tracks( + model, env, args.eval_episodes + ) + except Exception as e: + log(f'[W3 Runner] ❌ Test evaluation failed: {e}') + # Ensure combined_test_score = 0 is recorded (trial still valid) + + # ---- 6. Print training summary ---- + log(f'\n[W3 Runner] ===== TRAINING SUMMARY =====') + if segment_rewards: + by_track = {} + for tname, rew in segment_rewards: + by_track.setdefault(tname, []).append(rew) + for tname, rewards in by_track.items(): + log(f'[W3 Runner][TRAIN] {tname}: ' + f'mean={np.mean(rewards):.1f} over {len(rewards)} segments') + + log(f'\n[W3 Runner] ===== TEST SUMMARY (zero-shot generalization) =====') + for tname, metrics in test_results.items(): + verdict = '✅ DRIVES' if metrics.get('drove_far') else '❌ CRASHES' + log(f'[W3 Runner][TEST] {tname}: ' + f'reward={metrics.get("mean_reward", 0):.1f} ' + f'steps={metrics.get("mean_steps", 0):.0f} {verdict}') + log(f'[W3 Runner][TEST] combined_test_score={combined_test_score:.4f}') + + # ---- 7. Teardown ---- + log(f'[W3 Runner] Closing final env...') + try: + env.close() + log(f'[W3 Runner] env.close() complete.') + except Exception as e: + log(f'[W3 Runner] Warning: env.close() raised: {e}') + time.sleep(2) # ADR-006 + + log(f'[W3 Runner] ✅ Multi-track runner complete. Exiting.') + + +if __name__ == '__main__': + main() diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt index 35954ab..2c7cd04 100644 --- a/agent/outerloop-results/autoresearch_phase2_log.txt +++ b/agent/outerloop-results/autoresearch_phase2_log.txt @@ -489,3 +489,16 @@ [2026-04-14 09:28:23] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} [2026-04-14 09:28:23] [Champion] 🏆 NEW BEST! 
Trial 5: mean_reward=75.0000 params={'n_steer': 8} [2026-04-14 09:28:23] [AutoResearch] Only 1 results — using random proposal. +[2026-04-14 12:45:34] [AutoResearch] GP UCB top-5 candidates: +[2026-04-14 12:45:34] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-14 12:45:34] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-14 12:45:34] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-14 12:45:34] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-14 12:45:34] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-14 12:45:34] [AutoResearch] Only 1 results — using random proposal. diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt new file mode 100644 index 0000000..15083a3 --- /dev/null +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -0,0 +1,20 @@ +[2026-04-14 12:44:38] [Wave3] Seed trial 1/2: using hardcoded params. 
+[2026-04-14 12:44:38] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 12:44:38] [Wave3] Only 0 results — using random proposal. +[2026-04-14 12:44:38] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 12:44:38] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={} +[2026-04-14 12:45:00] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 12:45:00] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 12:45:00] [Wave3] Only 0 results — using random proposal. +[2026-04-14 12:45:00] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 12:45:00] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={} +[2026-04-14 12:45:27] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 12:45:27] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 12:45:27] [Wave3] Only 0 results — using random proposal. +[2026-04-14 12:45:27] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 12:45:27] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={} +[2026-04-14 12:45:39] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 12:45:39] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 12:45:39] [Wave3] Only 0 results — using random proposal. +[2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! 
"""
Park at Start — loads a track and holds the car at the starting line for screenshots.

Usage:
    python3 park_at_start.py --track generated_road
    python3 park_at_start.py --track generated_track
    python3 park_at_start.py --track mountain_track
    python3 park_at_start.py --track warehouse
    python3 park_at_start.py --track sparkfun_avc
    python3 park_at_start.py --track mini_monaco
    python3 park_at_start.py --track warren
    python3 park_at_start.py --track roboracingleague_1
    python3 park_at_start.py --track waveshare
    python3 park_at_start.py --track circuit_launch

The car will sit still at the starting line.
Press Ctrl+C when done with screenshot. Script will clean up automatically.
"""

import time
import sys
import argparse
import gymnasium as gym
import gym_donkeycar
import numpy as np
from track_switcher import switch_track

# Short track name -> gym env ID
TRACK_MAP = {
    'generated_road': 'donkey-generated-roads-v0',
    'generated_track': 'donkey-generated-track-v0',
    'mountain_track': 'donkey-mountain-track-v0',
    'warehouse': 'donkey-warehouse-v0',
    'sparkfun_avc': 'donkey-avc-sparkfun-v0',
    'mini_monaco': 'donkey-minimonaco-track-v0',
    'warren': 'donkey-warren-track-v0',
    'roboracingleague_1': 'donkey-roboracingleague-track-v0',
    'waveshare': 'donkey-waveshare-v0',
    'circuit_launch': 'donkey-circuit-launch-track-v0',
}


def parse_args(argv=None):
    """Parse command-line arguments (``argv=None`` means ``sys.argv[1:]``)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--track', required=True, choices=list(TRACK_MAP.keys()),
                        help='Track name to load')
    parser.add_argument('--current', default='donkey-generated-roads-v0',
                        help='Current env ID (to exit from)')
    return parser.parse_args(argv)


def main(argv=None):
    """Load the requested track and hold the car still until Ctrl+C."""
    args = parse_args(argv)

    env_id = TRACK_MAP[args.track]
    print(f'\n[Park] Loading track: {args.track} ({env_id})', flush=True)

    env = switch_track(target_env_id=env_id, current_env_id=args.current)
    try:
        # reset() is inside the try so a failure here still reaches cleanup.
        env.reset()

        print(f'\n[Park] ✅ Car is at the starting line on: {args.track}', flush=True)
        print('[Park] Take your screenshot now!', flush=True)
        print('[Park] Press Ctrl+C when done.\n', flush=True)

        # Hold the car still with zero throttle/steer. Built once, directly as
        # zeros, instead of sampling a random action and multiplying by 0.
        parked = np.zeros(env.action_space.shape, dtype=env.action_space.dtype)
        while True:
            env.step(parked)
            time.sleep(0.1)
    except KeyboardInterrupt:
        print('\n[Park] Exiting...', flush=True)
    finally:
        # exit_scene() is best-effort: if it raises, env.close() must still run.
        try:
            env.unwrapped.viewer.exit_scene()
            time.sleep(1)
        except Exception as e:
            print(f'[Park] Warning: exit_scene failed: {e}', flush=True)
        env.close()
        print('[Park] Done. Sim returned to main menu.', flush=True)


if __name__ == '__main__':
    main()
Updates champion if test_score improved + +Results: outerloop-results/autoresearch_results_phase3.jsonl +Champion: models/wave3-champion/model.zip + manifest.json + +Usage: + python3 wave3_controller.py --trials 25 --explore 2.0 --push-every 5 + +Stop with Ctrl+C at any time — resumes from existing results. +================================================================= +""" + +import os +import sys +import json +import time +import subprocess +import re +import shutil +import numpy as np +from datetime import datetime + +# ---- Paths ---- +PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) +REPO_ROOT = os.path.dirname(PROJECT_DIR) +RUNNER = os.path.join(PROJECT_DIR, 'multitrack_runner.py') +RESULTS_DIR = os.path.join(PROJECT_DIR, 'outerloop-results') +MODELS_DIR = os.path.join(PROJECT_DIR, 'models') +CHAMPION_DIR = os.path.join(MODELS_DIR, 'wave3-champion') + +RESULTS_FILE = os.path.join(RESULTS_DIR, 'autoresearch_results_phase3.jsonl') +LOG_FILE = os.path.join(RESULTS_DIR, 'autoresearch_phase3_log.txt') + +WARM_START = os.path.join(MODELS_DIR, 'champion', 'model.zip') # Phase 2 champion + +os.makedirs(RESULTS_DIR, exist_ok=True) +os.makedirs(MODELS_DIR, exist_ok=True) +os.makedirs(CHAMPION_DIR, exist_ok=True) + +# ---- Hyperparameter search space ---- +PARAM_SPACE = { + 'learning_rate': {'type': 'float', 'min': 5e-5, 'max': 1e-3}, + 'steps_per_switch': {'type': 'int', 'min': 2000, 'max': 25000}, + 'total_timesteps': {'type': 'int', 'min': 80000, 'max': 400000}, +} +PARAM_KEYS = list(PARAM_SPACE.keys()) + +FIXED_PARAMS = { + 'eval_episodes': 3, +} + +N_CANDIDATES = 500 +UCB_KAPPA = 2.0 +MIN_TRIALS_BEFORE_GP = 3 +JOB_TIMEOUT = 7200 # 2h — 400k steps on CPU may need time + +# ---- Seed trials near Phase 2 champion ---- +# GP warm-up: first 2 trials use known-good parameters so GP has real prior data +SEED_PARAMS = [ + # Phase 2 champion settings — warm-start, moderate switching + {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 
150000}, + # Higher timesteps, less frequent switching — encourage deeper learning per track + {'learning_rate': 0.000225, 'steps_per_switch': 20000, 'total_timesteps': 300000}, +] + + +# ---- Logging ---- +def log(msg): + ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + line = f'[{ts}] {msg}' + print(line, flush=True) + with open(LOG_FILE, 'a') as f: + f.write(line + '\n') + + +# ---- Parameter encoding ---- +def encode_params(params): + vec = [] + for k in PARAM_KEYS: + if k not in params: + vec.append(0.5) + continue + spec = PARAM_SPACE[k] + v = params[k] + norm = (v - spec['min']) / (spec['max'] - spec['min']) + vec.append(float(np.clip(norm, 0.0, 1.0))) + return np.array(vec) + + +def decode_params(vec): + params = {} + for i, k in enumerate(PARAM_KEYS): + spec = PARAM_SPACE[k] + v = float(vec[i]) * (spec['max'] - spec['min']) + spec['min'] + if spec['type'] == 'int': + v = int(round(v)) + v = max(spec['min'], min(spec['max'], v)) + else: + v = float(np.clip(v, spec['min'], spec['max'])) + params[k] = v + return params + + +def random_candidate(): + return np.random.uniform(0, 1, len(PARAM_KEYS)) + + +# ---- Gaussian Process ---- +class TinyGP: + """Minimal RBF-kernel GP for surrogate modelling (pure numpy, no sklearn).""" + + def __init__(self, length_scale=0.3, noise=1e-3): + self.ls = length_scale + self.noise = noise + self.X = None + self.alpha = None + self.K_inv = None + + def _rbf(self, X1, X2): + diff = X1[:, np.newaxis, :] - X2[np.newaxis, :, :] + sq = np.sum(diff ** 2, axis=-1) + return np.exp(-sq / (2 * self.ls ** 2)) + + def fit(self, X, y): + self.X = np.array(X) + n = len(y) + K = self._rbf(self.X, self.X) + self.noise * np.eye(n) + try: + self.K_inv = np.linalg.inv(K) + except np.linalg.LinAlgError: + self.K_inv = np.linalg.pinv(K) + self.alpha = self.K_inv @ np.array(y) + + def predict(self, X_new): + X_new = np.atleast_2d(X_new) + K_s = self._rbf(X_new, self.X) + mu = K_s @ self.alpha + var = np.maximum( + 1.0 + self.noise - np.sum((K_s @ 
class Wave3ChampionTracker:
    """Track and persist the best Wave 3 trial seen so far.

    State lives in ``<champion_dir>/manifest.json``. When a winning trial has a
    model archive on disk, it is copied to ``<champion_dir>/model.zip``.
    """

    def __init__(self, champion_dir):
        """Create ``champion_dir`` if needed and load any existing manifest."""
        self.champion_dir = champion_dir
        self.manifest_path = os.path.join(champion_dir, 'manifest.json')
        os.makedirs(champion_dir, exist_ok=True)
        self._best = self._load()

    def _load(self):
        """Return the stored manifest merged over an empty placeholder.

        A missing, unreadable or malformed manifest yields the placeholder
        (score ``-inf``, trial ``None``) so that any real trial can replace it.
        Read/parse problems are logged rather than silently ignored.
        """
        manifest = {'combined_test_score': float('-inf'), 'trial': None}
        if not os.path.exists(self.manifest_path):
            return manifest
        try:
            with open(self.manifest_path) as f:
                stored = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            log(f'[Champion] WARNING: could not read {self.manifest_path}: {e}')
            return manifest
        if not isinstance(stored, dict):
            log(f'[Champion] WARNING: ignoring malformed manifest {self.manifest_path}')
            return manifest
        # Merging keeps 'trial' / 'combined_test_score' present even when the
        # file on disk lacks them.
        manifest.update(stored)
        return manifest

    @property
    def best_score(self):
        """Best combined test score so far (``-inf`` when there is none)."""
        score = self._best.get('combined_test_score')
        # A JSON null must not leak out as None: callers compare with <=.
        return float('-inf') if score is None else score

    def update_if_better(self, score, params, model_zip_path, trial,
                         mini_monaco_reward=None, warren_reward=None):
        """Record ``trial`` as the new champion if ``score`` beats the best.

        Args:
            score: Combined test score of the trial.
            params: Hyperparameters used for the trial (JSON-serialisable).
            model_zip_path: Path of the trained model archive, or None.
            trial: Trial number.
            mini_monaco_reward: Optional per-track reward, stored and logged.
            warren_reward: Optional per-track reward, stored and logged.

        Returns:
            True if the champion was replaced, False otherwise.
        """
        if score <= self.best_score:
            return False

        # Only reference a model that really belongs to this trial. Without a
        # source archive, champion_dir/model.zip (if present) is a previous
        # champion's model and must not be attributed to the new one.
        stored_model = None
        if model_zip_path and os.path.exists(model_zip_path):
            stored_model = os.path.join(self.champion_dir, 'model.zip')
            try:
                shutil.copy2(model_zip_path, stored_model)
            except OSError as e:  # includes shutil.Error / SameFileError
                log(f'[Champion] WARNING: copy failed: {e}')
                stored_model = model_zip_path

        manifest = {
            'trial': trial,
            'timestamp': datetime.now().isoformat(),
            'params': params,
            'combined_test_score': score,
            'mini_monaco_reward': mini_monaco_reward,
            'warren_reward': warren_reward,
            'model_path': stored_model,
        }
        # Write to a sibling file and rename so an interrupted write can never
        # leave a truncated manifest behind.
        tmp_path = self.manifest_path + '.tmp'
        with open(tmp_path, 'w') as f:
            json.dump(manifest, f, indent=2)
        os.replace(tmp_path, self.manifest_path)
        self._best = manifest

        def _fmt(reward):
            # The reward arguments default to None; ':.1f' would raise on it.
            return 'n/a' if reward is None else f'{reward:.1f}'

        log(f'[Champion] 🏆 NEW BEST! Trial {trial}: '
            f'combined={score:.2f} '
            f'(mini_monaco={_fmt(mini_monaco_reward)}, warren={_fmt(warren_reward)}) '
            f'params={params}')
        return True

    def summary(self):
        """Return a one-line human-readable description of the champion."""
        if self._best.get('trial') is None:
            return 'No Wave 3 champion yet.'
        return (f"Wave3 Champion: trial={self._best['trial']} "
                f"combined={self.best_score:.2f} "
                f"params={self._best.get('params')}")
def parse_runner_output(output):
    """
    Extract test metrics from multitrack_runner.py stdout.

    Looks for (first occurrence of each):
        [W3 Runner][TEST] combined_test_score=<float>
        [W3 Runner][TEST] mini_monaco_reward=<float>
        [W3 Runner][TEST] warren_reward=<float>

    Plain decimals and scientific notation (e.g. ``1.5e+03``) are accepted.
    A metric that is absent, or whose value is not a well-formed number,
    is reported as None instead of raising.

    Returns:
        (combined_test_score, mini_monaco_reward, warren_reward), each a
        float or None.
    """
    if not output:
        return None, None, None

    # At least one digit is required, so a stray '.' can never reach float();
    # the optional exponent keeps values like 1e-05 from being cut to '1'.
    number = r'([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)'

    def _metric(name):
        found = re.search(
            r'\[W3 Runner\]\[TEST\]\s+' + re.escape(name) + '=' + number,
            output,
        )
        return float(found.group(1)) if found else None

    return (
        _metric('combined_test_score'),
        _metric('mini_monaco_reward'),
        _metric('warren_reward'),
    )
+ Returns: (combined_test_score, mini_monaco_reward, warren_reward, + model_zip_path, output, status, elapsed_sec, save_dir) + """ + save_dir = os.path.join(MODELS_DIR, f'wave3-trial-{trial_num:04d}') + os.makedirs(save_dir, exist_ok=True) + + cmd = [ + 'python3', RUNNER, + '--total-timesteps', str(int(params['total_timesteps'])), + '--steps-per-switch', str(int(params['steps_per_switch'])), + '--learning-rate', str(float(params['learning_rate'])), + '--eval-episodes', str(FIXED_PARAMS['eval_episodes']), + '--save-dir', save_dir, + ] + # Always warm-start from Phase 2 champion if available + if os.path.exists(WARM_START): + cmd += ['--warm-start', WARM_START] + + log(f'[Wave3] Launching trial {trial_num}: {params}') + log(f'[Wave3] Command: {" ".join(cmd)}') + start = time.time() + + try: + proc = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=JOB_TIMEOUT, + ) + elapsed = time.time() - start + output = proc.stdout + '\n' + proc.stderr + status = 'ok' if proc.returncode == 0 else f'error_rc{proc.returncode}' + log(f'[Wave3] Trial {trial_num} finished in {elapsed:.1f}s, rc={proc.returncode}') + except subprocess.TimeoutExpired: + elapsed = time.time() - start + output = f'[TIMEOUT after {elapsed:.1f}s]' + status = 'timeout' + log(f'[Wave3] Trial {trial_num} TIMED OUT after {elapsed:.1f}s') + + # Always print tail of output + print('\n--- Multitrack Runner Output (tail) ---', flush=True) + print(output[-3000:], flush=True) + print('--- End Runner Output ---\n', flush=True) + + # Parse results + combined, mini_monaco, warren_rwd = parse_runner_output(output) + log(f'[Wave3] Parsed: combined={combined} mini_monaco={mini_monaco} warren={warren_rwd}') + + model_zip = os.path.join(save_dir, 'model.zip') + if not os.path.exists(model_zip): + model_zip = None + + return combined, mini_monaco, warren_rwd, model_zip, output, status, elapsed, save_dir + + +# ---- Result saving ---- +def save_result(trial, params, combined, mini_monaco, warren_rwd, + 
def run_wave3(max_trials=25, kappa=UCB_KAPPA, push_every=5):
    """Run the Wave 3 outer loop: propose, train + evaluate, record.

    The loop resumes after the trials already stored in RESULTS_FILE. A trial
    whose output contains no test score (crash / timeout) is saved with score
    0.0, is never eligible to become champion, and is kept out of the GP data.

    Args:
        max_trials: Number of the last trial to run (trials are 1-indexed and
            counted across resumed sessions).
        kappa: UCB exploration weight forwarded to propose_next_params().
        push_every: Commit and push every N trials; 0 disables git pushes.
    """
    log('=' * 65)
    log('[Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search')
    log('[Wave3] Training tracks : generated_road, generated_track, mountain_track')
    log('[Wave3] Test tracks     : mini_monaco, warren (zero-shot)')
    log(f'[Wave3] Max trials      : {max_trials} | kappa={kappa} | push every {push_every}')
    log(f'[Wave3] Results file    : {RESULTS_FILE}')
    log(f'[Wave3] Champion dir    : {CHAMPION_DIR}')
    log(f'[Wave3] Warm start      : {WARM_START}')
    log('=' * 65)

    recorded = load_results()
    champion = Wave3ChampionTracker(CHAMPION_DIR)

    # Every recorded trial advances the trial counter, but failed runs (stored
    # with score 0) must not feed the GP — the same rule step 7 applies live.
    start_trial = len(recorded) + 1
    results = [r for r in recorded if r['combined_test_score'] > 0]

    log(f'[Wave3] Loaded {len(recorded)} existing Phase 3 results '
        f'({len(results)} usable for GP).')
    log(f'[Wave3] {champion.summary()}')
    log(f'[Wave3] Starting from trial {start_trial}.')

    for trial in range(start_trial, max_trials + 1):
        log(f'\n[Wave3] ========== Trial {trial}/{max_trials} ==========')

        # 1. Propose parameters
        proposed = propose_next_params(results, trial, kappa=kappa)
        log(f'[Wave3] Proposed params: {proposed}')

        # 2. Kill stale processes
        kill_stale()

        # 3. Launch training + eval
        (combined, mini_monaco, warren_rwd, model_zip,
         _output, status, elapsed, _save_dir) = launch_trial(proposed, trial)

        # 4. Guard against None results (timeout / crash)
        score_parsed = combined is not None
        if not score_parsed:
            log('[Wave3] ⚠️ No test score parsed — defaulting to 0.0')
            combined = 0.0
            mini_monaco = mini_monaco or 0.0
            warren_rwd = warren_rwd or 0.0

        # 5. Update champion — only for trials that produced a real score.
        # The placeholder 0.0 would otherwise beat the initial -inf and crown
        # a crashed run that has no usable model.
        is_champion = score_parsed and champion.update_if_better(
            combined, proposed, model_zip, trial,
            mini_monaco_reward=mini_monaco or 0.0,
            warren_reward=warren_rwd or 0.0,
        )

        # 6. Save result
        save_result(trial, proposed, combined, mini_monaco, warren_rwd,
                    model_zip, is_champion, status, elapsed)

        # 7. Update GP data
        if combined > 0:
            # Only add valid runs to GP (zero means crash/timeout — not useful)
            results.append({'params': proposed, 'combined_test_score': combined})
        else:
            log('[Wave3] combined_test_score=0 — excluded from GP (crash/timeout).')

        # 8. Summary
        print_summary(results, champion, trial)

        # 9. Periodic git push
        if push_every > 0 and trial % push_every == 0:
            git_push(trial)

        time.sleep(2)

    log(f'\n[Wave3] ===== All {max_trials} trials complete! =====')
    print_summary(results, champion, trial=max_trials)
    # Honour push_every=0 (pushes disabled) and skip the final push when the
    # periodic push of the last trial has already committed everything.
    if push_every > 0 and max_trials % push_every != 0:
        git_push(max_trials)
+ +**Outdoor tracks (same domain — sky, asphalt, lane markings):** +| Track | Road Surface | Markings | Background | Training Role | +|-------|-------------|----------|------------|---------------| +| Generated Road ⭐ | Grey smooth asphalt | Yellow centre + white edge | Bare desert | TRAINED | +| Generated Track | Same grey asphalt | Yellow centre, orange cones | Trees + grass | TRAIN | +| Mountain Track | Darker/wet asphalt | Yellow centre, barriers | Trees + mountains | TRAIN | +| Mini Monaco | Grey asphalt | Yellow centre + white edge | Trees + chain-link fence | TEST (zero-shot) | +| Warren | White painted lines on grass | Yellow dashes | Indoor tent, outdoor setting | TEST (zero-shot) | +| AVC Sparkfun | Cracked rough asphalt | **Orange** markings | Outdoor but very different | SKIP (too different) | + +**Indoor tracks (completely different domain — carpet/floor surface):** +- Warehouse (yellow floor), Robo Racing League (office interior), Waveshare (desktop mat), + Circuit Launch (convention hall) — all SKIP for now + +**Key insight on Warren:** Although technically under a tent shelter, Warren has proper road-style track geometry with white lane lines and yellow centre dashes, similar to outdoor road tracks. It was classified as a pseudo-outdoor track and included in the zero-shot test set (not indoor skip category). + +**Key insight on Robo Racing League 116-step anomaly:** NOT visual similarity — the indoor office track looks nothing like generated_road. More likely the episode boundary tolerance was different, allowing the car to wander longer before triggering `done=True`. + +### Decision: Wave 3 Track Split + +- **Training set (seen during training):** generated_road, generated_track, mountain_track +- **Test set (zero-shot generalization benchmark):** mini_monaco, warren +- **Metric:** `combined_test_score = mini_monaco_mean_reward + warren_mean_reward` + +This mirrors Will Roscoe's approach: train on multiple similar tracks, test on held-out track. 
+ +### Implementation: Wave 3 Autoresearch System + +New files: +- `agent/multitrack_runner.py` — Inner training loop: round-robin across 3 training tracks, + warm-starts from Phase 2 champion, evaluates on test tracks +- `agent/wave3_controller.py` — GP+UCB outer loop: optimises for zero-shot test score +- `tests/test_wave3.py` — 30 new tests (83 total, all passing) + +**Track switching mechanism:** `close_and_switch()`: +1. `env.close()` + `time.sleep(2)` [ADR-006] +2. `send_exit_scene_raw()` + 4s wait +3. `gym.make(next_env_id)` + apply wrappers + +**Training strategy (round-robin):** With steps_per_switch=10000 and 3 tracks, the model +rotates: generated_road → generated_track → mountain_track → generated_road → ... +Each track gets roughly equal time. GP can tune steps_per_switch to change rotation rate. + +**GP+UCB parameter space:** +- `learning_rate`: [5e-5, 1e-3] — centred near Phase 2 champion (2.25e-4) +- `steps_per_switch`: [2000, 25000] — how long to stay on each track +- `total_timesteps`: [80000, 400000] — total training budget + +**Seed trials:** First 2 trials use hardcoded params to bootstrap the GP: +1. lr=2.25e-4, switch=10k, total=150k (near Phase 2 champion) +2. lr=2.25e-4, switch=20k, total=300k (longer, less frequent switching) + +**Warm-start:** All Wave 3 trials warm-start from `models/champion/model.zip` (Phase 2 +champion Trial 20), which already knows how to drive generated_road. This dramatically +speeds up training — the model starts from a working policy, not from scratch. 
+ +**Pre-Wave 3 baseline:** 1/10 tracks drivable (0/2 test tracks) +**Wave 3 goal:** Both test tracks drivable (mini_monaco + warren) — 2/2 held-out tracks diff --git a/docs/track-screenshots/track-01-generated-road.png b/docs/track-screenshots/track-01-generated-road.png new file mode 100644 index 0000000..851e4d4 Binary files /dev/null and b/docs/track-screenshots/track-01-generated-road.png differ diff --git a/docs/track-screenshots/track-02-generated-track.png b/docs/track-screenshots/track-02-generated-track.png new file mode 100644 index 0000000..9cda5cb Binary files /dev/null and b/docs/track-screenshots/track-02-generated-track.png differ diff --git a/docs/track-screenshots/track-03-mountain-track.png b/docs/track-screenshots/track-03-mountain-track.png new file mode 100644 index 0000000..fc00f93 Binary files /dev/null and b/docs/track-screenshots/track-03-mountain-track.png differ diff --git a/docs/track-screenshots/track-04-warehouse.png b/docs/track-screenshots/track-04-warehouse.png new file mode 100644 index 0000000..9c5cb33 Binary files /dev/null and b/docs/track-screenshots/track-04-warehouse.png differ diff --git a/docs/track-screenshots/track-05-avc-sparkfun.png b/docs/track-screenshots/track-05-avc-sparkfun.png new file mode 100644 index 0000000..d446543 Binary files /dev/null and b/docs/track-screenshots/track-05-avc-sparkfun.png differ diff --git a/docs/track-screenshots/track-06-mini-monaco.png b/docs/track-screenshots/track-06-mini-monaco.png new file mode 100644 index 0000000..7d71587 Binary files /dev/null and b/docs/track-screenshots/track-06-mini-monaco.png differ diff --git a/docs/track-screenshots/track-07-warren.png b/docs/track-screenshots/track-07-warren.png new file mode 100644 index 0000000..5baf4d1 Binary files /dev/null and b/docs/track-screenshots/track-07-warren.png differ diff --git a/docs/track-screenshots/track-08-robo-racing-league.png b/docs/track-screenshots/track-08-robo-racing-league.png new file mode 100644 index 
0000000..d4f57d1 Binary files /dev/null and b/docs/track-screenshots/track-08-robo-racing-league.png differ diff --git a/docs/track-screenshots/track-09-waveshare.png b/docs/track-screenshots/track-09-waveshare.png new file mode 100644 index 0000000..c8698b8 Binary files /dev/null and b/docs/track-screenshots/track-09-waveshare.png differ diff --git a/docs/track-screenshots/track-10-circuit-launch.png b/docs/track-screenshots/track-10-circuit-launch.png new file mode 100644 index 0000000..9b55afe Binary files /dev/null and b/docs/track-screenshots/track-10-circuit-launch.png differ diff --git a/tests/test_wave3.py b/tests/test_wave3.py new file mode 100644 index 0000000..d744675 --- /dev/null +++ b/tests/test_wave3.py @@ -0,0 +1,536 @@ +""" +Tests for multitrack_runner.py and wave3_controller.py — no live simulator required. +Uses mocked gym environments and subprocess output. +""" + +import os +import sys +import json +import tempfile +import pytest +import numpy as np +import gymnasium as gym +from unittest.mock import patch, MagicMock, call + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'agent')) + + +# ───────────────────────────────────────────────────────────────────────────── +# Shared mock environment +# ───────────────────────────────────────────────────────────────────────────── + +class MockGymEnv(gym.Env): + """Minimal mock of a DonkeyCar environment (same as other test files).""" + metadata = {'render_modes': []} + + def __init__(self, max_steps=30): + super().__init__() + self.observation_space = gym.spaces.Box( + low=0, high=255, shape=(80, 160, 3), dtype=np.uint8 + ) + self.action_space = gym.spaces.Box( + low=np.array([-1.0, 0.2]), + high=np.array([1.0, 1.0]), + dtype=np.float32 + ) + self._step_count = 0 + self._max_steps = max_steps + self._closed = False + + def reset(self, seed=None, **kwargs): + self._step_count = 0 + return np.zeros((80, 160, 3), dtype=np.uint8), {} + + def step(self, action): + self._step_count += 1 + 
obs = np.random.randint(0, 255, (80, 160, 3), dtype=np.uint8) + reward = float(np.random.uniform(0.5, 2.0)) + terminated = self._step_count >= self._max_steps + info = {'speed': 2.0, 'cte': 0.3, 'pos': [0.0, 0.0, float(self._step_count)]} + return obs, reward, terminated, False, info + + def close(self): + self._closed = True + + +# ───────────────────────────────────────────────────────────────────────────── +# multitrack_runner — module-level tests +# ───────────────────────────────────────────────────────────────────────────── + +def test_multitrack_runner_no_syntax_errors(): + """multitrack_runner.py must compile without syntax errors.""" + path = os.path.join(os.path.dirname(__file__), '..', 'agent', 'multitrack_runner.py') + with open(path) as f: + src = f.read() + compile(src, path, 'exec') # raises SyntaxError if broken + + +def test_multitrack_runner_training_tracks_defined(): + """TRAINING_TRACKS must contain exactly 3 entries with valid env IDs.""" + from multitrack_runner import TRAINING_TRACKS + assert len(TRAINING_TRACKS) == 3 + names = [t[0] for t in TRAINING_TRACKS] + envids = [t[1] for t in TRAINING_TRACKS] + assert 'generated_road' in names + assert 'generated_track' in names + assert 'mountain_track' in names + for eid in envids: + assert eid.startswith('donkey-'), f'Unexpected env ID: {eid}' + + +def test_multitrack_runner_test_tracks_defined(): + """TEST_TRACKS must contain exactly 2 entries: mini_monaco and warren.""" + from multitrack_runner import TEST_TRACKS + assert len(TEST_TRACKS) == 2 + names = [t[0] for t in TEST_TRACKS] + assert 'mini_monaco' in names + assert 'warren' in names + + +def test_multitrack_runner_no_model_save_before_definition(): + """ADR-005: model.save() must never appear before model is defined.""" + path = os.path.join(os.path.dirname(__file__), '..', 'agent', 'multitrack_runner.py') + with open(path) as f: + source = f.read() + lines = source.split('\n') + model_defined_at = None + in_docstring = False + for i, line 
in enumerate(lines): + stripped = line.strip() + # Toggle triple-quote docstring state + if stripped.count('"""') % 2 == 1: + in_docstring = not in_docstring + continue + if in_docstring or stripped.startswith('#'): + continue + if 'model = PPO(' in line or 'model = create_or_load_model' in line: + model_defined_at = i + if 'model.save(' in line and model_defined_at is None: + pytest.fail(f'model.save() before model defined at line {i+1}: {line}') + + +def test_wrap_env_applies_throttle_clamp(): + """wrap_env() should apply ThrottleClampWrapper so throttle low bound = 0.2.""" + from multitrack_runner import wrap_env + raw = MockGymEnv() + wrapped = wrap_env(raw) + # Action space low[1] should be 0.2 (throttle min) + assert float(wrapped.action_space.low[1]) == pytest.approx(0.2) + + +def test_wrap_env_returns_valid_action_space(): + """Wrapped env should have 2D continuous action space.""" + from multitrack_runner import wrap_env + raw = MockGymEnv() + wrapped = wrap_env(raw) + assert hasattr(wrapped.action_space, 'shape') + assert wrapped.action_space.shape == (2,) + + +def test_create_or_load_model_no_warm_start(): + """Without warm-start path, create_or_load_model() returns a fresh PPO.""" + from multitrack_runner import create_or_load_model + mock_env = MockGymEnv() + + with patch('multitrack_runner.PPO') as MockPPO: + mock_model = MagicMock() + MockPPO.return_value = mock_model + result = create_or_load_model(mock_env, learning_rate=0.0003, + warm_start_path=None) + MockPPO.assert_called_once() + assert result is mock_model + + +def test_create_or_load_model_missing_warm_start_falls_back(): + """If warm-start file does not exist, should create fresh model.""" + from multitrack_runner import create_or_load_model + mock_env = MockGymEnv() + + with patch('multitrack_runner.PPO') as MockPPO: + mock_model = MagicMock() + MockPPO.return_value = mock_model + result = create_or_load_model( + mock_env, learning_rate=0.001, + 
def test_create_or_load_model_warm_start_load_failure_falls_back():
    """If PPO.load() raises, create_or_load_model() falls back to fresh PPO."""
    from multitrack_runner import create_or_load_model
    mock_env = MockGymEnv()

    # A real file must exist so the warm-start branch is taken. A temporary
    # directory guarantees cleanup and avoids deleting and re-creating the
    # path that NamedTemporaryFile handed out.
    with tempfile.TemporaryDirectory() as tmpdir:
        warm_path = os.path.join(tmpdir, 'model.zip')
        with open(warm_path, 'w') as f:
            f.write('not a real model')

        with patch('multitrack_runner.PPO') as MockPPO:
            MockPPO.load.side_effect = Exception('incompatible obs space')
            fresh_model = MagicMock()
            MockPPO.return_value = fresh_model

            result = create_or_load_model(mock_env, learning_rate=0.001,
                                          warm_start_path=warm_path)

    # The warm start must have been attempted; otherwise this test would also
    # pass for an implementation that ignores warm_start_path entirely.
    assert MockPPO.load.called, 'PPO.load() should be tried before falling back'
    # Should fall back to exactly one fresh PPO and return it.
    MockPPO.assert_called_once()
    assert result is fresh_model
'donkey-generated-track-v0', verbose=False) + + assert result is new_env + + +def test_evaluate_test_tracks_returns_dict_with_track_keys(): + """evaluate_test_tracks() should return a dict keyed by track names.""" + from multitrack_runner import evaluate_test_tracks, TEST_TRACKS + + mock_model = MagicMock() + mock_model.predict.return_value = (np.array([0.0, 0.5]), None) + + new_env = MockGymEnv(max_steps=10) + + with patch('multitrack_runner.close_and_switch', return_value=new_env), \ + patch('time.sleep'): + test_results, combined, _ = evaluate_test_tracks( + mock_model, current_env=MockGymEnv(), eval_episodes=1 + ) + + track_names = [t[0] for t in TEST_TRACKS] + for name in track_names: + assert name in test_results, f'Missing test result for {name}' + assert isinstance(combined, float) + + +def test_evaluate_test_tracks_combined_score_is_sum(): + """combined_test_score should equal the sum of individual test track rewards.""" + from multitrack_runner import evaluate_test_tracks + + mock_model = MagicMock() + mock_model.predict.return_value = (np.array([0.0, 0.5]), None) + + new_env = MockGymEnv(max_steps=5) + + with patch('multitrack_runner.close_and_switch', return_value=new_env), \ + patch('time.sleep'): + test_results, combined, _ = evaluate_test_tracks( + mock_model, current_env=MockGymEnv(), eval_episodes=2 + ) + + expected = sum(r['mean_reward'] for r in test_results.values()) + assert combined == pytest.approx(expected, rel=1e-5) + + +def test_evaluate_test_tracks_connection_failure_gives_zero(): + """If we cannot connect to a test track, its score should be 0.0.""" + from multitrack_runner import evaluate_test_tracks + + mock_model = MagicMock() + + with patch('multitrack_runner.close_and_switch', side_effect=Exception('sim dead')), \ + patch('time.sleep'): + test_results, combined, _ = evaluate_test_tracks( + mock_model, current_env=MockGymEnv(), eval_episodes=1 + ) + + for metrics in test_results.values(): + assert metrics['mean_reward'] == 0.0 + 
assert combined == 0.0 + + +# ───────────────────────────────────────────────────────────────────────────── +# wave3_controller — unit tests +# ───────────────────────────────────────────────────────────────────────────── + +def test_wave3_controller_no_syntax_errors(): + """wave3_controller.py must compile without syntax errors.""" + path = os.path.join(os.path.dirname(__file__), '..', 'agent', 'wave3_controller.py') + with open(path) as f: + src = f.read() + compile(src, path, 'exec') + + +def test_wave3_encode_decode_round_trip(): + """encode_params → decode_params should round-trip within ±5% for each param.""" + from wave3_controller import encode_params, decode_params, PARAM_SPACE + + original = { + 'learning_rate': 0.000225, + 'steps_per_switch': 10000, + 'total_timesteps': 200000, + } + vec = encode_params(original) + recovered = decode_params(vec) + + for k in original: + spec = PARAM_SPACE[k] + rng = spec['max'] - spec['min'] + assert abs(recovered[k] - original[k]) < 0.05 * rng, \ + f'Round-trip error for {k}: {original[k]} → {recovered[k]}' + + +def test_wave3_decode_clamps_to_bounds(): + """decode_params() should clamp output to [min, max] even if vec is out-of-range.""" + from wave3_controller import decode_params, PARAM_SPACE + + # vec values outside [0,1] + vec = np.array([-0.5, 1.5, 2.0]) + params = decode_params(vec) + + for k, v in params.items(): + spec = PARAM_SPACE[k] + assert spec['min'] <= v <= spec['max'], \ + f'{k}={v} outside [{spec["min"]}, {spec["max"]}]' + + +def test_wave3_tinygp_predict_shape(): + """TinyGP.predict() should return (mu, sigma) with shape (N,) for N candidates.""" + from wave3_controller import TinyGP + + gp = TinyGP() + X_train = np.random.rand(5, 3) + y_train = np.random.rand(5) + gp.fit(X_train, y_train) + + X_test = np.random.rand(10, 3) + mu, sigma = gp.predict(X_test) + + assert mu.shape == (10,), f'Expected (10,), got {mu.shape}' + assert sigma.shape == (10,), f'Expected (10,), got {sigma.shape}' + assert 
np.all(sigma >= 0), 'Sigma must be non-negative'
# NOTE(review): the line above continues a statement that starts before this
# span; it is kept verbatim and must stay attached to that statement.


def test_wave3_tinygp_ucb_selects_high_value():
    """The GP's predicted mean is higher at the high-reward training point.

    Only the first value returned by ``predict`` (``mu``) is compared; the
    second return value is ignored, so no UCB/acquisition value is checked.
    """
    from wave3_controller import TinyGP

    gp = TinyGP(length_scale=0.2)
    # Point at 0.9 has reward 10, point at 0.1 has reward 0
    X_train = np.array([[0.1, 0.1, 0.1], [0.9, 0.9, 0.9]])
    y_train = np.array([0.0, 10.0])
    gp.fit(X_train, y_train)

    # Predict at the same two points that were used for fitting
    X_test = np.array([[0.1, 0.1, 0.1], [0.9, 0.9, 0.9]])
    mu, _ = gp.predict(X_test)

    assert mu[1] > mu[0], 'GP should predict higher value near the high-reward training point'


def test_wave3_propose_uses_seed_for_first_trials():
    """For trials 1..len(SEED_PARAMS), propose_next_params() returns the seed params.

    Trial numbers are 1-based: trial ``i`` is compared against the ``i``-th
    entry of ``SEED_PARAMS``, key by key.
    """
    from wave3_controller import propose_next_params, SEED_PARAMS

    results = []  # No prior data

    for i, seed in enumerate(SEED_PARAMS, start=1):
        proposed = propose_next_params(results, trial_num=i)
        for k, v in seed.items():
            assert proposed[k] == v, f'Trial {i}: {k} should be {v}, got {proposed[k]}'


def test_wave3_propose_random_when_few_results():
    """With fewer than MIN_TRIALS_BEFORE_GP results, the proposal stays in bounds.

    The test checks that every proposed parameter lies inside its
    ``PARAM_SPACE`` range once the seed phase is over but the result history
    is still below the GP threshold.
    """
    from wave3_controller import propose_next_params, PARAM_SPACE, MIN_TRIALS_BEFORE_GP, SEED_PARAMS

    # Put trial_num beyond seed phase but with too few results for GP
    trial_num = len(SEED_PARAMS) + 1
    results = []  # Empty — below threshold

    # Make the premise explicit: if the threshold ever drops to 0 this test
    # would silently exercise the GP path instead of the fallback path.
    assert len(results) < MIN_TRIALS_BEFORE_GP, \
        'Test premise broken: an empty history must be below the GP threshold'

    proposed = propose_next_params(results, trial_num=trial_num)

    for k, spec in PARAM_SPACE.items():
        assert spec['min'] <= proposed[k] <= spec['max'], \
            f'{k}={proposed[k]} out of bounds [{spec["min"]}, {spec["max"]}]'


def test_wave3_parse_runner_output_combined_score():
    """parse_runner_output() should extract combined_test_score correctly.

    The sample log contains both per-track summary lines and the three
    ``key=value`` metric lines; all three returned values are checked.
    """
    from wave3_controller import parse_runner_output

    output = """
[12:34:56] [W3 Runner][TEST] track=mini_monaco mean_reward=1234.56 mean_steps=450.0 ✅ DRIVES
[12:34:57] [W3 Runner][TEST] track=warren mean_reward=789.01 mean_steps=310.0 ✅ DRIVES
[12:34:57] [W3 Runner][TEST] mini_monaco_reward=1234.5600
[12:34:57] [W3 Runner][TEST] warren_reward=789.0100
[12:34:57] [W3 Runner][TEST] combined_test_score=2023.5700
"""
    combined, mini_monaco, warren = parse_runner_output(output)
    assert combined == pytest.approx(2023.57, rel=1e-4)
    assert mini_monaco == pytest.approx(1234.56, rel=1e-4)
    assert warren == pytest.approx(789.01, rel=1e-4)


def test_wave3_parse_runner_output_missing_returns_none():
    """parse_runner_output() returns None for each metric if not found."""
    from wave3_controller import parse_runner_output

    output = 'Training started... timeout'
    combined, mini_monaco, warren = parse_runner_output(output)
    assert combined is None
    assert mini_monaco is None
    assert warren is None


def test_wave3_champion_tracker_update_and_load():
    """Wave3ChampionTracker should update champion and persist to disk.

    A second tracker built on the same directory must report the same best
    score and trial number as the one that recorded them.
    """
    from wave3_controller import Wave3ChampionTracker

    with tempfile.TemporaryDirectory() as tmpdir:
        tracker = Wave3ChampionTracker(tmpdir)
        assert tracker.best_score == float('-inf')

        updated = tracker.update_if_better(
            score=1500.0,
            params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000},
            model_zip_path=None,
            trial=3,
            mini_monaco_reward=900.0,
            warren_reward=600.0,
        )
        assert updated is True
        assert tracker.best_score == pytest.approx(1500.0)

        # Reload from disk (must happen while tmpdir still exists)
        tracker2 = Wave3ChampionTracker(tmpdir)
        assert tracker2.best_score == pytest.approx(1500.0)
        assert tracker2._best['trial'] == 3


def test_wave3_champion_tracker_does_not_regress():
    """Champion should not be updated if new score is lower."""
    from wave3_controller import Wave3ChampionTracker

    with tempfile.TemporaryDirectory() as tmpdir:
        tracker = Wave3ChampionTracker(tmpdir)
        tracker.update_if_better(2000.0, {}, None, 1,
                                 mini_monaco_reward=1200.0, warren_reward=800.0)
        updated = tracker.update_if_better(1500.0, {}, None, 2,
                                           mini_monaco_reward=900.0, warren_reward=600.0)
        assert updated is False
        assert tracker.best_score == pytest.approx(2000.0)


def test_wave3_results_appended_not_overwritten():
    """Saving results should append to JSONL file, never overwrite.

    ``wave3_controller.RESULTS_FILE`` is temporarily pointed at a file inside
    a temporary directory and restored afterwards, even if the test fails.
    """
    from wave3_controller import save_result

    with tempfile.TemporaryDirectory() as tmpdir:
        # Monkey-patch the RESULTS_FILE path
        import wave3_controller
        original_path = wave3_controller.RESULTS_FILE
        wave3_controller.RESULTS_FILE = os.path.join(tmpdir, 'phase3_results.jsonl')

        try:
            # Write 3 records
            for i in range(3):
                save_result(
                    trial=i + 1,
                    params={'learning_rate': 0.0002, 'steps_per_switch': 5000,
                            'total_timesteps': 100000},
                    combined=float(i * 100),
                    mini_monaco=float(i * 60),
                    warren_rwd=float(i * 40),
                    model_path=None,
                    is_champion=(i == 2),
                    status='ok',
                    elapsed=120.0 * (i + 1),
                )

            # Should have 3 lines (blank lines are ignored)
            with open(wave3_controller.RESULTS_FILE, encoding='utf-8') as f:
                lines = [line.strip() for line in f if line.strip()]
            assert len(lines) == 3, f'Expected 3 result lines, got {len(lines)}'

            # All should be valid JSON
            for line in lines:
                rec = json.loads(line)
                assert 'combined_test_score' in rec
                assert 'params' in rec
        finally:
            # Always restore the module-level path so other tests are unaffected
            wave3_controller.RESULTS_FILE = original_path


def test_wave3_zero_score_excluded_from_gp():
    """
    Trials with combined_test_score=0 should not be added to the GP data list
    (they indicate crashes/timeouts, not useful signal).
    """
    # Simulate the logic in run_wave3: only append if combined > 0
    # NOTE(review): this re-implements the filter locally and does not call
    # run_wave3 itself, so it cannot detect a regression there — consider
    # exercising the production code path instead.
    scores = [0.0, 1500.0, 0.0, 800.0]
    results = [
        {'params': {}, 'combined_test_score': score}
        for score in scores
        if score > 0
    ]

    assert len(results) == 2, 'Only non-zero scores should feed the GP'
    assert all(r['combined_test_score'] > 0 for r in results)


def test_wave3_param_space_covers_phase2_champion():
    """The Phase 3 search space must contain the Phase 2 champion's parameters."""
    from wave3_controller import PARAM_SPACE

    # Phase 2 champion: lr=0.000225, which falls in [5e-5, 1e-3]
    assert PARAM_SPACE['learning_rate']['min'] <= 0.000225 <= PARAM_SPACE['learning_rate']['max']
    # Moderate switching schedule
    assert PARAM_SPACE['steps_per_switch']['min'] <= 10000 <= PARAM_SPACE['steps_per_switch']['max']
    # Phase 2 had 13k timesteps — Phase 3 needs much more; check min >= 50k
    assert PARAM_SPACE['total_timesteps']['min'] >= 50000


def test_wave3_seed_params_within_space():
    """All seed parameter sets must be within the defined search space."""
    from wave3_controller import SEED_PARAMS, PARAM_SPACE

    for i, seed in enumerate(SEED_PARAMS):
        for k, v in seed.items():
            spec = PARAM_SPACE[k]
            assert spec['min'] <= v <= spec['max'], \
                f'Seed {i}: {k}={v} outside [{spec["min"]}, {spec["max"]}]'


def test_health_check_callback_stops_on_stuck():
    """HealthCheckCallback should return False when speed stays near zero."""
    from multitrack_runner import HealthCheckCallback

    cb = HealthCheckCallback(max_stuck_steps=5, min_speed=0.1)
    cb.health._stuck_count = 4  # One more step will trigger

    # Simulate a callback step with very low speed
    cb.locals = {
        'infos': [{'speed': 0.0}],
        'new_obs': None,
    }
    result = cb._on_step()
    assert result is False, 'Callback should stop training when sim is stuck'