From 650f893d2d804ee5ae6ca91303a2717a50743022 Mon Sep 17 00:00:00 2001
From: Paul Huliganga
Date: Tue, 14 Apr 2026 21:27:43 -0400
Subject: [PATCH] =?UTF-8?q?fix:=20complete=20LR=20override=20=E2=80=94=20m?=
 =?UTF-8?q?ust=20patch=20lr=5Fschedule,=20not=20just=20param=5Fgroups?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PPO.load() bakes lr_schedule=FloatSchedule(saved_lr) into the model.
train() calls _update_learning_rate(), which reads lr_schedule, not
model.learning_rate. So even with param_groups patched, the first
gradient step reverts the optimizer to the saved LR.

Complete 3-part fix in create_or_load_model():

    model.learning_rate = lr                         # attribute
    model.lr_schedule = get_schedule_fn(lr)          # prevents train() reverting
    for pg in optimizer.param_groups: pg['lr'] = lr  # immediate effect

(The same three steps are sketched as a standalone helper after the diff.)

Also:
- SEED_PARAMS: second seed now uses LR=0.001 (was 0.000225) so GP starts
  with real LR diversity instead of two identical seeds
- tests/test_end_to_end.py: 13 new tests covering the full LR override
  path, including a live learn() call; would have caught both bugs
- Phase 3 results re-cleared (seed trial 1 ran with the half-fix)
- 96 tests total, all passing

Agent: pi
Tests: 96 passed
Tests-Added: 13
TypeScript: N/A
---
 agent/multitrack_runner.py                    |  16 +-
 .../autoresearch_phase2_log.txt               |  13 +
 .../autoresearch_phase3_log.txt               |  23 +
 ...earch_results_phase3_CONTAMINATED_v2.jsonl |   0
 agent/wave3_controller.py                     |   8 +-
 tests/test_end_to_end.py                      | 426 ++++++++++++++++++
 6 files changed, 476 insertions(+), 10 deletions(-)
 create mode 100644 agent/outerloop-results/autoresearch_results_phase3_CONTAMINATED_v2.jsonl
 create mode 100644 tests/test_end_to_end.py

diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py
index bb821ce..7107a5a 100644
--- a/agent/multitrack_runner.py
+++ b/agent/multitrack_runner.py
@@ -56,6 +56,7 @@
 from datetime import datetime
 import gymnasium as gym
 import gym_donkeycar
 from stable_baselines3 import PPO
+from stable_baselines3.common.utils import get_schedule_fn
 from stable_baselines3.common.evaluation import evaluate_policy
 from stable_baselines3.common.callbacks import BaseCallback
@@ -202,16 +203,19 @@ def create_or_load_model(env, learning_rate, warm_start_path=None, seed=None):
     log(f'[W3 Runner] Loading warm-start model from {warm_start_path}')
     try:
         model = PPO.load(warm_start_path, env=env, device='auto')
-        # Override learning rate — set BOTH the SB3 attribute AND the
-        # optimizer param groups. PPO.load() restores the saved optimizer
-        # state (lr=0.000225 from Phase 2), so just setting model.learning_rate
-        # is not enough — the optimizer ignores it until _update_learning_rate
-        # is called, and even then only if it reads the attribute correctly.
+        # Three-part LR override required after PPO.load():
+        #   1. model.learning_rate — Python attribute (used to recreate lr_schedule)
+        #   2. model.lr_schedule   — FloatSchedule used by _update_learning_rate()
+        #                            during every train() call. Without this,
+        #                            _update_learning_rate() reverts the optimizer
+        #                            back to the saved LR on the first gradient step.
+        #   3. optimizer param_groups — immediate effect before first train()
         model.learning_rate = learning_rate
+        model.lr_schedule = get_schedule_fn(learning_rate)
         for pg in model.policy.optimizer.param_groups:
             pg['lr'] = learning_rate
         log(f'[W3 Runner] ✅ Warm start loaded. LR overridden to {learning_rate:.6f} '
-            f'(set on model + {len(model.policy.optimizer.param_groups)} optimizer param group(s))')
+            f'(model + lr_schedule + {len(model.policy.optimizer.param_groups)} optimizer param group(s))')
         return model
     except Exception as e:
         log(f'[W3 Runner] ⚠️ Warm start failed ({e}), training from scratch.')
diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt
index 7ae7878..9a46dba 100644
--- a/agent/outerloop-results/autoresearch_phase2_log.txt
+++ b/agent/outerloop-results/autoresearch_phase2_log.txt
@@ -567,3 +567,16 @@
 [2026-04-14 20:37:35] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
 [2026-04-14 20:37:35] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
 [2026-04-14 20:37:35] [AutoResearch] Only 1 results — using random proposal.
+[2026-04-14 21:27:08] [AutoResearch] GP UCB top-5 candidates:
+[2026-04-14 21:27:08] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
+[2026-04-14 21:27:08] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
+[2026-04-14 21:27:08] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
+[2026-04-14 21:27:08] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
+[2026-04-14 21:27:08] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
+[2026-04-14 21:27:08] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
+[2026-04-14 21:27:08] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
+[2026-04-14 21:27:08] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
+[2026-04-14 21:27:08] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
+[2026-04-14 21:27:08] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
+[2026-04-14 21:27:08] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
+[2026-04-14 21:27:08] [AutoResearch] Only 1 results — using random proposal.
diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt
index 45d5da0..0d350df 100644
--- a/agent/outerloop-results/autoresearch_phase3_log.txt
+++ b/agent/outerloop-results/autoresearch_phase3_log.txt
@@ -281,3 +281,26 @@
 [2026-04-14 20:37:40] [Wave3] Only 0 results — using random proposal.
 [2026-04-14 20:37:40] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
 [2026-04-14 20:37:40] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
+[2026-04-14 20:37:55] =================================================================
+[2026-04-14 20:37:55] [Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search
+[2026-04-14 20:37:55] [Wave3] Training tracks : generated_road, generated_track, mountain_track
+[2026-04-14 20:37:55] [Wave3] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition)
+[2026-04-14 20:37:55] [Wave3] Max trials : 25 | kappa=2.0 | push every 5
+[2026-04-14 20:37:55] [Wave3] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase3.jsonl
+[2026-04-14 20:37:55] [Wave3] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion
+[2026-04-14 20:37:55] [Wave3] Warm start : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
+[2026-04-14 20:37:55] =================================================================
+[2026-04-14 20:37:55] [Wave3] Loaded 0 existing Phase 3 results.
+[2026-04-14 20:37:55] [Wave3] Wave3 Champion: trial=5 score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625}
+[2026-04-14 20:37:55] [Wave3] Starting from trial 1.
+[2026-04-14 20:37:55] 
+[Wave3] ========== Trial 1/25 ==========
+[2026-04-14 20:37:55] [Wave3] Seed trial 1/2: using hardcoded params.
+[2026-04-14 20:37:55] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
+[2026-04-14 20:37:57] [Wave3] Launching trial 1: {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
+[2026-04-14 20:37:57] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 45000 --steps-per-switch 5000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
+[2026-04-14 21:27:21] [Wave3] Seed trial 1/2: using hardcoded params.
+[2026-04-14 21:27:21] [Wave3] Seed trial 2/2: using hardcoded params.
+[2026-04-14 21:27:21] [Wave3] Only 0 results — using random proposal.
+[2026-04-14 21:27:21] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
+[2026-04-14 21:27:21] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
diff --git a/agent/outerloop-results/autoresearch_results_phase3_CONTAMINATED_v2.jsonl b/agent/outerloop-results/autoresearch_results_phase3_CONTAMINATED_v2.jsonl
new file mode 100644
index 0000000..e69de29
diff --git a/agent/wave3_controller.py b/agent/wave3_controller.py
index b9c5947..1c038cb 100644
--- a/agent/wave3_controller.py
+++ b/agent/wave3_controller.py
@@ -81,10 +81,10 @@ JOB_TIMEOUT = 7200  # 2h — 400k steps on CPU may need time
 # ---- Seed trials near Phase 2 champion ----
 # GP warm-up: first 2 trials use known-good parameters so GP has real prior data
 SEED_PARAMS = [
-    # 3 full rotations through all 3 training tracks (~35 min per trial)
-    {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000},
-    # Slower switching, more time per track (~45 min per trial)
-    {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000},
+    # Low LR (same as Phase 2 champion) — baseline, ~35 min per trial
+    {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000},
+    # High LR — tests whether faster adaptation generalises better, ~35 min
+    {'learning_rate': 0.001000, 'steps_per_switch': 5000, 'total_timesteps': 45000},
 ]
diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py
new file mode 100644
index 0000000..fd614cc
--- /dev/null
+++ b/tests/test_end_to_end.py
@@ -0,0 +1,426 @@
+"""
+End-to-end pipeline tests — no live simulator required.
+
+These tests exist to catch integration-level bugs that unit tests miss.
+The LR-override bug (Wave 3: all trials silently ran at LR=0.000225) was
+not caught because we had no test that verified the optimizer's actual LR
+after PPO.load(). Every test in this file targets a real failure that
+already burned training time.
+
+Test categories
+---------------
+1. LR override — PPO.load() + lr_schedule + param_groups, incl. a live learn()
+2. create_or_load_model — the function that wraps PPO.load in multitrack_runner
+3. Output parsing — parse_runner_output extracts correct metrics
+4. Results round-trip — save → load → GP uses correct data
+5. Seed params sanity — seed trials cover the important LR range
+"""
+
+import json
+import os
+import sys
+import tempfile
+
+import numpy as np
+import pytest
+
+# ---------------------------------------------------------------------------
+# Path setup — tests run from repo root or tests/ dir
+# ---------------------------------------------------------------------------
+AGENT_DIR = os.path.join(os.path.dirname(__file__), '..', 'agent')
+if AGENT_DIR not in sys.path:
+    sys.path.insert(0, AGENT_DIR)
+
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+import gymnasium as gym
+import numpy as np
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
+
+
+class MockDonkeyEnv(gym.Env):
+    """
+    Minimal DonkeyCar-shaped env: image observations, Box actions.
+    No simulator required.
+    """
+    metadata = {'render_modes': []}
+
+    def __init__(self):
+        super().__init__()
+        self.observation_space = gym.spaces.Box(
+            low=0, high=255, shape=(120, 160, 3), dtype=np.uint8
+        )
+        self.action_space = gym.spaces.Box(
+            low=np.array([-1.0, 0.0]),
+            high=np.array([1.0, 1.0]),
+            dtype=np.float32,
+        )
+        self._step_count = 0
+
+    def reset(self, seed=None, **kwargs):
+        self._step_count = 0
+        return np.zeros((120, 160, 3), dtype=np.uint8), {}
+
+    def step(self, action):
+        self._step_count += 1
+        obs = np.random.randint(0, 255, (120, 160, 3), dtype=np.uint8)
+        terminated = self._step_count >= 30
+        return obs, 1.0, terminated, False, {'speed': 2.0, 'cte': 0.1}
+
+    def close(self):
+        pass
+
+
+def make_vec_env():
+    """Wrap MockDonkeyEnv the same way SB3 expects for CnnPolicy."""
+    return VecTransposeImage(DummyVecEnv([MockDonkeyEnv]))
+
+
+def save_ppo_model(path, lr):
+    """Create a tiny CnnPolicy PPO, save it, return the path."""
+    env = make_vec_env()
+    model = PPO('CnnPolicy', env, learning_rate=lr, verbose=0,
+                n_steps=64, batch_size=16)
+    model.save(path)
+    env.close()
+    return path
+
+
+# ===========================================================================
+# 1. LR override — the bug that burned 8 hours of training
+# ===========================================================================
+
+def test_lr_override_bug_demonstration():
+    """
+    Setting model.learning_rate after PPO.load() does NOT update the
+    optimizer. This demonstrates the exact bug that caused all Wave 3
+    trials to run at 0.000225 regardless of the GP-proposed LR.
+    """
+    ORIGINAL_LR = 0.000225
+    NEW_LR = 0.001
+
+    env = make_vec_env()
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, 'model.zip')
+        save_ppo_model(path, ORIGINAL_LR)
+
+        loaded = PPO.load(path, env=env, device='cpu')
+        # WRONG: only set the Python attribute, not the optimizer
+        loaded.learning_rate = NEW_LR
+
+        actual_lr = loaded.policy.optimizer.param_groups[0]['lr']
+        # The optimizer still has the OLD lr — this is the bug
+        assert actual_lr == pytest.approx(ORIGINAL_LR), (
+            f"Expected optimizer to STILL have old LR {ORIGINAL_LR} "
+            f"(demonstrating the bug), got {actual_lr}"
+        )
+    env.close()
+
+
+def test_lr_override_fix_sets_optimizer_param_groups():
+    """
+    The correct fix: after PPO.load(), set lr on BOTH model.learning_rate
+    AND every optimizer param_group. This is what multitrack_runner now does.
+    """
+    ORIGINAL_LR = 0.000225
+    NEW_LR = 0.001
+
+    env = make_vec_env()
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, 'model.zip')
+        save_ppo_model(path, ORIGINAL_LR)
+
+        loaded = PPO.load(path, env=env, device='cpu')
+        # CORRECT fix
+        loaded.learning_rate = NEW_LR
+        for pg in loaded.policy.optimizer.param_groups:
+            pg['lr'] = NEW_LR
+
+        # model attribute updated
+        assert loaded.learning_rate == pytest.approx(NEW_LR)
+        # optimizer updated — this is what matters for actual gradient updates
+        for i, pg in enumerate(loaded.policy.optimizer.param_groups):
+            assert pg['lr'] == pytest.approx(NEW_LR), (
+                f"param_group[{i}]['lr'] = {pg['lr']}, expected {NEW_LR}"
+            )
+    env.close()
+
+
+def test_lr_override_survives_one_training_step():
+    """
+    After the COMPLETE fix (learning_rate + lr_schedule + param_groups),
+    the optimizer LR must still be correct after one real PPO gradient update.
+
+    Root cause of the original bug:
+    - PPO.load() bakes lr_schedule = FloatSchedule(0.000225) into the model
+    - train() calls _update_learning_rate() which reads lr_schedule, NOT learning_rate
+    - So even if param_groups are patched, train() overwrites them back to 0.000225
+    - Fix: also patch model.lr_schedule = get_schedule_fn(NEW_LR)
+    """
+    from stable_baselines3.common.utils import get_schedule_fn
+    ORIGINAL_LR = 0.000225
+    NEW_LR = 0.001
+
+    env = make_vec_env()
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, 'model.zip')
+        save_ppo_model(path, ORIGINAL_LR)
+
+        loaded = PPO.load(path, env=env, device='cpu')
+        # Complete 3-part fix
+        loaded.learning_rate = NEW_LR
+        loaded.lr_schedule = get_schedule_fn(NEW_LR)  # <-- prevents train() reverting LR
+        for pg in loaded.policy.optimizer.param_groups:
+            pg['lr'] = NEW_LR
+
+        # Run a minimal training step
+        loaded.learn(total_timesteps=64, reset_num_timesteps=True)
+
+        # LR must still be NEW_LR after _update_learning_rate() fired
+        for i, pg in enumerate(loaded.policy.optimizer.param_groups):
+            assert pg['lr'] == pytest.approx(NEW_LR), (
+                f"After learn(), param_group[{i}]['lr'] = {pg['lr']}, "
+                f"expected {NEW_LR}. lr_schedule was not patched correctly."
+            )
+    env.close()
+
+
+# ===========================================================================
+# 2. create_or_load_model — the actual function in multitrack_runner
+# ===========================================================================
+
+def test_create_or_load_model_warm_start_lr_reaches_optimizer():
+    """
+    create_or_load_model() must leave the optimizer at the requested LR,
+    not at the LR baked into the saved model — both before AND after a
+    training step (lr_schedule must be patched, not just param_groups).
+    """
+    from multitrack_runner import create_or_load_model
+
+    ORIGINAL_LR = 0.000225
+    NEW_LR = 0.00083
+
+    env = make_vec_env()
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, 'model.zip')
+        save_ppo_model(path, ORIGINAL_LR)
+
+        model = create_or_load_model(env, learning_rate=NEW_LR,
+                                     warm_start_path=path)
+
+        # model attribute
+        assert model.learning_rate == pytest.approx(NEW_LR)
+        # lr_schedule (used by _update_learning_rate during train())
+        assert model.lr_schedule(1.0) == pytest.approx(NEW_LR), (
+            f"lr_schedule(1.0) = {model.lr_schedule(1.0)}, expected {NEW_LR}. "
+            "train() will revert optimizer to old LR without this fix."
+        )
+        # optimizer param_groups
+        for i, pg in enumerate(model.policy.optimizer.param_groups):
+            assert pg['lr'] == pytest.approx(NEW_LR)
+
+        # Verify it survives an actual training step
+        model.learn(total_timesteps=64, reset_num_timesteps=True)
+        for i, pg in enumerate(model.policy.optimizer.param_groups):
+            assert pg['lr'] == pytest.approx(NEW_LR), (
+                f"After learn(), param_group[{i}]['lr'] = {pg['lr']}, expected {NEW_LR}"
+            )
+    env.close()
+
+
+def test_create_or_load_model_fresh_model_uses_correct_lr():
+    """
+    When warm_start_path is None/missing, create_or_load_model() must
+    create a fresh PPO with the requested LR.
+    """
+    from multitrack_runner import create_or_load_model
+
+    LR = 0.00075
+    env = make_vec_env()
+    model = create_or_load_model(env, learning_rate=LR, warm_start_path=None)
+
+    assert model.learning_rate == pytest.approx(LR)
+    for pg in model.policy.optimizer.param_groups:
+        assert pg['lr'] == pytest.approx(LR)
+    env.close()
+
+
+def test_create_or_load_model_falls_back_to_fresh_on_bad_path():
+    """
+    If the warm_start_path doesn't exist, create_or_load_model() must
+    fall back to a fresh model (not crash).
+    """
+    from multitrack_runner import create_or_load_model
+
+    LR = 0.0005
+    env = make_vec_env()
+    model = create_or_load_model(env, learning_rate=LR,
+                                 warm_start_path='/nonexistent/model.zip')
+
+    assert model is not None
+    assert model.learning_rate == pytest.approx(LR)
+    env.close()
+
+
+# ===========================================================================
+# 3. Output parsing — parse_runner_output extracts correct metrics
+# ===========================================================================
+
+def test_parse_runner_output_full_success():
+    """parse_runner_output correctly extracts all metrics from a full run."""
+    from wave3_controller import parse_runner_output
+
+    output = """
+[12:00:01] [W3 Runner][TRAIN] track=generated_road segment_reward=2409.70
+[12:08:00] [W3 Runner][TRAIN] track=generated_track segment_reward=112.30
+[12:15:00] [W3 Runner] Switching to TEST track: mini_monaco
+[12:15:30] [W3 Runner][TEST] track=mini_monaco mean_reward=843.21 mean_steps=980.0 ✅ DRIVES
+[12:15:30] [W3 Runner][TEST] mini_monaco_reward=843.2100
+[12:15:30] [W3 Runner][TEST] combined_test_score=843.2100
+"""
+    combined, mini_monaco = parse_runner_output(output)
+    assert combined == pytest.approx(843.21, rel=1e-4)
+    assert mini_monaco == pytest.approx(843.21, rel=1e-4)
+
+
+def test_parse_runner_output_crash():
+    """parse_runner_output handles a crash/timeout (no test score lines)."""
+    from wave3_controller import parse_runner_output
+
+    output = "[TIMEOUT after 7200s]"
+    combined, mini_monaco = parse_runner_output(output)
+    assert combined is None
+    assert mini_monaco is None
+
+
+def test_parse_runner_output_partial():
+    """parse_runner_output handles missing combined but present mini_monaco."""
+    from wave3_controller import parse_runner_output
+
+    output = "[W3 Runner][TEST] mini_monaco_reward=55.5\n"
+    combined, mini_monaco = parse_runner_output(output)
+    assert combined is None
+    assert mini_monaco == pytest.approx(55.5, rel=1e-4)
+
+
+# ===========================================================================
+# 4. Results round-trip — save → load → GP uses the data
+# ===========================================================================
+
+def test_results_round_trip_gp_sees_correct_lr():
+    """
+    After save_result() writes a trial, load_results() must return it, and
+    the GP must receive the correct params (including the actual LR used).
+    This ensures GP data is not silently corrupted.
+ """ + import wave3_controller + + TRIAL_LR = 0.00083 + TRIAL_SCORE = 250.0 + + with tempfile.TemporaryDirectory() as tmpdir: + original_path = wave3_controller.RESULTS_FILE + wave3_controller.RESULTS_FILE = os.path.join(tmpdir, 'results.jsonl') + + try: + from wave3_controller import save_result, load_results + save_result( + trial=1, + params={'learning_rate': TRIAL_LR, + 'steps_per_switch': 8000, + 'total_timesteps': 45000}, + combined=TRIAL_SCORE, + mini_monaco=TRIAL_SCORE, + model_path=None, + is_champion=True, + status='ok', + elapsed=1200.0, + ) + + results = load_results() + assert len(results) == 1 + assert results[0]['params']['learning_rate'] == pytest.approx(TRIAL_LR) + assert results[0]['combined_test_score'] == pytest.approx(TRIAL_SCORE) + finally: + wave3_controller.RESULTS_FILE = original_path + + +def test_results_gp_data_never_includes_zero_score_trials(): + """ + Zero-score trials (crash/timeout) must NOT be added to GP data. + If they were, the GP would learn that certain params are bad even + when the failure was actually a simulator glitch. + """ + import wave3_controller + + with tempfile.TemporaryDirectory() as tmpdir: + original_path = wave3_controller.RESULTS_FILE + wave3_controller.RESULTS_FILE = os.path.join(tmpdir, 'results.jsonl') + + try: + from wave3_controller import save_result, load_results + + # Write a zero-score (crash) trial + save_result( + trial=1, + params={'learning_rate': 0.001, 'steps_per_switch': 5000, + 'total_timesteps': 30000}, + combined=0.0, + mini_monaco=0.0, + model_path=None, + is_champion=False, + status='timeout', + elapsed=7200.0, + ) + # Write a valid trial + save_result( + trial=2, + params={'learning_rate': 0.0005, 'steps_per_switch': 8000, + 'total_timesteps': 45000}, + combined=300.0, + mini_monaco=300.0, + model_path=None, + is_champion=True, + status='ok', + elapsed=1800.0, + ) + + results = load_results() + # load_results only returns trials with non-None score; the + # wave3 main loop further filters out score==0 before adding to GP + scores = [r['combined_test_score'] for r in results] + assert 300.0 in scores + # The zero-score trial IS in the file but the main loop guards it + assert 0.0 in scores # it's saved + finally: + wave3_controller.RESULTS_FILE = original_path + + +# =========================================================================== +# 5. Seed params sanity — seed trials cover the important LR range +# =========================================================================== + +def test_seed_params_cover_both_low_and_high_lr(): + """ + SEED_PARAMS must include at least one low-LR trial (≤ 3e-4) and + at least one higher-LR trial (≥ 5e-4) so the GP starts with data + across the search space, not just at one corner. + """ + from wave3_controller import SEED_PARAMS + lrs = [p['learning_rate'] for p in SEED_PARAMS] + assert min(lrs) <= 3e-4, f"No low-LR seed trial: {lrs}" + assert max(lrs) >= 5e-4, f"No high-LR seed trial: {lrs}" + + +def test_seed_params_lr_is_not_all_identical(): + """SEED_PARAMS must not all have the same LR — that killed Wave 3 v1.""" + from wave3_controller import SEED_PARAMS + lrs = [p['learning_rate'] for p in SEED_PARAMS] + assert len(set(lrs)) > 1, ( + f"All seed params have the same LR ({lrs[0]}). " + "The GP needs diverse starting data to explore the LR dimension." + )
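
The three-part override referenced in the commit message appears inline in
create_or_load_model(). Packaged as a standalone helper it would look roughly
like this (illustrative sketch only; force_learning_rate is a hypothetical
name, not a function in this repo):

    from stable_baselines3 import PPO
    from stable_baselines3.common.utils import get_schedule_fn

    def force_learning_rate(model: PPO, lr: float) -> None:
        # 1. Python attribute: keeps the model's reported LR consistent
        model.learning_rate = lr
        # 2. Schedule: _update_learning_rate() reads this on every train(),
        #    so without it the optimizer reverts on the first gradient step
        model.lr_schedule = get_schedule_fn(lr)
        # 3. Optimizer param groups: immediate effect before the first train()
        for pg in model.policy.optimizer.param_groups:
            pg['lr'] = lr

    # Usage after a warm start:
    #   model = PPO.load('model.zip', env=env)
    #   force_learning_rate(model, 0.001)
    #   model.learn(total_timesteps=45000)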
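For reference, one implementation shape that satisfies the three
parse_runner_output tests (a sketch only; the real function lives in
agent/wave3_controller.py and may differ):

    import re

    def parse_runner_output_sketch(output):
        # Returns (combined, mini_monaco); None for any metric not found.
        def grab(key):
            m = re.search(rf'{key}=([0-9]+(?:\.[0-9]+)?)', output)
            return float(m.group(1)) if m else None
        return grab('combined_test_score'), grab('mini_monaco_reward')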
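The round-trip tests rely on a guard in the wave3 main loop that keeps
crash/timeout trials out of the GP while still persisting them for audit. A
minimal sketch of that behaviour, using hypothetical names (PARAM_KEYS,
gp_training_data) rather than the actual wave3_controller implementation:

    PARAM_KEYS = ['learning_rate', 'steps_per_switch', 'total_timesteps']

    def gp_training_data(results):
        # Zero/None scores stay in the results file but never reach the GP,
        # so a simulator glitch cannot teach the GP that a region is bad.
        gp_X, gp_y = [], []
        for r in results:
            score = r.get('combined_test_score')
            if not score:
                continue
            gp_X.append([r['params'][k] for k in PARAM_KEYS])
            gp_y.append(score)
        return gp_X, gp_y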