From 86657a26b8c212d85520f08b02c306d836e61ad0 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Tue, 14 Apr 2026 13:29:49 -0400 Subject: [PATCH] wave3: fix track-switch bug (viewer not raw socket) + shorten trial budgets Bug: send_exit_scene_raw() opened a NEW TCP connection, creating a second phantom vehicle. The sim sent exit_scene to the phantom, leaving the real training connection stuck on generated_road for the entire run. Fix: _send_exit_scene() now calls env.unwrapped.viewer.exit_scene() on the EXISTING TCP connection that the training env already holds. This is the only reliable way to switch scenes mid-session (matches track_switcher.py). Also: - Removed send_exit_scene_raw() import from multitrack_runner.py - Simplified initial connection (no spurious exit_scene at startup) - Reduced search space: total_timesteps 80k-400k -> 30k-150k - Reduced seed params: 150k/300k -> 45k/90k (~35-45 min per trial) - Added test: test_close_and_switch_uses_viewer_not_raw_socket 83 tests passing Agent: pi Tests: 83 passed Tests-Added: 1 TypeScript: N/A --- agent/multitrack_runner.py | 59 ++++++++++++++----- .../autoresearch_phase2_log.txt | 39 ++++++++++++ .../autoresearch_phase3_log.txt | 33 +++++++++++ agent/wave3_controller.py | 14 ++--- tests/test_wave3.py | 37 ++++++++---- 5 files changed, 150 insertions(+), 32 deletions(-) diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py index 9f5eea6..2395703 100644 --- a/agent/multitrack_runner.py +++ b/agent/multitrack_runner.py @@ -65,7 +65,6 @@ sys.path.insert(0, AGENT_DIR) from donkeycar_sb3_runner import ThrottleClampWrapper, SimHealthCallback from reward_wrapper import SpeedRewardWrapper -from track_switcher import send_exit_scene_raw # ---- Track catalogue ---- # Maps short name → gym env ID @@ -121,33 +120,66 @@ def wrap_env(raw_env): # ---- Track switching ---- +def _send_exit_scene(env, verbose=True): + """ + Send exit_scene through the EXISTING connection on env. 
+ + Critical: the DonkeyCar sim creates one vehicle per TCP connection. + Sending exit_scene via a NEW raw socket creates a second vehicle and + the sim ignores it for the real training session. We must use the + existing viewer connection that env already holds. + """ + try: + base = env.unwrapped # strips all gym.Wrapper layers + if hasattr(base, 'viewer') and base.viewer is not None: + base.viewer.exit_scene() # sends {'msg_type': 'exit_scene'} on existing TCP + time.sleep(0.5) # let the message flush before closing socket + if verbose: + log('[W3 Runner] exit_scene sent on existing viewer connection.') + return True + else: + if verbose: + log('[W3 Runner] Warning: no viewer found on unwrapped env.') + return False + except Exception as e: + if verbose: + log(f'[W3 Runner] Warning: viewer.exit_scene() raised: {e}') + return False + + def close_and_switch(current_env, next_env_id, verbose=True): """ Cleanly close current env and connect to next track. - 1. env.close() + sleep(2) [ADR-006] - 2. send_exit_scene_raw() + sleep(EXIT_SCENE_WAIT) - 3. gym.make(next_env_id) + wrap + Correct order (IMPORTANT): + 1. viewer.exit_scene() on existing connection ← tells sim to go to menu + 2. env.close() + sleep(2) ← disconnect (ADR-006) + 3. sleep(EXIT_SCENE_WAIT) ← wait for sim menu + 4. gym.make(next_env_id) + wrap ← connect to new track Returns: new wrapped env, or raises on connection failure. 
""" if current_env is not None: + # Step 1: tell the sim to exit the scene BEFORE we close the connection if verbose: - log(f'[W3 Runner] Closing current env before track switch...') + log('[W3 Runner] Sending exit_scene via existing viewer connection...') + _send_exit_scene(current_env, verbose=verbose) + + # Step 2: now close the env (ADR-006) + if verbose: + log('[W3 Runner] Closing current env...') try: current_env.close() except Exception as e: log(f'[W3 Runner] Warning: env.close() raised: {e}') time.sleep(2) # ADR-006 - if verbose: - log(f'[W3 Runner] Sending exit_scene to sim...') - send_exit_scene_raw() - + # Step 3: wait for sim to reach the scene-selection menu if verbose: log(f'[W3 Runner] Waiting {EXIT_SCENE_WAIT}s for sim to reach main menu...') time.sleep(EXIT_SCENE_WAIT) + # Step 4: connect to the target track if verbose: log(f'[W3 Runner] Connecting to {next_env_id}...') raw_env = gym.make(next_env_id) @@ -392,16 +424,13 @@ def main(): log(f'[W3 Runner] eval_episodes ={args.eval_episodes}') # ---- 1. Connect to first training track ---- + # Assume sim is already at the main menu (user-started, or previous run exited cleanly). + # gym.make() on the first track will load it directly from the menu. 
first_track_name, first_env_id = TRAINING_TRACKS[0] - log(f'\n[W3 Runner] Starting on first training track: {first_track_name} ({first_env_id})') + log(f'\n[W3 Runner] Connecting to first training track: {first_track_name} ({first_env_id})') env = None try: - # Always send exit_scene first to ensure sim is at main menu - log(f'[W3 Runner] Sending exit_scene to clean sim state...') - send_exit_scene_raw() - time.sleep(EXIT_SCENE_WAIT) - raw_env = gym.make(first_env_id) env = wrap_env(raw_env) log(f'[W3 Runner] ✅ Connected to {first_env_id}') diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt index 2c7cd04..3d2fb3b 100644 --- a/agent/outerloop-results/autoresearch_phase2_log.txt +++ b/agent/outerloop-results/autoresearch_phase2_log.txt @@ -502,3 +502,42 @@ [2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} [2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} [2026-04-14 12:45:34] [AutoResearch] Only 1 results — using random proposal. +[2026-04-14 13:28:43] [AutoResearch] GP UCB top-5 candidates: +[2026-04-14 13:28:43] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-14 13:28:43] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-14 13:28:43] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-14 13:28:43] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-14 13:28:43] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! 
Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-14 13:28:43] [AutoResearch] Only 1 results — using random proposal. +[2026-04-14 13:29:04] [AutoResearch] GP UCB top-5 candidates: +[2026-04-14 13:29:04] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-14 13:29:04] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-14 13:29:04] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-14 13:29:04] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-14 13:29:04] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! 
Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-14 13:29:04] [AutoResearch] Only 1 results — using random proposal. +[2026-04-14 13:29:30] [AutoResearch] GP UCB top-5 candidates: +[2026-04-14 13:29:30] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-14 13:29:30] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-14 13:29:30] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-14 13:29:30] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-14 13:29:30] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-14 13:29:30] [AutoResearch] Only 1 results — using random proposal. diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt index 15083a3..45d34b5 100644 --- a/agent/outerloop-results/autoresearch_phase3_log.txt +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -18,3 +18,36 @@ [2026-04-14 12:45:39] [Wave3] Only 0 results — using random proposal. 
[2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} [2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={} +[2026-04-14 12:47:25] ================================================================= +[2026-04-14 12:47:25] [Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search +[2026-04-14 12:47:25] [Wave3] Training tracks : generated_road, generated_track, mountain_track +[2026-04-14 12:47:25] [Wave3] Test tracks : mini_monaco, warren (zero-shot) +[2026-04-14 12:47:25] [Wave3] Max trials : 25 | kappa=2.0 | push every 5 +[2026-04-14 12:47:25] [Wave3] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase3.jsonl +[2026-04-14 12:47:25] [Wave3] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion +[2026-04-14 12:47:25] [Wave3] Warm start : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 12:47:25] ================================================================= +[2026-04-14 12:47:25] [Wave3] Loaded 0 existing Phase 3 results. +[2026-04-14 12:47:25] [Wave3] No Wave 3 champion yet. +[2026-04-14 12:47:25] [Wave3] Starting from trial 1. +[2026-04-14 12:47:25] +[Wave3] ========== Trial 1/25 ========== +[2026-04-14 12:47:25] [Wave3] Seed trial 1/2: using hardcoded params. 
+[2026-04-14 12:47:25] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 150000} +[2026-04-14 12:47:27] [Wave3] Launching trial 1: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 150000} +[2026-04-14 12:47:27] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 150000 --steps-per-switch 10000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 13:28:47] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 13:28:47] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 13:28:47] [Wave3] Only 0 results — using random proposal. +[2026-04-14 13:28:47] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 13:28:47] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={} +[2026-04-14 13:29:08] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 13:29:08] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 13:29:08] [Wave3] Only 0 results — using random proposal. +[2026-04-14 13:29:08] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 13:29:08] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={} +[2026-04-14 13:29:34] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 13:29:34] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 13:29:34] [Wave3] Only 0 results — using random proposal. +[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! 
Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={} diff --git a/agent/wave3_controller.py b/agent/wave3_controller.py index 9caade1..cc67c82 100644 --- a/agent/wave3_controller.py +++ b/agent/wave3_controller.py @@ -63,9 +63,9 @@ os.makedirs(CHAMPION_DIR, exist_ok=True) # ---- Hyperparameter search space ---- PARAM_SPACE = { - 'learning_rate': {'type': 'float', 'min': 5e-5, 'max': 1e-3}, - 'steps_per_switch': {'type': 'int', 'min': 2000, 'max': 25000}, - 'total_timesteps': {'type': 'int', 'min': 80000, 'max': 400000}, + 'learning_rate': {'type': 'float', 'min': 5e-5, 'max': 1e-3}, + 'steps_per_switch': {'type': 'int', 'min': 2000, 'max': 15000}, + 'total_timesteps': {'type': 'int', 'min': 30000, 'max': 150000}, } PARAM_KEYS = list(PARAM_SPACE.keys()) @@ -81,10 +81,10 @@ JOB_TIMEOUT = 7200 # 2h — 400k steps on CPU may need time # ---- Seed trials near Phase 2 champion ---- # GP warm-up: first 2 trials use known-good parameters so GP has real prior data SEED_PARAMS = [ - # Phase 2 champion settings — warm-start, moderate switching - {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 150000}, - # Higher timesteps, less frequent switching — encourage deeper learning per track - {'learning_rate': 0.000225, 'steps_per_switch': 20000, 'total_timesteps': 300000}, + # 3 full rotations through all 3 training tracks (~35 min per trial) + {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}, + # Slower switching, more time per track (~45 min per trial) + {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000}, ] diff --git a/tests/test_wave3.py b/tests/test_wave3.py index d744675..db6e758 100644 --- a/tests/test_wave3.py +++ b/tests/test_wave3.py @@ -188,9 +188,11 @@ def 
test_close_and_switch_calls_env_close(): """close_and_switch() must call env.close() on the old env (ADR-006).""" from multitrack_runner import close_and_switch old_env = MockGymEnv() + # Give it a fake viewer so _send_exit_scene has something to call + mock_viewer = MagicMock() + old_env.unwrapped.viewer = mock_viewer - with patch('multitrack_runner.send_exit_scene_raw', return_value=True), \ - patch('multitrack_runner.gym.make', return_value=MockGymEnv()), \ + with patch('multitrack_runner.gym.make', return_value=MockGymEnv()), \ patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \ patch('time.sleep'): close_and_switch(old_env, 'donkey-generated-track-v0', verbose=False) @@ -203,8 +205,7 @@ def test_close_and_switch_returns_new_env(): from multitrack_runner import close_and_switch new_env = MockGymEnv() - with patch('multitrack_runner.send_exit_scene_raw', return_value=True), \ - patch('multitrack_runner.gym.make', return_value=new_env), \ + with patch('multitrack_runner.gym.make', return_value=new_env), \ patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \ patch('time.sleep'): result = close_and_switch(None, 'donkey-generated-track-v0', verbose=False) @@ -212,7 +213,23 @@ def test_close_and_switch_returns_new_env(): assert result is new_env -def test_evaluate_test_tracks_returns_dict_with_track_keys(): +def test_close_and_switch_uses_viewer_not_raw_socket(): + """exit_scene must be sent via env.unwrapped.viewer, not a new raw connection.""" + from multitrack_runner import close_and_switch + old_env = MockGymEnv() + mock_viewer = MagicMock() + old_env.unwrapped.viewer = mock_viewer # attach mock viewer + + with patch('multitrack_runner.gym.make', return_value=MockGymEnv()), \ + patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \ + patch('time.sleep'): + close_and_switch(old_env, 'donkey-generated-track-v0', verbose=False) + + mock_viewer.exit_scene.assert_called_once(), \ + 'exit_scene must be called on existing viewer (not a 
new raw socket)'
+
+
+def test_evaluate_test_tracks_returns_dict_with_track_keys():
     """evaluate_test_tracks() should return a dict keyed by track names."""
 
     from multitrack_runner import evaluate_test_tracks, TEST_TRACKS
@@ -287,8 +304,8 @@ def test_wave3_encode_decode_round_trip():
 
     original = {
         'learning_rate': 0.000225,
-        'steps_per_switch': 10000,
-        'total_timesteps': 200000,
+        'steps_per_switch': 5000,
+        'total_timesteps': 90000,
     }
     vec = encode_params(original)
     recovered = decode_params(vec)
@@ -504,9 +521,9 @@ def test_wave3_param_space_covers_phase2_champion():
     # Phase 2 champion: lr=0.000225, which falls in [5e-5, 1e-3]
     assert PARAM_SPACE['learning_rate']['min'] <= 0.000225 <= PARAM_SPACE['learning_rate']['max']
     # Moderate switching schedule
-    assert PARAM_SPACE['steps_per_switch']['min'] <= 10000 <= PARAM_SPACE['steps_per_switch']['max']
-    # Phase 2 had 13k timesteps — Phase 3 needs much more; check min >= 50k
-    assert PARAM_SPACE['total_timesteps']['min'] >= 50000
+    assert PARAM_SPACE['steps_per_switch']['min'] <= 5000 <= PARAM_SPACE['steps_per_switch']['max']
+    # Phase 3 needs more than Phase 2's 13k; check min >= 20k
+    assert PARAM_SPACE['total_timesteps']['min'] >= 20000
 
 
 def test_wave3_seed_params_within_space():