wave3: fix track-switch bug (viewer not raw socket) + shorten trial budgets

Bug: send_exit_scene_raw() opened a NEW TCP connection, creating a second
phantom vehicle. The sim sent exit_scene to the phantom, leaving the real
training connection stuck on generated_road for the entire run.

Fix: _send_exit_scene() now calls env.unwrapped.viewer.exit_scene() on the
EXISTING TCP connection that the training env already holds. This is the
only reliable way to switch scenes mid-session (matches track_switcher.py).

Also:
- Removed send_exit_scene_raw() import from multitrack_runner.py
- Simplified initial connection (no spurious exit_scene at startup)
- Reduced search space: total_timesteps 80k-400k -> 30k-150k
- Reduced seed params: 150k/300k -> 45k/90k (~35-45 min per trial)
- Added test: test_close_and_switch_uses_viewer_not_raw_socket

83 tests passing

Agent: pi
Tests: 83 passed
Tests-Added: 1
TypeScript: N/A
This commit is contained in:
Paul Huliganga 2026-04-14 13:29:49 -04:00
parent 4ca5304a71
commit 86657a26b8
5 changed files with 150 additions and 32 deletions

View File

@ -65,7 +65,6 @@ sys.path.insert(0, AGENT_DIR)
from donkeycar_sb3_runner import ThrottleClampWrapper, SimHealthCallback
from reward_wrapper import SpeedRewardWrapper
from track_switcher import send_exit_scene_raw
# ---- Track catalogue ----
# Maps short name → gym env ID
@ -121,33 +120,66 @@ def wrap_env(raw_env):
# ---- Track switching ----
def _send_exit_scene(env, verbose=True):
    """
    Send exit_scene through the EXISTING connection on env.

    Critical: the DonkeyCar sim creates one vehicle per TCP connection.
    Sending exit_scene via a NEW raw socket creates a second vehicle and
    the sim ignores it for the real training session. We must use the
    existing viewer connection that env already holds.

    Returns True if the message was sent, False otherwise (never raises).
    """
    try:
        base = env.unwrapped  # strips all gym.Wrapper layers
        viewer = getattr(base, 'viewer', None)
        if viewer is None:
            # No live connection to speak through — nothing we can do.
            if verbose:
                log('[W3 Runner] Warning: no viewer found on unwrapped env.')
            return False
        viewer.exit_scene()  # sends {'msg_type': 'exit_scene'} on existing TCP
        time.sleep(0.5)  # let the message flush before closing socket
        if verbose:
            log('[W3 Runner] exit_scene sent on existing viewer connection.')
        return True
    except Exception as e:
        # Best-effort: a failed exit_scene should not abort the track switch.
        if verbose:
            log(f'[W3 Runner] Warning: viewer.exit_scene() raised: {e}')
        return False
def close_and_switch(current_env, next_env_id, verbose=True):
"""
Cleanly close current env and connect to next track.
1. env.close() + sleep(2) [ADR-006]
2. send_exit_scene_raw() + sleep(EXIT_SCENE_WAIT)
3. gym.make(next_env_id) + wrap
Correct order (IMPORTANT):
1. viewer.exit_scene() on existing connection tells sim to go to menu
2. env.close() + sleep(2) disconnect (ADR-006)
3. sleep(EXIT_SCENE_WAIT) wait for sim menu
4. gym.make(next_env_id) + wrap connect to new track
Returns: new wrapped env, or raises on connection failure.
"""
if current_env is not None:
# Step 1: tell the sim to exit the scene BEFORE we close the connection
if verbose:
log(f'[W3 Runner] Closing current env before track switch...')
log('[W3 Runner] Sending exit_scene via existing viewer connection...')
_send_exit_scene(current_env, verbose=verbose)
# Step 2: now close the env (ADR-006)
if verbose:
log('[W3 Runner] Closing current env...')
try:
current_env.close()
except Exception as e:
log(f'[W3 Runner] Warning: env.close() raised: {e}')
time.sleep(2) # ADR-006
if verbose:
log(f'[W3 Runner] Sending exit_scene to sim...')
send_exit_scene_raw()
# Step 3: wait for sim to reach the scene-selection menu
if verbose:
log(f'[W3 Runner] Waiting {EXIT_SCENE_WAIT}s for sim to reach main menu...')
time.sleep(EXIT_SCENE_WAIT)
# Step 4: connect to the target track
if verbose:
log(f'[W3 Runner] Connecting to {next_env_id}...')
raw_env = gym.make(next_env_id)
@ -392,16 +424,13 @@ def main():
log(f'[W3 Runner] eval_episodes ={args.eval_episodes}')
# ---- 1. Connect to first training track ----
# Assume sim is already at the main menu (user-started, or previous run exited cleanly).
# gym.make() on the first track will load it directly from the menu.
first_track_name, first_env_id = TRAINING_TRACKS[0]
log(f'\n[W3 Runner] Starting on first training track: {first_track_name} ({first_env_id})')
log(f'\n[W3 Runner] Connecting to first training track: {first_track_name} ({first_env_id})')
env = None
try:
# Always send exit_scene first to ensure sim is at main menu
log(f'[W3 Runner] Sending exit_scene to clean sim state...')
send_exit_scene_raw()
time.sleep(EXIT_SCENE_WAIT)
raw_env = gym.make(first_env_id)
env = wrap_env(raw_env)
log(f'[W3 Runner] ✅ Connected to {first_env_id}')

View File

@ -502,3 +502,42 @@
[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
[2026-04-14 12:45:34] [AutoResearch] Only 1 results — using random proposal.
[2026-04-14 13:28:43] [AutoResearch] GP UCB top-5 candidates:
[2026-04-14 13:28:43] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
[2026-04-14 13:28:43] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
[2026-04-14 13:28:43] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
[2026-04-14 13:28:43] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
[2026-04-14 13:28:43] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
[2026-04-14 13:28:43] [AutoResearch] Only 1 results — using random proposal.
[2026-04-14 13:29:04] [AutoResearch] GP UCB top-5 candidates:
[2026-04-14 13:29:04] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
[2026-04-14 13:29:04] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
[2026-04-14 13:29:04] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
[2026-04-14 13:29:04] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
[2026-04-14 13:29:04] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
[2026-04-14 13:29:04] [AutoResearch] Only 1 results — using random proposal.
[2026-04-14 13:29:30] [AutoResearch] GP UCB top-5 candidates:
[2026-04-14 13:29:30] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
[2026-04-14 13:29:30] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
[2026-04-14 13:29:30] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
[2026-04-14 13:29:30] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
[2026-04-14 13:29:30] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
[2026-04-14 13:29:30] [AutoResearch] Only 1 results — using random proposal.

View File

@ -18,3 +18,36 @@
[2026-04-14 12:45:39] [Wave3] Only 0 results — using random proposal.
[2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
[2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
[2026-04-14 12:47:25] =================================================================
[2026-04-14 12:47:25] [Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search
[2026-04-14 12:47:25] [Wave3] Training tracks : generated_road, generated_track, mountain_track
[2026-04-14 12:47:25] [Wave3] Test tracks : mini_monaco, warren (zero-shot)
[2026-04-14 12:47:25] [Wave3] Max trials : 25 | kappa=2.0 | push every 5
[2026-04-14 12:47:25] [Wave3] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase3.jsonl
[2026-04-14 12:47:25] [Wave3] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion
[2026-04-14 12:47:25] [Wave3] Warm start : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
[2026-04-14 12:47:25] =================================================================
[2026-04-14 12:47:25] [Wave3] Loaded 0 existing Phase 3 results.
[2026-04-14 12:47:25] [Wave3] No Wave 3 champion yet.
[2026-04-14 12:47:25] [Wave3] Starting from trial 1.
[2026-04-14 12:47:25]
[Wave3] ========== Trial 1/25 ==========
[2026-04-14 12:47:25] [Wave3] Seed trial 1/2: using hardcoded params.
[2026-04-14 12:47:25] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 150000}
[2026-04-14 12:47:27] [Wave3] Launching trial 1: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 150000}
[2026-04-14 12:47:27] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 150000 --steps-per-switch 10000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
[2026-04-14 13:28:47] [Wave3] Seed trial 1/2: using hardcoded params.
[2026-04-14 13:28:47] [Wave3] Seed trial 2/2: using hardcoded params.
[2026-04-14 13:28:47] [Wave3] Only 0 results — using random proposal.
[2026-04-14 13:28:47] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
[2026-04-14 13:28:47] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
[2026-04-14 13:29:08] [Wave3] Seed trial 1/2: using hardcoded params.
[2026-04-14 13:29:08] [Wave3] Seed trial 2/2: using hardcoded params.
[2026-04-14 13:29:08] [Wave3] Only 0 results — using random proposal.
[2026-04-14 13:29:08] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
[2026-04-14 13:29:08] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
[2026-04-14 13:29:34] [Wave3] Seed trial 1/2: using hardcoded params.
[2026-04-14 13:29:34] [Wave3] Seed trial 2/2: using hardcoded params.
[2026-04-14 13:29:34] [Wave3] Only 0 results — using random proposal.
[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}

View File

@ -63,9 +63,9 @@ os.makedirs(CHAMPION_DIR, exist_ok=True)
# ---- Hyperparameter search space ----
# NOTE: the rendered diff left the pre-edit 'steps_per_switch'/'total_timesteps'
# entries in place above the new ones, producing duplicate dict keys that only
# "worked" via Python's silent last-key-wins rule. Keep a single entry per key.
PARAM_SPACE = {
    'learning_rate': {'type': 'float', 'min': 5e-5, 'max': 1e-3},
    # Reduced search space (wave3): shorter trial budgets, ~35-45 min per trial.
    'steps_per_switch': {'type': 'int', 'min': 2000, 'max': 15000},
    'total_timesteps': {'type': 'int', 'min': 30000, 'max': 150000},
}
# Stable key order used when encoding/decoding GP parameter vectors.
PARAM_KEYS = list(PARAM_SPACE.keys())
@ -81,10 +81,10 @@ JOB_TIMEOUT = 7200 # 2h — 400k steps on CPU may need time
# ---- Seed trials near Phase 2 champion ----
# GP warm-up: first 2 trials use known-good parameters so GP has real prior data
SEED_PARAMS = [
    # NOTE: the rendered diff left the two pre-reduction seeds (150k/300k
    # timesteps) in this list alongside the new ones; 300k even exceeds the
    # new total_timesteps max of 150k. Only the two reduced seeds belong here,
    # matching the "first 2 trials use known-good parameters" warm-up.
    # 3 full rotations through all 3 training tracks (~35 min per trial)
    {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000},
    # Slower switching, more time per track (~45 min per trial)
    {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000},
]

View File

@ -188,9 +188,11 @@ def test_close_and_switch_calls_env_close():
"""close_and_switch() must call env.close() on the old env (ADR-006)."""
from multitrack_runner import close_and_switch
old_env = MockGymEnv()
# Give it a fake viewer so _send_exit_scene has something to call
mock_viewer = MagicMock()
old_env.unwrapped.viewer = mock_viewer
with patch('multitrack_runner.send_exit_scene_raw', return_value=True), \
patch('multitrack_runner.gym.make', return_value=MockGymEnv()), \
with patch('multitrack_runner.gym.make', return_value=MockGymEnv()), \
patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \
patch('time.sleep'):
close_and_switch(old_env, 'donkey-generated-track-v0', verbose=False)
@ -203,8 +205,7 @@ def test_close_and_switch_returns_new_env():
from multitrack_runner import close_and_switch
new_env = MockGymEnv()
with patch('multitrack_runner.send_exit_scene_raw', return_value=True), \
patch('multitrack_runner.gym.make', return_value=new_env), \
with patch('multitrack_runner.gym.make', return_value=new_env), \
patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \
patch('time.sleep'):
result = close_and_switch(None, 'donkey-generated-track-v0', verbose=False)
@ -212,7 +213,23 @@ def test_close_and_switch_returns_new_env():
assert result is new_env
def test_evaluate_test_tracks_returns_dict_with_track_keys():
def test_close_and_switch_uses_viewer_not_raw_socket():
    """exit_scene must be sent via env.unwrapped.viewer, not a new raw connection."""
    from multitrack_runner import close_and_switch
    old_env = MockGymEnv()
    mock_viewer = MagicMock()
    old_env.unwrapped.viewer = mock_viewer  # attach mock viewer
    with patch('multitrack_runner.gym.make', return_value=MockGymEnv()), \
         patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \
         patch('time.sleep'):
        close_and_switch(old_env, 'donkey-generated-track-v0', verbose=False)
    # assert_called_once() raises AssertionError on its own; the original
    # trailed ", '<message>'" which merely built a dead (None, str) tuple —
    # the message could never be shown. Call it plainly instead.
    mock_viewer.exit_scene.assert_called_once()
"""evaluate_test_tracks() should return a dict keyed by track names."""
from multitrack_runner import evaluate_test_tracks, TEST_TRACKS
@ -287,8 +304,8 @@ def test_wave3_encode_decode_round_trip():
original = {
'learning_rate': 0.000225,
'steps_per_switch': 10000,
'total_timesteps': 200000,
'steps_per_switch': 5000,
'total_timesteps': 90000,
}
vec = encode_params(original)
recovered = decode_params(vec)
@ -504,9 +521,9 @@ def test_wave3_param_space_covers_phase2_champion():
# Phase 2 champion: lr=0.000225, which falls in [5e-5, 1e-3]
assert PARAM_SPACE['learning_rate']['min'] <= 0.000225 <= PARAM_SPACE['learning_rate']['max']
# Moderate switching schedule
assert PARAM_SPACE['steps_per_switch']['min'] <= 10000 <= PARAM_SPACE['steps_per_switch']['max']
# Phase 2 had 13k timesteps — Phase 3 needs much more; check min >= 50k
assert PARAM_SPACE['total_timesteps']['min'] >= 50000
assert PARAM_SPACE['steps_per_switch']['min'] <= 5000 <= PARAM_SPACE['steps_per_switch']['max']
# Phase 3 needs more than Phase 2's 13k; check min >= 20k
assert PARAM_SPACE['total_timesteps']['min'] >= 20000
def test_wave3_seed_params_within_space():