wave3: fix track-switch bug (viewer not raw socket) + shorten trial budgets
Bug: send_exit_scene_raw() opened a NEW TCP connection, which created a second, phantom vehicle. The exit_scene message was therefore delivered on the phantom's connection, leaving the real training connection stuck on generated_road for the entire run.

Fix: _send_exit_scene() now calls env.unwrapped.viewer.exit_scene() on the EXISTING TCP connection that the training env already holds. This is the only reliable way to switch scenes mid-session (matches track_switcher.py).

Also:
- Removed send_exit_scene_raw() import from multitrack_runner.py
- Simplified initial connection (no spurious exit_scene at startup)
- Reduced search space: total_timesteps 80k-400k -> 30k-150k
- Reduced seed params: 150k/300k -> 45k/90k (~35-45 min per trial)
- Added test: test_close_and_switch_uses_viewer_not_raw_socket

83 tests passing

Agent: pi
Tests: 83 passed
Tests-Added: 1
TypeScript: N/A
This commit is contained in:
parent
4ca5304a71
commit
86657a26b8
|
|
@ -65,7 +65,6 @@ sys.path.insert(0, AGENT_DIR)
|
||||||
|
|
||||||
from donkeycar_sb3_runner import ThrottleClampWrapper, SimHealthCallback
|
from donkeycar_sb3_runner import ThrottleClampWrapper, SimHealthCallback
|
||||||
from reward_wrapper import SpeedRewardWrapper
|
from reward_wrapper import SpeedRewardWrapper
|
||||||
from track_switcher import send_exit_scene_raw
|
|
||||||
|
|
||||||
# ---- Track catalogue ----
|
# ---- Track catalogue ----
|
||||||
# Maps short name → gym env ID
|
# Maps short name → gym env ID
|
||||||
|
|
@ -121,33 +120,66 @@ def wrap_env(raw_env):
|
||||||
|
|
||||||
|
|
||||||
# ---- Track switching ----
|
# ---- Track switching ----
|
||||||
|
def _send_exit_scene(env, verbose=True):
|
||||||
|
"""
|
||||||
|
Send exit_scene through the EXISTING connection on env.
|
||||||
|
|
||||||
|
Critical: the DonkeyCar sim creates one vehicle per TCP connection.
|
||||||
|
Sending exit_scene via a NEW raw socket creates a second vehicle and
|
||||||
|
the sim ignores it for the real training session. We must use the
|
||||||
|
existing viewer connection that env already holds.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
base = env.unwrapped # strips all gym.Wrapper layers
|
||||||
|
if hasattr(base, 'viewer') and base.viewer is not None:
|
||||||
|
base.viewer.exit_scene() # sends {'msg_type': 'exit_scene'} on existing TCP
|
||||||
|
time.sleep(0.5) # let the message flush before closing socket
|
||||||
|
if verbose:
|
||||||
|
log('[W3 Runner] exit_scene sent on existing viewer connection.')
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
if verbose:
|
||||||
|
log('[W3 Runner] Warning: no viewer found on unwrapped env.')
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
if verbose:
|
||||||
|
log(f'[W3 Runner] Warning: viewer.exit_scene() raised: {e}')
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def close_and_switch(current_env, next_env_id, verbose=True):
|
def close_and_switch(current_env, next_env_id, verbose=True):
|
||||||
"""
|
"""
|
||||||
Cleanly close current env and connect to next track.
|
Cleanly close current env and connect to next track.
|
||||||
|
|
||||||
1. env.close() + sleep(2) [ADR-006]
|
Correct order (IMPORTANT):
|
||||||
2. send_exit_scene_raw() + sleep(EXIT_SCENE_WAIT)
|
1. viewer.exit_scene() on existing connection ← tells sim to go to menu
|
||||||
3. gym.make(next_env_id) + wrap
|
2. env.close() + sleep(2) ← disconnect (ADR-006)
|
||||||
|
3. sleep(EXIT_SCENE_WAIT) ← wait for sim menu
|
||||||
|
4. gym.make(next_env_id) + wrap ← connect to new track
|
||||||
|
|
||||||
Returns: new wrapped env, or raises on connection failure.
|
Returns: new wrapped env, or raises on connection failure.
|
||||||
"""
|
"""
|
||||||
if current_env is not None:
|
if current_env is not None:
|
||||||
|
# Step 1: tell the sim to exit the scene BEFORE we close the connection
|
||||||
if verbose:
|
if verbose:
|
||||||
log(f'[W3 Runner] Closing current env before track switch...')
|
log('[W3 Runner] Sending exit_scene via existing viewer connection...')
|
||||||
|
_send_exit_scene(current_env, verbose=verbose)
|
||||||
|
|
||||||
|
# Step 2: now close the env (ADR-006)
|
||||||
|
if verbose:
|
||||||
|
log('[W3 Runner] Closing current env...')
|
||||||
try:
|
try:
|
||||||
current_env.close()
|
current_env.close()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log(f'[W3 Runner] Warning: env.close() raised: {e}')
|
log(f'[W3 Runner] Warning: env.close() raised: {e}')
|
||||||
time.sleep(2) # ADR-006
|
time.sleep(2) # ADR-006
|
||||||
|
|
||||||
if verbose:
|
# Step 3: wait for sim to reach the scene-selection menu
|
||||||
log(f'[W3 Runner] Sending exit_scene to sim...')
|
|
||||||
send_exit_scene_raw()
|
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
log(f'[W3 Runner] Waiting {EXIT_SCENE_WAIT}s for sim to reach main menu...')
|
log(f'[W3 Runner] Waiting {EXIT_SCENE_WAIT}s for sim to reach main menu...')
|
||||||
time.sleep(EXIT_SCENE_WAIT)
|
time.sleep(EXIT_SCENE_WAIT)
|
||||||
|
|
||||||
|
# Step 4: connect to the target track
|
||||||
if verbose:
|
if verbose:
|
||||||
log(f'[W3 Runner] Connecting to {next_env_id}...')
|
log(f'[W3 Runner] Connecting to {next_env_id}...')
|
||||||
raw_env = gym.make(next_env_id)
|
raw_env = gym.make(next_env_id)
|
||||||
|
|
@ -392,16 +424,13 @@ def main():
|
||||||
log(f'[W3 Runner] eval_episodes ={args.eval_episodes}')
|
log(f'[W3 Runner] eval_episodes ={args.eval_episodes}')
|
||||||
|
|
||||||
# ---- 1. Connect to first training track ----
|
# ---- 1. Connect to first training track ----
|
||||||
|
# Assume sim is already at the main menu (user-started, or previous run exited cleanly).
|
||||||
|
# gym.make() on the first track will load it directly from the menu.
|
||||||
first_track_name, first_env_id = TRAINING_TRACKS[0]
|
first_track_name, first_env_id = TRAINING_TRACKS[0]
|
||||||
log(f'\n[W3 Runner] Starting on first training track: {first_track_name} ({first_env_id})')
|
log(f'\n[W3 Runner] Connecting to first training track: {first_track_name} ({first_env_id})')
|
||||||
|
|
||||||
env = None
|
env = None
|
||||||
try:
|
try:
|
||||||
# Always send exit_scene first to ensure sim is at main menu
|
|
||||||
log(f'[W3 Runner] Sending exit_scene to clean sim state...')
|
|
||||||
send_exit_scene_raw()
|
|
||||||
time.sleep(EXIT_SCENE_WAIT)
|
|
||||||
|
|
||||||
raw_env = gym.make(first_env_id)
|
raw_env = gym.make(first_env_id)
|
||||||
env = wrap_env(raw_env)
|
env = wrap_env(raw_env)
|
||||||
log(f'[W3 Runner] ✅ Connected to {first_env_id}')
|
log(f'[W3 Runner] ✅ Connected to {first_env_id}')
|
||||||
|
|
|
||||||
|
|
@ -502,3 +502,42 @@
|
||||||
[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
[2026-04-14 12:45:34] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
[2026-04-14 12:45:34] [AutoResearch] Only 1 results — using random proposal.
|
[2026-04-14 12:45:34] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
[2026-04-14 13:28:43] [AutoResearch] GP UCB top-5 candidates:
|
||||||
|
[2026-04-14 13:28:43] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
|
||||||
|
[2026-04-14 13:28:43] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
|
||||||
|
[2026-04-14 13:28:43] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
|
||||||
|
[2026-04-14 13:28:43] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
|
||||||
|
[2026-04-14 13:28:43] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
|
||||||
|
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||||
|
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||||
|
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||||
|
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||||
|
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
|
[2026-04-14 13:28:43] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
|
[2026-04-14 13:28:43] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
[2026-04-14 13:29:04] [AutoResearch] GP UCB top-5 candidates:
|
||||||
|
[2026-04-14 13:29:04] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
|
||||||
|
[2026-04-14 13:29:04] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
|
||||||
|
[2026-04-14 13:29:04] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
|
||||||
|
[2026-04-14 13:29:04] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
|
||||||
|
[2026-04-14 13:29:04] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
|
||||||
|
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||||
|
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||||
|
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||||
|
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||||
|
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
|
[2026-04-14 13:29:04] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
|
[2026-04-14 13:29:04] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
[2026-04-14 13:29:30] [AutoResearch] GP UCB top-5 candidates:
|
||||||
|
[2026-04-14 13:29:30] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
|
||||||
|
[2026-04-14 13:29:30] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
|
||||||
|
[2026-04-14 13:29:30] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
|
||||||
|
[2026-04-14 13:29:30] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
|
||||||
|
[2026-04-14 13:29:30] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
|
||||||
|
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||||
|
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||||
|
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||||
|
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||||
|
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
|
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
|
[2026-04-14 13:29:30] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
|
|
||||||
|
|
@ -18,3 +18,36 @@
|
||||||
[2026-04-14 12:45:39] [Wave3] Only 0 results — using random proposal.
|
[2026-04-14 12:45:39] [Wave3] Only 0 results — using random proposal.
|
||||||
[2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
[2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
[2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
|
[2026-04-14 12:45:39] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
|
||||||
|
[2026-04-14 12:47:25] =================================================================
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Training tracks : generated_road, generated_track, mountain_track
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Test tracks : mini_monaco, warren (zero-shot)
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Max trials : 25 | kappa=2.0 | push every 5
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase3.jsonl
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Warm start : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
|
||||||
|
[2026-04-14 12:47:25] =================================================================
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Loaded 0 existing Phase 3 results.
|
||||||
|
[2026-04-14 12:47:25] [Wave3] No Wave 3 champion yet.
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Starting from trial 1.
|
||||||
|
[2026-04-14 12:47:25]
|
||||||
|
[Wave3] ========== Trial 1/25 ==========
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-14 12:47:25] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 150000}
|
||||||
|
[2026-04-14 12:47:27] [Wave3] Launching trial 1: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 150000}
|
||||||
|
[2026-04-14 12:47:27] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 150000 --steps-per-switch 10000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
|
||||||
|
[2026-04-14 13:28:47] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-14 13:28:47] [Wave3] Seed trial 2/2: using hardcoded params.
|
||||||
|
[2026-04-14 13:28:47] [Wave3] Only 0 results — using random proposal.
|
||||||
|
[2026-04-14 13:28:47] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
|
[2026-04-14 13:28:47] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
|
||||||
|
[2026-04-14 13:29:08] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-14 13:29:08] [Wave3] Seed trial 2/2: using hardcoded params.
|
||||||
|
[2026-04-14 13:29:08] [Wave3] Only 0 results — using random proposal.
|
||||||
|
[2026-04-14 13:29:08] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
|
[2026-04-14 13:29:08] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
|
||||||
|
[2026-04-14 13:29:34] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-14 13:29:34] [Wave3] Seed trial 2/2: using hardcoded params.
|
||||||
|
[2026-04-14 13:29:34] [Wave3] Only 0 results — using random proposal.
|
||||||
|
[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
|
[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
|
||||||
|
|
|
||||||
|
|
@ -63,9 +63,9 @@ os.makedirs(CHAMPION_DIR, exist_ok=True)
|
||||||
|
|
||||||
# ---- Hyperparameter search space ----
|
# ---- Hyperparameter search space ----
|
||||||
PARAM_SPACE = {
|
PARAM_SPACE = {
|
||||||
'learning_rate': {'type': 'float', 'min': 5e-5, 'max': 1e-3},
|
'learning_rate': {'type': 'float', 'min': 5e-5, 'max': 1e-3},
|
||||||
'steps_per_switch': {'type': 'int', 'min': 2000, 'max': 25000},
|
'steps_per_switch': {'type': 'int', 'min': 2000, 'max': 15000},
|
||||||
'total_timesteps': {'type': 'int', 'min': 80000, 'max': 400000},
|
'total_timesteps': {'type': 'int', 'min': 30000, 'max': 150000},
|
||||||
}
|
}
|
||||||
PARAM_KEYS = list(PARAM_SPACE.keys())
|
PARAM_KEYS = list(PARAM_SPACE.keys())
|
||||||
|
|
||||||
|
|
@ -81,10 +81,10 @@ JOB_TIMEOUT = 7200 # 2h — 400k steps on CPU may need time
|
||||||
# ---- Seed trials near Phase 2 champion ----
|
# ---- Seed trials near Phase 2 champion ----
|
||||||
# GP warm-up: first 2 trials use known-good parameters so GP has real prior data
|
# GP warm-up: first 2 trials use known-good parameters so GP has real prior data
|
||||||
SEED_PARAMS = [
|
SEED_PARAMS = [
|
||||||
# Phase 2 champion settings — warm-start, moderate switching
|
# 3 full rotations through all 3 training tracks (~35 min per trial)
|
||||||
{'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 150000},
|
{'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000},
|
||||||
# Higher timesteps, less frequent switching — encourage deeper learning per track
|
# Slower switching, more time per track (~45 min per trial)
|
||||||
{'learning_rate': 0.000225, 'steps_per_switch': 20000, 'total_timesteps': 300000},
|
{'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -188,9 +188,11 @@ def test_close_and_switch_calls_env_close():
|
||||||
"""close_and_switch() must call env.close() on the old env (ADR-006)."""
|
"""close_and_switch() must call env.close() on the old env (ADR-006)."""
|
||||||
from multitrack_runner import close_and_switch
|
from multitrack_runner import close_and_switch
|
||||||
old_env = MockGymEnv()
|
old_env = MockGymEnv()
|
||||||
|
# Give it a fake viewer so _send_exit_scene has something to call
|
||||||
|
mock_viewer = MagicMock()
|
||||||
|
old_env.unwrapped.viewer = mock_viewer
|
||||||
|
|
||||||
with patch('multitrack_runner.send_exit_scene_raw', return_value=True), \
|
with patch('multitrack_runner.gym.make', return_value=MockGymEnv()), \
|
||||||
patch('multitrack_runner.gym.make', return_value=MockGymEnv()), \
|
|
||||||
patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \
|
patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \
|
||||||
patch('time.sleep'):
|
patch('time.sleep'):
|
||||||
close_and_switch(old_env, 'donkey-generated-track-v0', verbose=False)
|
close_and_switch(old_env, 'donkey-generated-track-v0', verbose=False)
|
||||||
|
|
@ -203,8 +205,7 @@ def test_close_and_switch_returns_new_env():
|
||||||
from multitrack_runner import close_and_switch
|
from multitrack_runner import close_and_switch
|
||||||
new_env = MockGymEnv()
|
new_env = MockGymEnv()
|
||||||
|
|
||||||
with patch('multitrack_runner.send_exit_scene_raw', return_value=True), \
|
with patch('multitrack_runner.gym.make', return_value=new_env), \
|
||||||
patch('multitrack_runner.gym.make', return_value=new_env), \
|
|
||||||
patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \
|
patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \
|
||||||
patch('time.sleep'):
|
patch('time.sleep'):
|
||||||
result = close_and_switch(None, 'donkey-generated-track-v0', verbose=False)
|
result = close_and_switch(None, 'donkey-generated-track-v0', verbose=False)
|
||||||
|
|
@ -212,7 +213,23 @@ def test_close_and_switch_returns_new_env():
|
||||||
assert result is new_env
|
assert result is new_env
|
||||||
|
|
||||||
|
|
||||||
def test_evaluate_test_tracks_returns_dict_with_track_keys():
|
def test_close_and_switch_uses_viewer_not_raw_socket():
|
||||||
|
"""exit_scene must be sent via env.unwrapped.viewer, not a new raw connection."""
|
||||||
|
from multitrack_runner import close_and_switch
|
||||||
|
old_env = MockGymEnv()
|
||||||
|
mock_viewer = MagicMock()
|
||||||
|
old_env.unwrapped.viewer = mock_viewer # attach mock viewer
|
||||||
|
|
||||||
|
with patch('multitrack_runner.gym.make', return_value=MockGymEnv()), \
|
||||||
|
patch('multitrack_runner.wrap_env', side_effect=lambda e: e), \
|
||||||
|
patch('time.sleep'):
|
||||||
|
close_and_switch(old_env, 'donkey-generated-track-v0', verbose=False)
|
||||||
|
|
||||||
|
mock_viewer.exit_scene.assert_called_once(), \
|
||||||
|
'exit_scene must be called on existing viewer (not a new raw socket)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""evaluate_test_tracks() should return a dict keyed by track names."""
|
"""evaluate_test_tracks() should return a dict keyed by track names."""
|
||||||
from multitrack_runner import evaluate_test_tracks, TEST_TRACKS
|
from multitrack_runner import evaluate_test_tracks, TEST_TRACKS
|
||||||
|
|
||||||
|
|
@ -287,8 +304,8 @@ def test_wave3_encode_decode_round_trip():
|
||||||
|
|
||||||
original = {
|
original = {
|
||||||
'learning_rate': 0.000225,
|
'learning_rate': 0.000225,
|
||||||
'steps_per_switch': 10000,
|
'steps_per_switch': 5000,
|
||||||
'total_timesteps': 200000,
|
'total_timesteps': 90000,
|
||||||
}
|
}
|
||||||
vec = encode_params(original)
|
vec = encode_params(original)
|
||||||
recovered = decode_params(vec)
|
recovered = decode_params(vec)
|
||||||
|
|
@ -504,9 +521,9 @@ def test_wave3_param_space_covers_phase2_champion():
|
||||||
# Phase 2 champion: lr=0.000225, which falls in [5e-5, 1e-3]
|
# Phase 2 champion: lr=0.000225, which falls in [5e-5, 1e-3]
|
||||||
assert PARAM_SPACE['learning_rate']['min'] <= 0.000225 <= PARAM_SPACE['learning_rate']['max']
|
assert PARAM_SPACE['learning_rate']['min'] <= 0.000225 <= PARAM_SPACE['learning_rate']['max']
|
||||||
# Moderate switching schedule
|
# Moderate switching schedule
|
||||||
assert PARAM_SPACE['steps_per_switch']['min'] <= 10000 <= PARAM_SPACE['steps_per_switch']['max']
|
assert PARAM_SPACE['steps_per_switch']['min'] <= 5000 <= PARAM_SPACE['steps_per_switch']['max']
|
||||||
# Phase 2 had 13k timesteps — Phase 3 needs much more; check min >= 50k
|
# Phase 3 needs more than Phase 2's 13k; check min >= 20k
|
||||||
assert PARAM_SPACE['total_timesteps']['min'] >= 50000
|
assert PARAM_SPACE['total_timesteps']['min'] >= 20000
|
||||||
|
|
||||||
|
|
||||||
def test_wave3_seed_params_within_space():
|
def test_wave3_seed_params_within_space():
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue