diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py
index a02de6e..4aee9df 100644
--- a/agent/multitrack_runner.py
+++ b/agent/multitrack_runner.py
@@ -125,7 +125,14 @@ class HealthCheckCallback(BaseCallback):
 class StuckTerminationWrapper(gym.Wrapper):
     """
     Terminates the episode when the car hasn't made meaningful positional
-    progress over `stuck_steps` consecutive steps.
+    progress over `stuck_steps` consecutive steps OR `max_stuck_seconds`
+    wall-clock seconds, whichever comes first.
+
+    The wall-clock timeout is critical for DummyVecEnv: when both cars are
+    simultaneously stuck against a wall, Unity's physics engine slows to
+    1-2 FPS (heavy collision computation). At that rate, stuck_steps=40
+    can take 1+ minutes of wall-clock time. The wall-clock timeout catches
+    this case regardless of sim speed.
 
     Handles two cases the sim misses:
     1. Car pressed slowly against a barrier — Unity's hit detection needs a
@@ -135,14 +142,20 @@ class StuckTerminationWrapper(gym.Wrapper):
     When stuck is detected: terminated=True so SpeedRewardWrapper returns -1.0.
     """
 
-    def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5):
+    def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5,
+                 max_stuck_seconds: float = 12.0):
         super().__init__(env)
-        self.stuck_steps      = stuck_steps
-        self.min_displacement = min_displacement
-        self._pos_buf: deque  = deque(maxlen=stuck_steps)
+        self.stuck_steps        = stuck_steps
+        self.min_displacement   = min_displacement
+        self.max_stuck_seconds  = max_stuck_seconds
+        self._pos_buf: deque    = deque(maxlen=stuck_steps)
+        self._last_progress_pos = None
+        self._last_progress_t   = None
 
     def reset(self, **kwargs):
         self._pos_buf.clear()
+        self._last_progress_pos = None
+        self._last_progress_t   = None
         return self.env.reset(**kwargs)
 
     def step(self, action):
@@ -154,13 +167,34 @@ class StuckTerminationWrapper(gym.Wrapper):
             terminated, truncated = done, False
 
         pos = info.get('pos', None)
+        now = time.monotonic()  # monotonic: elapsed time must not jump with system-clock/NTP adjustments
+
         if pos is not None:
             try:
-                self._pos_buf.append(np.array(list(pos)[:3], dtype=np.float64))
+                pos_arr = np.array(list(pos)[:3], dtype=np.float64)
+                self._pos_buf.append(pos_arr)
+
+                # Wall-clock stuck detection: reset timer whenever car moves >= min_displacement
+                if self._last_progress_pos is None:
+                    self._last_progress_pos = pos_arr
+                    self._last_progress_t = now
+                else:
+                    moved = float(np.linalg.norm(pos_arr - self._last_progress_pos))
+                    if moved >= self.min_displacement:
+                        # Made meaningful progress — reset wall-clock timer
+                        self._last_progress_pos = pos_arr
+                        self._last_progress_t = now
+                    elif (now - self._last_progress_t) > self.max_stuck_seconds:
+                        # Wall-clock timeout — terminate regardless of step count
+                        if not terminated:
+                            terminated = True
+                            info['stuck_termination'] = True
+                            info['stuck_reason'] = 'wall_clock_timeout'
+
             except (TypeError, ValueError):
                 pass
 
-        # Only check once the buffer is full
+        # Step-count stuck detection (original logic)
         if not terminated and len(self._pos_buf) >= self.stuck_steps:
             displacement = float(np.linalg.norm(
                 self._pos_buf[-1] - self._pos_buf[0]
@@ -168,6 +202,7 @@ class StuckTerminationWrapper(gym.Wrapper):
             if displacement < self.min_displacement:
                 terminated = True
                 info['stuck_termination'] = True
+                info['stuck_reason'] = 'step_count'
 
         if len(result) == 5:
             return obs, reward, terminated, truncated, info
diff --git a/tests/test_wave3.py b/tests/test_wave3.py
index 39c77bb..cc40332 100644
--- a/tests/test_wave3.py
+++ b/tests/test_wave3.py
@@ -588,3 +588,70 @@ def test_stuck_termination_does_not_fire_when_moving():
     for _ in range(12):
         _, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))
         assert not terminated, 'Moving car should not trigger stuck termination'
+
+
+def test_stuck_termination_wall_clock_timeout():
+    """
+    Wall-clock timeout must terminate episode when sim slows way down.
+    With DummyVecEnv, both cars can get stuck simultaneously causing Unity
+    physics to slow to 1-2 FPS. At that rate, stuck_steps=40 takes 1+ minutes.
+    The wall-clock timeout catches this regardless of step count.
+    """
+    import time
+    from multitrack_runner import StuckTerminationWrapper
+
+    class FrozenEnv(MockGymEnv):
+        def step(self, action):
+            time.sleep(0.05)  # simulate slow sim step (20 FPS = slow physics)
+            info = {'pos': [5.0, 0.0, 3.0], 'speed': 0.01, 'cte': 0.1}
+            return np.zeros((80,160,3), dtype=np.uint8), 0.5, False, False, info
+
+    # stuck_steps=1000 (would take forever), but max_stuck_seconds=0.3 (short)
+    wrapper = StuckTerminationWrapper(
+        FrozenEnv(), stuck_steps=1000, min_displacement=0.5, max_stuck_seconds=0.3
+    )
+    wrapper.reset()
+    terminated = False
+    t0 = time.monotonic()
+    for _ in range(50):
+        _, _, terminated, _, info = wrapper.step(np.array([0.0, 0.5]))
+        if terminated:
+            break
+    elapsed = time.monotonic() - t0
+    assert terminated, 'Wall-clock timeout should terminate before stuck_steps=1000'
+    assert info.get('stuck_termination') is True
+    assert info.get('stuck_reason') == 'wall_clock_timeout'
+    assert elapsed < 5.0, f'Should have terminated quickly, took {elapsed:.1f}s'
+
+
+def test_stuck_termination_wall_clock_resets_on_progress():
+    """
+    Wall-clock timer must reset when car makes meaningful progress.
+    A car that moves slowly but consistently should not be terminated.
+    """
+    import time
+    from multitrack_runner import StuckTerminationWrapper
+
+    class SlowMovingEnv(MockGymEnv):
+        def __init__(self):
+            super().__init__()
+            self._step = 0
+        def step(self, action):
+            # Move 0.6m every 5 steps (enough to reset timer)
+            x = (self._step // 5) * 0.6
+            info = {'pos': [x, 0.0, 0.0], 'speed': 0.5, 'cte': 0.1}
+            self._step += 1
+            return np.zeros((80,160,3), dtype=np.uint8), 0.5, False, False, info
+
+    # Short wall-clock timeout but car makes progress often enough
+    wrapper = StuckTerminationWrapper(
+        SlowMovingEnv(), stuck_steps=1000, min_displacement=0.5, max_stuck_seconds=0.5
+    )
+    wrapper.reset()
+    terminated = False
+    for _ in range(30):
+        _, _, terminated, _, info = wrapper.step(np.array([0.0, 0.5]))
+        if terminated:
+            break
+        time.sleep(0.05)  # simulate slow sim: 20 FPS
+    assert not terminated, 'Slowly-but-consistently moving car should not be terminated'