fix: StuckTerminationWrapper — wall-clock timeout (12s) prevents 1min+ stuck episodes

When both DummyVecEnv cars get stuck against walls simultaneously, Unity physics slows to 1-2 FPS (heavy collision computation). At that speed, stuck_steps=40 takes more than a minute of wall-clock time — observed twice by the user. Fix: add a max_stuck_seconds=12.0 wall-clock timeout. The timer resets whenever the car moves >= min_displacement away from its last recorded progress position. The timeout fires regardless of step count once the car has gone more than 12 real-world seconds without moving that far. Both triggers are preserved (step count OR elapsed time).
2026-04-19 16:30:50 -04:00 · 2026-04-19 16:30:50 -04:00 · 8b84409e58
parent dc563e2b6c
commit 8b84409e58
2 changed files with 109 additions and 7 deletions
--- a/agent/multitrack_runner.py
+++ b/agent/multitrack_runner.py
@ -125,7 +125,14 @@ class HealthCheckCallback(BaseCallback):
 class StuckTerminationWrapper(gym.Wrapper):
    """
    Terminates the episode when the car hasn't made meaningful positional
-    progress over `stuck_steps` consecutive steps.
+    progress over `stuck_steps` consecutive steps OR `max_stuck_seconds`
    wall-clock seconds, whichever comes first.
    The wall-clock timeout is critical for DummyVecEnv: when both cars are
    simultaneously stuck against a wall, Unity's physics engine slows to
    1-2 FPS (heavy collision computation). At that rate, stuck_steps=40
    can take 1+ minutes of wall-clock time. The wall-clock timeout catches
    this case regardless of sim speed.
    Handles two cases the sim misses:
    1. Car pressed slowly against a barrier — Unity's hit detection needs a
@ -135,14 +142,20 @@ class StuckTerminationWrapper(gym.Wrapper):
    When stuck is detected: terminated=True so SpeedRewardWrapper returns -1.0.
    """
-    def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5):
+    def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5,
                 max_stuck_seconds: float = 12.0):
        super().__init__(env)
-        self.stuck_steps      = stuck_steps
+        self.stuck_steps        = stuck_steps
-        self.min_displacement = min_displacement
+        self.min_displacement   = min_displacement
-        self._pos_buf: deque  = deque(maxlen=stuck_steps)
+        self.max_stuck_seconds  = max_stuck_seconds
        self._pos_buf: deque    = deque(maxlen=stuck_steps)
        self._last_progress_pos = None
        self._last_progress_t   = None
    def reset(self, **kwargs):
        self._pos_buf.clear()
        self._last_progress_pos = None
        self._last_progress_t   = None
        return self.env.reset(**kwargs)
    def step(self, action):
@ -154,13 +167,34 @@ class StuckTerminationWrapper(gym.Wrapper):
            terminated, truncated = done, False
        pos = info.get('pos', None)
        now = time.time()
        if pos is not None:
            try:
-                self._pos_buf.append(np.array(list(pos)[:3], dtype=np.float64))
+                pos_arr = np.array(list(pos)[:3], dtype=np.float64)
                self._pos_buf.append(pos_arr)
                # Wall-clock stuck detection: reset timer whenever car moves >= min_displacement
                if self._last_progress_pos is None:
                    self._last_progress_pos = pos_arr
                    self._last_progress_t   = now
                else:
                    moved = float(np.linalg.norm(pos_arr - self._last_progress_pos))
                    if moved >= self.min_displacement:
                        # Made meaningful progress — reset wall-clock timer
                        self._last_progress_pos = pos_arr
                        self._last_progress_t   = now
                    elif (now - self._last_progress_t) > self.max_stuck_seconds:
                        # Wall-clock timeout — terminate regardless of step count
                        if not terminated:
                            terminated = True
                            info['stuck_termination'] = True
                            info['stuck_reason'] = 'wall_clock_timeout'
            except (TypeError, ValueError):
                pass
-        # Only check once the buffer is full
+        # Step-count stuck detection (original logic)
        if not terminated and len(self._pos_buf) >= self.stuck_steps:
            displacement = float(np.linalg.norm(
                self._pos_buf[-1] - self._pos_buf[0]
@ -168,6 +202,7 @@ class StuckTerminationWrapper(gym.Wrapper):
            if displacement < self.min_displacement:
                terminated = True
                info['stuck_termination'] = True
                info['stuck_reason'] = 'step_count'
        if len(result) == 5:
            return obs, reward, terminated, truncated, info
--- a/tests/test_wave3.py
+++ b/tests/test_wave3.py
@ -588,3 +588,70 @@ def test_stuck_termination_does_not_fire_when_moving():
    for _ in range(12):
        _, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))
    assert not terminated, 'Moving car should not trigger stuck termination'
 def test_stuck_termination_wall_clock_timeout():
    """
    Wall-clock timeout must terminate episode when sim slows way down.
    With DummyVecEnv, both cars can get stuck simultaneously causing Unity
    physics to slow to 1-2 FPS. At that rate, stuck_steps=40 takes 1+ minutes.
    The wall-clock timeout catches this regardless of step count.

    The mock env below always reports the same position and sleeps on every
    step, so only the wall-clock trigger can end the episode within the loop.
    """
    import time
    from multitrack_runner import StuckTerminationWrapper
    # Mock env whose reported position never changes and whose step is slow.
    class FrozenEnv(MockGymEnv):
        def step(self, action):
            time.sleep(0.05)  # simulate slow sim step (20 FPS = slow physics)
            # Constant 'pos' on every step: the car makes zero displacement.
            info = {'pos': [5.0, 0.0, 3.0], 'speed': 0.01, 'cte': 0.1}
            return np.zeros((80,160,3), dtype=np.uint8), 0.5, False, False, info
    # stuck_steps=1000 (would take forever), but max_stuck_seconds=0.3 (short)
    wrapper = StuckTerminationWrapper(
        FrozenEnv(), stuck_steps=1000, min_displacement=0.5, max_stuck_seconds=0.3
    )
    wrapper.reset()
    terminated = False
    t0 = time.time()
    # At 0.05 s per step the 0.3 s timeout should be exceeded after roughly
    # 7-8 steps, far below both the 50-step cap here and stuck_steps=1000.
    for _ in range(50):
        _, _, terminated, _, info = wrapper.step(np.array([0.0, 0.5]))
        if terminated:
            break
    elapsed = time.time() - t0
    assert terminated, 'Wall-clock timeout should terminate before stuck_steps=1000'
    assert info.get('stuck_termination') is True
    assert info.get('stuck_reason') == 'wall_clock_timeout'
    # Generous bound: even running all 50 steps would only sleep about 2.5 s.
    assert elapsed < 5.0, f'Should have terminated quickly, took {elapsed:.1f}s'
 def test_stuck_termination_wall_clock_resets_on_progress():
    """
    Wall-clock timer must reset when car makes meaningful progress.
    A car that moves slowly but consistently should not be terminated.

    The loop below runs longer in total than max_stuck_seconds, so the test
    can only pass if each progress event restarts the timer.
    """
    import time
    from multitrack_runner import StuckTerminationWrapper
    # Mock env whose x coordinate advances in 0.6 jumps, once every 5 steps.
    class SlowMovingEnv(MockGymEnv):
        def __init__(self):
            super().__init__()
            self._step = 0  # number of step() calls made so far
        def step(self, action):
            # Move 0.6m every 5 steps (enough to reset timer)
            # Integer division keeps x constant for 5 consecutive steps; each
            # jump of 0.6 exceeds the min_displacement=0.5 used below.
            x = (self._step // 5) * 0.6
            info = {'pos': [x, 0.0, 0.0], 'speed': 0.5, 'cte': 0.1}
            self._step += 1
            return np.zeros((80,160,3), dtype=np.uint8), 0.5, False, False, info
    # Short wall-clock timeout but car makes progress often enough
    wrapper = StuckTerminationWrapper(
        SlowMovingEnv(), stuck_steps=1000, min_displacement=0.5, max_stuck_seconds=0.5
    )
    wrapper.reset()
    terminated = False
    # 30 steps at ~0.05 s each is ~1.5 s in total (well over the 0.5 s timeout),
    # while progress events are only ~0.25 s apart (5 steps x 0.05 s).
    for _ in range(30):
        _, _, terminated, _, info = wrapper.step(np.array([0.0, 0.5]))
        if terminated:
            break
        time.sleep(0.05)  # simulate slow sim: 20 FPS
    assert not terminated, 'Slowly-but-consistently moving car should not be terminated'