fix: StuckTerminationWrapper — wall-clock timeout (12s) prevents 1min+ stuck episodes

When both DummyVecEnv cars get stuck against walls simultaneously, Unity physics slows to 1-2 FPS (heavy collision computation). At that speed, stuck_steps=40 takes over a minute of wall-clock time — observed twice by the user. Fix: add a max_stuck_seconds=12.0 wall-clock timeout. The timer resets whenever the car moves >= min_displacement from its last recorded progress position. The timeout fires regardless of step count if the car hasn't moved that far in 12 real-world seconds. Both triggers are preserved (step count OR time).
2026-04-19 16:30:50 -04:00 · 2026-04-19 16:30:50 -04:00 · 8b84409e58
parent dc563e2b6c
commit 8b84409e58
2 changed files with 109 additions and 7 deletions
--- a/agent/multitrack_runner.py
+++ b/agent/multitrack_runner.py
@ -125,7 +125,14 @@ class HealthCheckCallback(BaseCallback):
 class StuckTerminationWrapper(gym.Wrapper):
    """
    Terminates the episode when the car hasn't made meaningful positional
-    progress over `stuck_steps` consecutive steps.
+    progress over `stuck_steps` consecutive steps OR `max_stuck_seconds`
+    wall-clock seconds, whichever comes first.
+
+    The wall-clock timeout is critical for DummyVecEnv: when both cars are
+    simultaneously stuck against a wall, Unity's physics engine slows to
+    1-2 FPS (heavy collision computation). At that rate, stuck_steps=40
+    can take 1+ minutes of wall-clock time. The wall-clock timeout catches
+    this case regardless of sim speed.

    Handles two cases the sim misses:
    1. Car pressed slowly against a barrier — Unity's hit detection needs a
@ -135,14 +142,20 @@ class StuckTerminationWrapper(gym.Wrapper):

    When stuck is detected: terminated=True so SpeedRewardWrapper returns -1.0.
    """
-    def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5):
+    def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5,
+                 max_stuck_seconds: float = 12.0):
        super().__init__(env)
-        self.stuck_steps      = stuck_steps
-        self.min_displacement = min_displacement
-        self._pos_buf: deque  = deque(maxlen=stuck_steps)
+        self.stuck_steps        = stuck_steps
+        self.min_displacement   = min_displacement
+        self.max_stuck_seconds  = max_stuck_seconds
+        self._pos_buf: deque    = deque(maxlen=stuck_steps)
+        self._last_progress_pos = None
+        self._last_progress_t   = None

    def reset(self, **kwargs):
        self._pos_buf.clear()
+        self._last_progress_pos = None
+        self._last_progress_t   = None
        return self.env.reset(**kwargs)

    def step(self, action):
@ -154,13 +167,34 @@ class StuckTerminationWrapper(gym.Wrapper):
            terminated, truncated = done, False

        pos = info.get('pos', None)
+        now = time.time()
+
        if pos is not None:
            try:
-                self._pos_buf.append(np.array(list(pos)[:3], dtype=np.float64))
+                pos_arr = np.array(list(pos)[:3], dtype=np.float64)
+                self._pos_buf.append(pos_arr)
+
+                # Wall-clock stuck detection: reset timer whenever car moves >= min_displacement
+                if self._last_progress_pos is None:
+                    self._last_progress_pos = pos_arr
+                    self._last_progress_t   = now
+                else:
+                    moved = float(np.linalg.norm(pos_arr - self._last_progress_pos))
+                    if moved >= self.min_displacement:
+                        # Made meaningful progress — reset wall-clock timer
+                        self._last_progress_pos = pos_arr
+                        self._last_progress_t   = now
+                    elif (now - self._last_progress_t) > self.max_stuck_seconds:
+                        # Wall-clock timeout — terminate regardless of step count
+                        if not terminated:
+                            terminated = True
+                            info['stuck_termination'] = True
+                            info['stuck_reason'] = 'wall_clock_timeout'
+
            except (TypeError, ValueError):
                pass

-        # Only check once the buffer is full
+        # Step-count stuck detection (original logic)
        if not terminated and len(self._pos_buf) >= self.stuck_steps:
            displacement = float(np.linalg.norm(
                self._pos_buf[-1] - self._pos_buf[0]
@ -168,6 +202,7 @@ class StuckTerminationWrapper(gym.Wrapper):
            if displacement < self.min_displacement:
                terminated = True
                info['stuck_termination'] = True
+                info['stuck_reason'] = 'step_count'

        if len(result) == 5:
            return obs, reward, terminated, truncated, info
--- a/tests/test_wave3.py
+++ b/tests/test_wave3.py
@ -588,3 +588,70 @@ def test_stuck_termination_does_not_fire_when_moving():
    for _ in range(12):
        _, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))
    assert not terminated, 'Moving car should not trigger stuck termination'
+
+
+def test_stuck_termination_wall_clock_timeout():
+    """
+    Wall-clock timeout must terminate episode when sim slows way down.
+    With DummyVecEnv, both cars can get stuck simultaneously causing Unity
+    physics to slow to 1-2 FPS. At that rate, stuck_steps=40 takes 1+ minutes.
+    The wall-clock timeout catches this regardless of step count.
+    """
+    import time
+    from multitrack_runner import StuckTerminationWrapper
+
+    class FrozenEnv(MockGymEnv):
+        def step(self, action):
+            time.sleep(0.05)  # simulate slow sim step (20 FPS = slow physics)
+            info = {'pos': [5.0, 0.0, 3.0], 'speed': 0.01, 'cte': 0.1}
+            return np.zeros((80,160,3), dtype=np.uint8), 0.5, False, False, info
+
+    # stuck_steps=1000 (would take forever), but max_stuck_seconds=0.3 (short)
+    wrapper = StuckTerminationWrapper(
+        FrozenEnv(), stuck_steps=1000, min_displacement=0.5, max_stuck_seconds=0.3
+    )
+    wrapper.reset()
+    terminated = False
+    t0 = time.time()
+    for _ in range(50):
+        _, _, terminated, _, info = wrapper.step(np.array([0.0, 0.5]))
+        if terminated:
+            break
+    elapsed = time.time() - t0
+    assert terminated, 'Wall-clock timeout should terminate before stuck_steps=1000'
+    assert info.get('stuck_termination') is True
+    assert info.get('stuck_reason') == 'wall_clock_timeout'
+    assert elapsed < 5.0, f'Should have terminated quickly, took {elapsed:.1f}s'
+
+
+def test_stuck_termination_wall_clock_resets_on_progress():
+    """
+    Wall-clock timer must reset when car makes meaningful progress.
+    A car that moves slowly but consistently should not be terminated.
+    """
+    import time
+    from multitrack_runner import StuckTerminationWrapper
+
+    class SlowMovingEnv(MockGymEnv):
+        def __init__(self):
+            super().__init__()
+            self._step = 0
+        def step(self, action):
+            # Move 0.6m every 5 steps (enough to reset timer)
+            x = (self._step // 5) * 0.6
+            info = {'pos': [x, 0.0, 0.0], 'speed': 0.5, 'cte': 0.1}
+            self._step += 1
+            return np.zeros((80,160,3), dtype=np.uint8), 0.5, False, False, info
+
+    # Short wall-clock timeout but car makes progress often enough
+    wrapper = StuckTerminationWrapper(
+        SlowMovingEnv(), stuck_steps=1000, min_displacement=0.5, max_stuck_seconds=0.5
+    )
+    wrapper.reset()
+    terminated = False
+    for _ in range(30):
+        _, _, terminated, _, info = wrapper.step(np.array([0.0, 0.5]))
+        if terminated:
+            break
+        time.sleep(0.05)  # simulate slow sim: 20 FPS
+    assert not terminated, 'Slowly-but-consistently moving car should not be terminated'