fix: StuckTerminationWrapper — wall-clock timeout (12s) prevents 1min+ stuck episodes

When both DummyVecEnv cars get stuck against walls simultaneously, Unity
physics slows to 1-2 FPS (heavy collision computation). At that speed,
stuck_steps=40 takes 1+ minute of wall-clock time — observed twice by user.

Fix: add a max_stuck_seconds=12.0 wall-clock timeout. The timer resets whenever
the car has moved >= min_displacement from the position where the timer was last
reset. The timeout fires regardless of step count if the car hasn't moved that
far in 12 real-world seconds. Both triggers are preserved (step count OR time).
This commit is contained in:
Paul Huliganga 2026-04-19 16:30:50 -04:00
parent dc563e2b6c
commit 8b84409e58
2 changed files with 109 additions and 7 deletions

View File

@ -125,7 +125,14 @@ class HealthCheckCallback(BaseCallback):
class StuckTerminationWrapper(gym.Wrapper): class StuckTerminationWrapper(gym.Wrapper):
""" """
Terminates the episode when the car hasn't made meaningful positional Terminates the episode when the car hasn't made meaningful positional
progress over `stuck_steps` consecutive steps. progress over `stuck_steps` consecutive steps OR `max_stuck_seconds`
wall-clock seconds, whichever comes first.
The wall-clock timeout is critical for DummyVecEnv: when both cars are
simultaneously stuck against a wall, Unity's physics engine slows to
1-2 FPS (heavy collision computation). At that rate, stuck_steps=40
can take 1+ minutes of wall-clock time. The wall-clock timeout catches
this case regardless of sim speed.
Handles two cases the sim misses: Handles two cases the sim misses:
1. Car pressed slowly against a barrier Unity's hit detection needs a 1. Car pressed slowly against a barrier Unity's hit detection needs a
@ -135,14 +142,20 @@ class StuckTerminationWrapper(gym.Wrapper):
When stuck is detected: terminated=True so SpeedRewardWrapper returns -1.0. When stuck is detected: terminated=True so SpeedRewardWrapper returns -1.0.
""" """
def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5): def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5,
max_stuck_seconds: float = 12.0):
super().__init__(env) super().__init__(env)
self.stuck_steps = stuck_steps self.stuck_steps = stuck_steps
self.min_displacement = min_displacement self.min_displacement = min_displacement
self._pos_buf: deque = deque(maxlen=stuck_steps) self.max_stuck_seconds = max_stuck_seconds
self._pos_buf: deque = deque(maxlen=stuck_steps)
self._last_progress_pos = None
self._last_progress_t = None
def reset(self, **kwargs): def reset(self, **kwargs):
self._pos_buf.clear() self._pos_buf.clear()
self._last_progress_pos = None
self._last_progress_t = None
return self.env.reset(**kwargs) return self.env.reset(**kwargs)
def step(self, action): def step(self, action):
@ -154,13 +167,34 @@ class StuckTerminationWrapper(gym.Wrapper):
terminated, truncated = done, False terminated, truncated = done, False
pos = info.get('pos', None) pos = info.get('pos', None)
now = time.time()
if pos is not None: if pos is not None:
try: try:
self._pos_buf.append(np.array(list(pos)[:3], dtype=np.float64)) pos_arr = np.array(list(pos)[:3], dtype=np.float64)
self._pos_buf.append(pos_arr)
# Wall-clock stuck detection: reset timer whenever car moves >= min_displacement
if self._last_progress_pos is None:
self._last_progress_pos = pos_arr
self._last_progress_t = now
else:
moved = float(np.linalg.norm(pos_arr - self._last_progress_pos))
if moved >= self.min_displacement:
# Made meaningful progress — reset wall-clock timer
self._last_progress_pos = pos_arr
self._last_progress_t = now
elif (now - self._last_progress_t) > self.max_stuck_seconds:
# Wall-clock timeout — terminate regardless of step count
if not terminated:
terminated = True
info['stuck_termination'] = True
info['stuck_reason'] = 'wall_clock_timeout'
except (TypeError, ValueError): except (TypeError, ValueError):
pass pass
# Only check once the buffer is full # Step-count stuck detection (original logic)
if not terminated and len(self._pos_buf) >= self.stuck_steps: if not terminated and len(self._pos_buf) >= self.stuck_steps:
displacement = float(np.linalg.norm( displacement = float(np.linalg.norm(
self._pos_buf[-1] - self._pos_buf[0] self._pos_buf[-1] - self._pos_buf[0]
@ -168,6 +202,7 @@ class StuckTerminationWrapper(gym.Wrapper):
if displacement < self.min_displacement: if displacement < self.min_displacement:
terminated = True terminated = True
info['stuck_termination'] = True info['stuck_termination'] = True
info['stuck_reason'] = 'step_count'
if len(result) == 5: if len(result) == 5:
return obs, reward, terminated, truncated, info return obs, reward, terminated, truncated, info

View File

@ -588,3 +588,70 @@ def test_stuck_termination_does_not_fire_when_moving():
for _ in range(12): for _ in range(12):
_, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5])) _, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))
assert not terminated, 'Moving car should not trigger stuck termination' assert not terminated, 'Moving car should not trigger stuck termination'
def test_stuck_termination_wall_clock_timeout():
    """
    Wall-clock timeout must terminate the episode when the sim slows way down.

    With DummyVecEnv, both cars can get stuck simultaneously, causing Unity
    physics to slow to 1-2 FPS. At that rate, stuck_steps=40 takes 1+ minutes.
    The wall-clock timeout catches this regardless of step count.

    The env below never moves the car and sleeps 50 ms per step, so with
    max_stuck_seconds=0.3 the wrapper is expected to terminate the episode
    after a handful of steps, long before stuck_steps=1000 could be reached.
    """
    import time

    from multitrack_runner import StuckTerminationWrapper

    class FrozenEnv(MockGymEnv):
        """Mock env whose car stays at a fixed position on every step."""

        def step(self, action):
            time.sleep(0.05)  # simulate slow sim step (20 FPS = slow physics)
            info = {'pos': [5.0, 0.0, 3.0], 'speed': 0.01, 'cte': 0.1}
            return np.zeros((80, 160, 3), dtype=np.uint8), 0.5, False, False, info

    # stuck_steps=1000 (would take forever), but max_stuck_seconds=0.3 (short)
    wrapper = StuckTerminationWrapper(
        FrozenEnv(), stuck_steps=1000, min_displacement=0.5, max_stuck_seconds=0.3
    )
    wrapper.reset()

    terminated = False
    info = {}  # pre-bound so the assertions below never hit an unbound name
    # Use the monotonic clock for the test's own duration measurement: it
    # cannot go backwards if the system clock is adjusted mid-test.
    t0 = time.monotonic()
    for _ in range(50):
        _, _, terminated, _, info = wrapper.step(np.array([0.0, 0.5]))
        if terminated:
            break
    elapsed = time.monotonic() - t0

    assert terminated, 'Wall-clock timeout should terminate before stuck_steps=1000'
    assert info.get('stuck_termination') is True, (
        f"expected info['stuck_termination'] to be True, got info={info!r}"
    )
    assert info.get('stuck_reason') == 'wall_clock_timeout', (
        f"expected stuck_reason 'wall_clock_timeout', "
        f"got {info.get('stuck_reason')!r}"
    )
    assert elapsed < 5.0, f'Should have terminated quickly, took {elapsed:.1f}s'
def test_stuck_termination_wall_clock_resets_on_progress():
    """
    Wall-clock timer must reset when the car makes meaningful progress.

    A car that moves slowly but consistently should not be terminated.

    Timing budget: the mock car advances 0.6 m every 5 steps and the loop
    sleeps 50 ms per step, so progress arrives roughly every 0.25 s. The
    timeout is 1.0 s, leaving about a 4x margin against scheduler jitter on
    a loaded machine, while the whole run (30 steps, about 1.5 s) still
    outlasts the timeout, so the test can only pass if the timer is
    actually being reset on progress.
    """
    import time

    from multitrack_runner import StuckTerminationWrapper

    class SlowMovingEnv(MockGymEnv):
        """Mock env whose x position jumps by 0.6 m on every fifth step."""

        def __init__(self):
            super().__init__()
            self._step = 0

        def step(self, action):
            # Move 0.6m every 5 steps (enough to reset timer: 0.6 >= 0.5)
            x = (self._step // 5) * 0.6
            info = {'pos': [x, 0.0, 0.0], 'speed': 0.5, 'cte': 0.1}
            self._step += 1
            return np.zeros((80, 160, 3), dtype=np.uint8), 0.5, False, False, info

    # Wall-clock timeout shorter than the total run, but much longer than
    # the interval between two progress events.
    wrapper = StuckTerminationWrapper(
        SlowMovingEnv(), stuck_steps=1000, min_displacement=0.5, max_stuck_seconds=1.0
    )
    wrapper.reset()

    terminated = False
    for _ in range(30):
        _, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))
        if terminated:
            break
        time.sleep(0.05)  # simulate slow sim: 20 FPS

    assert not terminated, 'Slowly-but-consistently moving car should not be terminated'