fix: StuckTerminationWrapper — wall-clock timeout (12s) prevents 1min+ stuck episodes
When both DummyVecEnv cars get stuck against walls simultaneously, Unity physics slows to 1-2 FPS (heavy collision computation). At that speed, stuck_steps=40 takes 1+ minutes of wall-clock time — observed twice by the user. Fix: add a max_stuck_seconds=12.0 wall-clock timeout. The timer resets whenever the car moves >= min_displacement away from its last recorded progress position, and the timeout fires regardless of step count if the car hasn't moved that far in 12 real-world seconds. Both triggers are preserved (step count OR time).
This commit is contained in:
parent
dc563e2b6c
commit
8b84409e58
|
|
@ -125,7 +125,14 @@ class HealthCheckCallback(BaseCallback):
|
|||
class StuckTerminationWrapper(gym.Wrapper):
|
||||
"""
|
||||
Terminates the episode when the car hasn't made meaningful positional
|
||||
progress over `stuck_steps` consecutive steps.
|
||||
progress over `stuck_steps` consecutive steps OR `max_stuck_seconds`
|
||||
wall-clock seconds, whichever comes first.
|
||||
|
||||
The wall-clock timeout is critical for DummyVecEnv: when both cars are
|
||||
simultaneously stuck against a wall, Unity's physics engine slows to
|
||||
1-2 FPS (heavy collision computation). At that rate, stuck_steps=40
|
||||
can take 1+ minutes of wall-clock time. The wall-clock timeout catches
|
||||
this case regardless of sim speed.
|
||||
|
||||
Handles two cases the sim misses:
|
||||
1. Car pressed slowly against a barrier — Unity's hit detection needs a
|
||||
|
|
@ -135,14 +142,20 @@ class StuckTerminationWrapper(gym.Wrapper):
|
|||
|
||||
When stuck is detected: terminated=True so SpeedRewardWrapper returns -1.0.
|
||||
"""
|
||||
def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5):
|
||||
def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5,
|
||||
max_stuck_seconds: float = 12.0):
|
||||
super().__init__(env)
|
||||
self.stuck_steps = stuck_steps
|
||||
self.min_displacement = min_displacement
|
||||
self._pos_buf: deque = deque(maxlen=stuck_steps)
|
||||
self.stuck_steps = stuck_steps
|
||||
self.min_displacement = min_displacement
|
||||
self.max_stuck_seconds = max_stuck_seconds
|
||||
self._pos_buf: deque = deque(maxlen=stuck_steps)
|
||||
self._last_progress_pos = None
|
||||
self._last_progress_t = None
|
||||
|
||||
def reset(self, **kwargs):
|
||||
self._pos_buf.clear()
|
||||
self._last_progress_pos = None
|
||||
self._last_progress_t = None
|
||||
return self.env.reset(**kwargs)
|
||||
|
||||
def step(self, action):
|
||||
|
|
@ -154,13 +167,34 @@ class StuckTerminationWrapper(gym.Wrapper):
|
|||
terminated, truncated = done, False
|
||||
|
||||
pos = info.get('pos', None)
|
||||
now = time.time()
|
||||
|
||||
if pos is not None:
|
||||
try:
|
||||
self._pos_buf.append(np.array(list(pos)[:3], dtype=np.float64))
|
||||
pos_arr = np.array(list(pos)[:3], dtype=np.float64)
|
||||
self._pos_buf.append(pos_arr)
|
||||
|
||||
# Wall-clock stuck detection: reset timer whenever car moves >= min_displacement
|
||||
if self._last_progress_pos is None:
|
||||
self._last_progress_pos = pos_arr
|
||||
self._last_progress_t = now
|
||||
else:
|
||||
moved = float(np.linalg.norm(pos_arr - self._last_progress_pos))
|
||||
if moved >= self.min_displacement:
|
||||
# Made meaningful progress — reset wall-clock timer
|
||||
self._last_progress_pos = pos_arr
|
||||
self._last_progress_t = now
|
||||
elif (now - self._last_progress_t) > self.max_stuck_seconds:
|
||||
# Wall-clock timeout — terminate regardless of step count
|
||||
if not terminated:
|
||||
terminated = True
|
||||
info['stuck_termination'] = True
|
||||
info['stuck_reason'] = 'wall_clock_timeout'
|
||||
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Only check once the buffer is full
|
||||
# Step-count stuck detection (original logic)
|
||||
if not terminated and len(self._pos_buf) >= self.stuck_steps:
|
||||
displacement = float(np.linalg.norm(
|
||||
self._pos_buf[-1] - self._pos_buf[0]
|
||||
|
|
@ -168,6 +202,7 @@ class StuckTerminationWrapper(gym.Wrapper):
|
|||
if displacement < self.min_displacement:
|
||||
terminated = True
|
||||
info['stuck_termination'] = True
|
||||
info['stuck_reason'] = 'step_count'
|
||||
|
||||
if len(result) == 5:
|
||||
return obs, reward, terminated, truncated, info
|
||||
|
|
|
|||
|
|
@ -588,3 +588,70 @@ def test_stuck_termination_does_not_fire_when_moving():
|
|||
for _ in range(12):
|
||||
_, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))
|
||||
assert not terminated, 'Moving car should not trigger stuck termination'
|
||||
|
||||
|
||||
def test_stuck_termination_wall_clock_timeout():
    """
    A car that never moves must be ended by the wall-clock limit.

    The mocked env sleeps 50 ms per step and always reports the same
    position. With stuck_steps=1000 the step-count trigger cannot fire
    within the 50 steps driven here, so only max_stuck_seconds=0.3 can
    terminate the episode.
    """
    import time
    from multitrack_runner import StuckTerminationWrapper

    class FrozenEnv(MockGymEnv):
        def step(self, action):
            # Every step costs 50 ms of real time and 'pos' never changes.
            time.sleep(0.05)
            stuck_info = {'pos': [5.0, 0.0, 3.0], 'speed': 0.01, 'cte': 0.1}
            frame = np.zeros((80, 160, 3), dtype=np.uint8)
            return frame, 0.5, False, False, stuck_info

    # Step-count limit is far out of reach; the time limit is deliberately short.
    wrapped = StuckTerminationWrapper(
        FrozenEnv(),
        stuck_steps=1000,
        min_displacement=0.5,
        max_stuck_seconds=0.3,
    )
    wrapped.reset()

    terminated = False
    remaining = 50
    started = time.time()
    while remaining > 0 and not terminated:
        _, _, terminated, _, info = wrapped.step(np.array([0.0, 0.5]))
        remaining -= 1
    elapsed = time.time() - started

    assert terminated, 'Wall-clock timeout should terminate before stuck_steps=1000'
    assert info.get('stuck_termination') is True
    assert info.get('stuck_reason') == 'wall_clock_timeout'
    assert elapsed < 5.0, f'Should have terminated quickly, took {elapsed:.1f}s'
|
||||
|
||||
|
||||
def test_stuck_termination_wall_clock_resets_on_progress():
    """
    Regular progress must keep re-arming the wall-clock timer.

    The mocked env advances x by 0.6 every fifth step (above the 0.5
    min_displacement) while the loop sleeps 50 ms between steps, so the
    0.5 s wall-clock limit should never be reached over 30 steps.
    """
    import time
    from multitrack_runner import StuckTerminationWrapper

    class SlowMovingEnv(MockGymEnv):
        def __init__(self):
            super().__init__()
            self._step = 0

        def step(self, action):
            # x jumps by 0.6 once per block of 5 steps, enough to reset the timer.
            blocks_done = self._step // 5
            info = {'pos': [blocks_done * 0.6, 0.0, 0.0], 'speed': 0.5, 'cte': 0.1}
            self._step += 1
            frame = np.zeros((80, 160, 3), dtype=np.uint8)
            return frame, 0.5, False, False, info

    # The time limit is short, but progress arrives more often than it expires.
    wrapped = StuckTerminationWrapper(
        SlowMovingEnv(),
        stuck_steps=1000,
        min_displacement=0.5,
        max_stuck_seconds=0.5,
    )
    wrapped.reset()

    terminated = False
    remaining = 30
    while remaining > 0:
        _, _, terminated, _, info = wrapped.step(np.array([0.0, 0.5]))
        if terminated:
            break
        time.sleep(0.05)  # pace the loop at roughly 20 steps per second
        remaining -= 1

    assert not terminated, 'Slowly-but-consistently moving car should not be terminated'
|
||||
|
|
|
|||
Loading…
Reference in New Issue