fix: StuckTerminationWrapper — wall-clock timeout (12s) prevents 1min+ stuck episodes
When both DummyVecEnv cars get stuck against walls at the same time, Unity's physics slows to 1-2 FPS (heavy collision computation). At that speed, stuck_steps=40 takes more than a minute of wall-clock time — observed twice by the user. Fix: add a wall-clock timeout, max_stuck_seconds=12.0. The timer resets whenever the car has moved >= min_displacement from its last recorded progress position, and the timeout fires regardless of step count once the car has gone more than 12 real-world seconds without such progress. Both triggers are preserved (step count OR time).
This commit is contained in:
parent
dc563e2b6c
commit
8b84409e58
|
|
@ -125,7 +125,14 @@ class HealthCheckCallback(BaseCallback):
|
||||||
class StuckTerminationWrapper(gym.Wrapper):
|
class StuckTerminationWrapper(gym.Wrapper):
|
||||||
"""
|
"""
|
||||||
Terminates the episode when the car hasn't made meaningful positional
|
Terminates the episode when the car hasn't made meaningful positional
|
||||||
progress over `stuck_steps` consecutive steps.
|
progress over `stuck_steps` consecutive steps OR `max_stuck_seconds`
|
||||||
|
wall-clock seconds, whichever comes first.
|
||||||
|
|
||||||
|
The wall-clock timeout is critical for DummyVecEnv: when both cars are
|
||||||
|
simultaneously stuck against a wall, Unity's physics engine slows to
|
||||||
|
1-2 FPS (heavy collision computation). At that rate, stuck_steps=40
|
||||||
|
can take 1+ minutes of wall-clock time. The wall-clock timeout catches
|
||||||
|
this case regardless of sim speed.
|
||||||
|
|
||||||
Handles two cases the sim misses:
|
Handles two cases the sim misses:
|
||||||
1. Car pressed slowly against a barrier — Unity's hit detection needs a
|
1. Car pressed slowly against a barrier — Unity's hit detection needs a
|
||||||
|
|
@ -135,14 +142,20 @@ class StuckTerminationWrapper(gym.Wrapper):
|
||||||
|
|
||||||
When stuck is detected: terminated=True so SpeedRewardWrapper returns -1.0.
|
When stuck is detected: terminated=True so SpeedRewardWrapper returns -1.0.
|
||||||
"""
|
"""
|
||||||
def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5):
|
def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5,
|
||||||
|
max_stuck_seconds: float = 12.0):
|
||||||
super().__init__(env)
|
super().__init__(env)
|
||||||
self.stuck_steps = stuck_steps
|
self.stuck_steps = stuck_steps
|
||||||
self.min_displacement = min_displacement
|
self.min_displacement = min_displacement
|
||||||
|
self.max_stuck_seconds = max_stuck_seconds
|
||||||
self._pos_buf: deque = deque(maxlen=stuck_steps)
|
self._pos_buf: deque = deque(maxlen=stuck_steps)
|
||||||
|
self._last_progress_pos = None
|
||||||
|
self._last_progress_t = None
|
||||||
|
|
||||||
def reset(self, **kwargs):
|
def reset(self, **kwargs):
|
||||||
self._pos_buf.clear()
|
self._pos_buf.clear()
|
||||||
|
self._last_progress_pos = None
|
||||||
|
self._last_progress_t = None
|
||||||
return self.env.reset(**kwargs)
|
return self.env.reset(**kwargs)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
|
|
@ -154,13 +167,34 @@ class StuckTerminationWrapper(gym.Wrapper):
|
||||||
terminated, truncated = done, False
|
terminated, truncated = done, False
|
||||||
|
|
||||||
pos = info.get('pos', None)
|
pos = info.get('pos', None)
|
||||||
|
now = time.time()
|
||||||
|
|
||||||
if pos is not None:
|
if pos is not None:
|
||||||
try:
|
try:
|
||||||
self._pos_buf.append(np.array(list(pos)[:3], dtype=np.float64))
|
pos_arr = np.array(list(pos)[:3], dtype=np.float64)
|
||||||
|
self._pos_buf.append(pos_arr)
|
||||||
|
|
||||||
|
# Wall-clock stuck detection: reset timer whenever car has moved >= min_displacement from its last progress position
|
||||||
|
if self._last_progress_pos is None:
|
||||||
|
self._last_progress_pos = pos_arr
|
||||||
|
self._last_progress_t = now
|
||||||
|
else:
|
||||||
|
moved = float(np.linalg.norm(pos_arr - self._last_progress_pos))
|
||||||
|
if moved >= self.min_displacement:
|
||||||
|
# Made meaningful progress — reset wall-clock timer
|
||||||
|
self._last_progress_pos = pos_arr
|
||||||
|
self._last_progress_t = now
|
||||||
|
elif (now - self._last_progress_t) > self.max_stuck_seconds:
|
||||||
|
# Wall-clock timeout — terminate regardless of step count
|
||||||
|
if not terminated:
|
||||||
|
terminated = True
|
||||||
|
info['stuck_termination'] = True
|
||||||
|
info['stuck_reason'] = 'wall_clock_timeout'
|
||||||
|
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Only check once the buffer is full
|
# Step-count stuck detection (original logic)
|
||||||
if not terminated and len(self._pos_buf) >= self.stuck_steps:
|
if not terminated and len(self._pos_buf) >= self.stuck_steps:
|
||||||
displacement = float(np.linalg.norm(
|
displacement = float(np.linalg.norm(
|
||||||
self._pos_buf[-1] - self._pos_buf[0]
|
self._pos_buf[-1] - self._pos_buf[0]
|
||||||
|
|
@ -168,6 +202,7 @@ class StuckTerminationWrapper(gym.Wrapper):
|
||||||
if displacement < self.min_displacement:
|
if displacement < self.min_displacement:
|
||||||
terminated = True
|
terminated = True
|
||||||
info['stuck_termination'] = True
|
info['stuck_termination'] = True
|
||||||
|
info['stuck_reason'] = 'step_count'
|
||||||
|
|
||||||
if len(result) == 5:
|
if len(result) == 5:
|
||||||
return obs, reward, terminated, truncated, info
|
return obs, reward, terminated, truncated, info
|
||||||
|
|
|
||||||
|
|
@ -588,3 +588,70 @@ def test_stuck_termination_does_not_fire_when_moving():
|
||||||
for _ in range(12):
|
for _ in range(12):
|
||||||
_, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))
|
_, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))
|
||||||
assert not terminated, 'Moving car should not trigger stuck termination'
|
assert not terminated, 'Moving car should not trigger stuck termination'
|
||||||
|
|
||||||
|
|
||||||
|
def test_stuck_termination_wall_clock_timeout():
    """
    A car that never moves must be cut off by the wall-clock limit.

    The fake env below reports the same position on every step and spends
    50 ms of real time per step. `stuck_steps` is set far above the number
    of steps this test takes, so the step-count trigger cannot be what ends
    the episode; the test expects `max_stuck_seconds` to do it and checks
    the reason recorded in `info`.
    """
    import time
    from multitrack_runner import StuckTerminationWrapper

    class FrozenEnv(MockGymEnv):
        def step(self, action):
            # simulate slow sim step (20 FPS = slow physics)
            time.sleep(0.05)
            # Position is constant, so the car never makes progress.
            step_info = {'pos': [5.0, 0.0, 3.0], 'speed': 0.01, 'cte': 0.1}
            frame = np.zeros((80,160,3), dtype=np.uint8)
            return frame, 0.5, False, False, step_info

    # Huge step budget, tiny time budget: only the timer can realistically fire.
    wrapper = StuckTerminationWrapper(
        FrozenEnv(),
        stuck_steps=1000,
        min_displacement=0.5,
        max_stuck_seconds=0.3,
    )
    wrapper.reset()

    terminated = False
    steps_remaining = 50
    started_at = time.time()
    while steps_remaining > 0 and not terminated:
        _obs, _reward, terminated, _truncated, info = wrapper.step(
            np.array([0.0, 0.5])
        )
        steps_remaining -= 1
    elapsed = time.time() - started_at

    assert terminated, 'Wall-clock timeout should terminate before stuck_steps=1000'
    assert info.get('stuck_termination') is True
    assert info.get('stuck_reason') == 'wall_clock_timeout'
    assert elapsed < 5.0, f'Should have terminated quickly, took {elapsed:.1f}s'
|
||||||
|
|
||||||
|
|
||||||
|
def test_stuck_termination_wall_clock_resets_on_progress():
    """
    Regular progress must keep the wall-clock trigger from firing.

    The fake env advances the reported x position by 0.6 every five steps,
    which exceeds the `min_displacement` of 0.5 used here. The loop sleeps
    50 ms after each step, so progress arrives well inside the 0.5 s
    `max_stuck_seconds` window and the episode is expected to stay alive.
    """
    import time
    from multitrack_runner import StuckTerminationWrapper

    class SlowMovingEnv(MockGymEnv):
        def __init__(self):
            super().__init__()
            self._step = 0

        def step(self, action):
            # Move 0.6m every 5 steps (enough to reset timer)
            x = (self._step // 5) * 0.6
            self._step += 1
            step_info = {'pos': [x, 0.0, 0.0], 'speed': 0.5, 'cte': 0.1}
            frame = np.zeros((80,160,3), dtype=np.uint8)
            return frame, 0.5, False, False, step_info

    # Short wall-clock timeout but car makes progress often enough
    wrapper = StuckTerminationWrapper(
        SlowMovingEnv(),
        stuck_steps=1000,
        min_displacement=0.5,
        max_stuck_seconds=0.5,
    )
    wrapper.reset()

    terminated = False
    for _ in range(30):
        _obs, _reward, terminated, _truncated, info = wrapper.step(
            np.array([0.0, 0.5])
        )
        if not terminated:
            # simulate slow sim: 20 FPS
            time.sleep(0.05)
            continue
        break

    assert not terminated, 'Slowly-but-consistently moving car should not be terminated'
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue