fix: StuckTerminationWrapper + deque import + 102 tests

StuckTerminationWrapper added to wrap_env stack (between ThrottleClamp
and SpeedReward):
- Terminates episode after stuck_steps=80 steps with <0.5m displacement
- Handles slow barrier contact that Unity hit detection misses
- Handles off-lap-line circles (efficiency→0 gave zero reward but no
  termination; now gives -1.0 after 80 steps = ~4s of non-progress)
- Wrapper stack: ThrottleClamp → StuckTermination → SpeedReward

Also: fix the missing deque import in multitrack_runner.py, which caused a NameError.

Phase 4 results cleared again (Trial 1 ran without StuckTermination).

Tests: 2 new stuck-termination tests, 102 total.

Agent: pi
Tests: 102 passed
Tests-Added: 2
TypeScript: N/A
Paul Huliganga 2026-04-15 09:17:27 -04:00
parent 5d1227833d
commit f9f6a09744
5 changed files with 154 additions and 2 deletions

View File

@@ -51,6 +51,7 @@ import sys
import time
import json
import numpy as np
from collections import deque
from datetime import datetime
import gymnasium as gym
@@ -120,10 +121,62 @@ class HealthCheckCallback(BaseCallback):
        return True

# ---- Environment factory ----
class StuckTerminationWrapper(gym.Wrapper):
"""
Terminates the episode when the car hasn't made meaningful positional
progress over `stuck_steps` consecutive steps.
Handles two cases the sim misses:
1. Car pressed slowly against a barrier Unity's hit detection needs a
velocity threshold; slow contact leaves hit='none' and episode open.
2. Car circling off the start/finish line efficiency0 gives zero reward
but the episode never ends, wasting training steps with no signal.
When stuck is detected: terminated=True so SpeedRewardWrapper returns -1.0.
"""
def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5):
super().__init__(env)
self.stuck_steps = stuck_steps
self.min_displacement = min_displacement
self._pos_buf: deque = deque(maxlen=stuck_steps)
def reset(self, **kwargs):
self._pos_buf.clear()
return self.env.reset(**kwargs)
def step(self, action):
result = self.env.step(action)
if len(result) == 5:
obs, reward, terminated, truncated, info = result
else:
obs, reward, done, info = result
terminated, truncated = done, False
pos = info.get('pos', None)
if pos is not None:
try:
self._pos_buf.append(np.array(list(pos)[:3], dtype=np.float64))
except (TypeError, ValueError):
pass
# Only check once the buffer is full
if not terminated and len(self._pos_buf) >= self.stuck_steps:
displacement = float(np.linalg.norm(
self._pos_buf[-1] - self._pos_buf[0]
))
if displacement < self.min_displacement:
terminated = True
info['stuck_termination'] = True
if len(result) == 5:
return obs, reward, terminated, truncated, info
return obs, reward, terminated or truncated, info
def wrap_env(raw_env):
    """Apply standard wrappers: throttle clamp + stuck detection + speed reward."""
    env = ThrottleClampWrapper(raw_env, throttle_min=THROTTLE_MIN)
    env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
    env = SpeedRewardWrapper(env, speed_scale=SPEED_SCALE)
    return env
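
For context, a minimal sketch of how this stack is driven during training; the env id and loop are illustrative assumptions, not part of this diff:

    # Sketch only: 'DonkeySim-v0' is a placeholder env id, not defined in this repo.
    env = wrap_env(gym.make('DonkeySim-v0'))
    obs, info = env.reset()
    for _ in range(1000):
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        if terminated or truncated:
            # info['stuck_termination'] is True when the 80-step / <0.5 m rule fired.
            obs, info = env.reset()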

View File

@@ -645,3 +645,29 @@
[2026-04-15 09:05:43] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
[2026-04-15 09:05:43] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
[2026-04-15 09:05:43] [AutoResearch] Only 1 results — using random proposal.
[2026-04-15 09:14:59] [AutoResearch] GP UCB top-5 candidates:
[2026-04-15 09:14:59] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
[2026-04-15 09:14:59] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
[2026-04-15 09:14:59] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
[2026-04-15 09:14:59] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
[2026-04-15 09:14:59] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
[2026-04-15 09:14:59] [AutoResearch] Only 1 results — using random proposal.
[2026-04-15 09:16:53] [AutoResearch] GP UCB top-5 candidates:
[2026-04-15 09:16:53] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
[2026-04-15 09:16:53] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
[2026-04-15 09:16:53] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
[2026-04-15 09:16:53] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
[2026-04-15 09:16:53] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
[2026-04-15 09:16:53] [AutoResearch] Only 1 results — using random proposal.
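
Side note, not part of the diff: the UCB column in these logs is the standard GP-UCB acquisition score, ucb = mu + kappa * sigma, using the kappa=2.0 shown in the Wave4 header further below. A quick check against the top candidate:

    # GP-UCB acquisition score; values taken from the first candidate line above.
    mu, sigma, kappa = 0.3981, 0.9563, 2.0
    assert round(mu + kappa * sigma, 4) == 2.3107  # matches UCB=2.3107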

View File

@@ -360,3 +360,13 @@
[2026-04-15 09:06:00] [Wave3] Only 0 results — using random proposal.
[2026-04-15 09:06:00] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
[2026-04-15 09:06:00] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
[2026-04-15 09:15:27] [Wave3] Seed trial 1/2: using hardcoded params.
[2026-04-15 09:15:27] [Wave3] Seed trial 2/2: using hardcoded params.
[2026-04-15 09:15:27] [Wave3] Only 0 results — using random proposal.
[2026-04-15 09:15:27] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
[2026-04-15 09:15:27] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
[2026-04-15 09:17:10] [Wave3] Seed trial 1/2: using hardcoded params.
[2026-04-15 09:17:10] [Wave3] Seed trial 2/2: using hardcoded params.
[2026-04-15 09:17:10] [Wave3] Only 0 results — using random proposal.
[2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
[2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}

View File

@@ -131,3 +131,21 @@
[2026-04-15 07:16:01] [Wave4] Proposed params: {'learning_rate': 0.0011062087200910864, 'steps_per_switch': 18318, 'total_timesteps': 194470}
[2026-04-15 07:16:03] [Wave4] Launching trial 6: {'learning_rate': 0.0011062087200910864, 'steps_per_switch': 18318, 'total_timesteps': 194470}
[2026-04-15 07:16:03] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 194470 --steps-per-switch 18318 --learning-rate 0.0011062087200910864 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006
[2026-04-15 09:06:26] =================================================================
[2026-04-15 09:06:26] [Wave4] Multi-Track Autoresearch — GP+UCB Generalization Search
[2026-04-15 09:06:26] [Wave4] Training tracks : generated_track, mountain_track (no generated_road, no warm-start)
[2026-04-15 09:06:26] [Wave4] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition)
[2026-04-15 09:06:26] [Wave4] Max trials : 25 | kappa=2.0 | push every 5
[2026-04-15 09:06:26] [Wave4] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase4.jsonl
[2026-04-15 09:06:26] [Wave4] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-champion
[2026-04-15 09:06:26] [Wave4] Warm start : NONE (training from scratch each trial)
[2026-04-15 09:06:26] =================================================================
[2026-04-15 09:06:26] [Wave4] Loaded 0 existing Phase 3 results.
[2026-04-15 09:06:26] [Wave4] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
[2026-04-15 09:06:26] [Wave4] Starting from trial 1.
[2026-04-15 09:06:26] [Wave4] ========== Trial 1/25 ==========
[2026-04-15 09:06:26] [Wave4] Seed trial 1/2: using hardcoded params.
[2026-04-15 09:06:26] [Wave4] Proposed params: {'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
[2026-04-15 09:06:28] [Wave4] Launching trial 1: {'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
[2026-04-15 09:06:28] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 80000 --steps-per-switch 6000 --learning-rate 0.0003 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0001

View File

@@ -543,3 +543,48 @@ def test_health_check_callback_stops_on_stuck():
    }
    result = cb._on_step()
    assert result is False, 'Callback should stop training when sim is stuck'
# ---------------------------------------------------------------------------
# StuckTerminationWrapper tests
# ---------------------------------------------------------------------------

def test_stuck_termination_fires_when_position_frozen():
    """Episode must terminate if car position doesn't change for stuck_steps."""
    from multitrack_runner import StuckTerminationWrapper

    class FrozenEnv(MockGymEnv):
        def step(self, action):
            info = {'pos': [5.0, 0.0, 3.0], 'speed': 0.01, 'cte': 0.1}
            return np.zeros((80, 160, 3), dtype=np.uint8), 0.5, False, False, info

    wrapper = StuckTerminationWrapper(FrozenEnv(), stuck_steps=10, min_displacement=0.5)
    wrapper.reset()
    terminated = False
    for _ in range(15):
        _, _, terminated, _, info = wrapper.step(np.array([0.0, 0.5]))
        if terminated:
            break
    assert terminated, 'StuckTerminationWrapper should terminate after stuck_steps'
    assert info.get('stuck_termination') is True


def test_stuck_termination_does_not_fire_when_moving():
    """Episode must NOT terminate if car is making positional progress."""
    from multitrack_runner import StuckTerminationWrapper

    class MovingEnv(MockGymEnv):
        def __init__(self):
            super().__init__()
            self._s = 0

        def step(self, action):
            info = {'pos': [float(self._s) * 0.3, 0.0, 0.0], 'speed': 2.0, 'cte': 0.1}
            self._s += 1
            return np.zeros((80, 160, 3), dtype=np.uint8), 0.5, False, False, info

    wrapper = StuckTerminationWrapper(MovingEnv(), stuck_steps=10, min_displacement=0.5)
    wrapper.reset()
    terminated = False
    for _ in range(12):
        _, _, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))
    assert not terminated, 'Moving car should not trigger stuck termination'
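
To run only these two tests, a sketch (assumes pytest is installed and discovers this file from the working directory):

    # Sketch: filter to the stuck-termination tests with pytest's -k expression.
    import pytest
    raise SystemExit(pytest.main(['-q', '-k', 'stuck_termination']))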