fix: StuckTerminationWrapper + deque import + 102 tests
StuckTerminationWrapper added to wrap_env stack (between ThrottleClamp and SpeedReward): - Terminates episode after stuck_steps=80 steps with <0.5m displacement - Handles slow barrier contact that Unity hit detection misses - Handles off-lap-line circles (efficiency→0 gave zero reward but no termination; now gives -1.0 after 80 steps = ~4s of non-progress) - Wrapper stack: ThrottleClamp → StuckTermination → SpeedReward Also: missing deque import in multitrack_runner.py caused NameError. Phase 4 results cleared again (Trial 1 ran without StuckTermination). Tests: 2 new stuck-termination tests, 102 total. Agent: pi Tests: 102 passed Tests-Added: 2 TypeScript: N/A
This commit is contained in:
parent
5d1227833d
commit
f9f6a09744
|
|
@ -51,6 +51,7 @@ import sys
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from collections import deque
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import gymnasium as gym
|
import gymnasium as gym
|
||||||
|
|
@ -120,10 +121,62 @@ class HealthCheckCallback(BaseCallback):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
# ---- Environment factory ----
|
class StuckTerminationWrapper(gym.Wrapper):
    """End the episode when the car makes no meaningful positional progress
    over `stuck_steps` consecutive steps.

    Covers two failure modes the simulator does not flag on its own:

    1. Slow barrier contact — Unity's hit detection needs a velocity
       threshold, so a car pressed gently against a wall keeps hit='none'
       and the episode stays open.
    2. Circling off the start/finish line — efficiency drops to zero (no
       reward) but the episode never ends, wasting training steps with no
       learning signal.

    On detection the wrapper raises terminated=True so that
    SpeedRewardWrapper returns its -1.0 terminal penalty.
    """

    def __init__(self, env, stuck_steps: int = 80, min_displacement: float = 0.5):
        super().__init__(env)
        # Length (in steps) of the displacement window.
        self.stuck_steps = stuck_steps
        # Net movement (presumably metres — matches sim units) required
        # across the window to count as progress.
        self.min_displacement = min_displacement
        # Sliding window of recent positions; old samples fall off
        # automatically thanks to maxlen.
        self._pos_buf: deque = deque(maxlen=stuck_steps)

    def reset(self, **kwargs):
        """Forward reset to the wrapped env and start a fresh window."""
        self._pos_buf.clear()
        return self.env.reset(**kwargs)

    def step(self, action):
        """Step the wrapped env, then flag a stuck episode as terminated."""
        result = self.env.step(action)
        is_five_tuple = len(result) == 5
        if is_five_tuple:
            obs, reward, terminated, truncated, info = result
        else:
            # Legacy 4-tuple gym API: fold `done` into terminated.
            obs, reward, done, info = result
            terminated, truncated = done, False

        pos = info.get('pos')
        if pos is not None:
            try:
                # Keep only x/y/z; coerce to float64 for stable norms.
                self._pos_buf.append(np.array(list(pos)[:3], dtype=np.float64))
            except (TypeError, ValueError):
                # Malformed position payload — skip this sample.
                pass

        # Evaluate only once a full window of positions has accumulated.
        if not terminated and len(self._pos_buf) >= self.stuck_steps:
            net_move = float(np.linalg.norm(self._pos_buf[-1] - self._pos_buf[0]))
            if net_move < self.min_displacement:
                terminated = True
                info['stuck_termination'] = True

        if is_five_tuple:
            return obs, reward, terminated, truncated, info
        return obs, reward, terminated or truncated, info
||||||
def wrap_env(raw_env, stuck_steps: int = 80, min_displacement: float = 0.5):
    """Apply standard wrappers: throttle clamp + stuck detection + speed reward.

    Order matters: StuckTerminationWrapper sits below SpeedRewardWrapper so
    the terminated flag it raises is visible to the reward shaper, which can
    then emit its terminal penalty.

    Args:
        raw_env: Unwrapped simulator environment.
        stuck_steps: Window length (steps) for stuck detection; previously a
            hard-coded 80, now tunable while defaulting to the same value.
        min_displacement: Minimum net movement over the window; defaults to
            the previous hard-coded 0.5.

    Returns:
        The fully wrapped environment.
    """
    env = ThrottleClampWrapper(raw_env, throttle_min=THROTTLE_MIN)
    env = StuckTerminationWrapper(env, stuck_steps=stuck_steps,
                                  min_displacement=min_displacement)
    env = SpeedRewardWrapper(env, speed_scale=SPEED_SCALE)
    return env
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -645,3 +645,29 @@
|
||||||
[2026-04-15 09:05:43] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
[2026-04-15 09:05:43] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
[2026-04-15 09:05:43] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
[2026-04-15 09:05:43] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
[2026-04-15 09:05:43] [AutoResearch] Only 1 results — using random proposal.
|
[2026-04-15 09:05:43] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
[2026-04-15 09:14:59] [AutoResearch] GP UCB top-5 candidates:
|
||||||
|
[2026-04-15 09:14:59] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
|
||||||
|
[2026-04-15 09:14:59] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
|
||||||
|
[2026-04-15 09:14:59] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
|
||||||
|
[2026-04-15 09:14:59] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
|
||||||
|
[2026-04-15 09:14:59] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
|
||||||
|
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||||
|
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||||
|
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||||
|
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||||
|
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
|
[2026-04-15 09:14:59] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
|
[2026-04-15 09:14:59] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
[2026-04-15 09:16:53] [AutoResearch] GP UCB top-5 candidates:
|
||||||
|
[2026-04-15 09:16:53] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
|
||||||
|
[2026-04-15 09:16:53] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
|
||||||
|
[2026-04-15 09:16:53] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
|
||||||
|
[2026-04-15 09:16:53] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
|
||||||
|
[2026-04-15 09:16:53] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
|
||||||
|
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||||
|
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||||
|
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||||
|
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||||
|
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
|
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
|
[2026-04-15 09:16:53] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
|
|
||||||
|
|
@ -360,3 +360,13 @@
|
||||||
[2026-04-15 09:06:00] [Wave3] Only 0 results — using random proposal.
|
[2026-04-15 09:06:00] [Wave3] Only 0 results — using random proposal.
|
||||||
[2026-04-15 09:06:00] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
[2026-04-15 09:06:00] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
[2026-04-15 09:06:00] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
[2026-04-15 09:06:00] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
||||||
|
[2026-04-15 09:15:27] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-15 09:15:27] [Wave3] Seed trial 2/2: using hardcoded params.
|
||||||
|
[2026-04-15 09:15:27] [Wave3] Only 0 results — using random proposal.
|
||||||
|
[2026-04-15 09:15:27] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
|
[2026-04-15 09:15:27] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
||||||
|
[2026-04-15 09:17:10] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-15 09:17:10] [Wave3] Seed trial 2/2: using hardcoded params.
|
||||||
|
[2026-04-15 09:17:10] [Wave3] Only 0 results — using random proposal.
|
||||||
|
[2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
|
[2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
||||||
|
|
|
||||||
|
|
@ -131,3 +131,21 @@
|
||||||
[2026-04-15 07:16:01] [Wave4] Proposed params: {'learning_rate': 0.0011062087200910864, 'steps_per_switch': 18318, 'total_timesteps': 194470}
|
[2026-04-15 07:16:01] [Wave4] Proposed params: {'learning_rate': 0.0011062087200910864, 'steps_per_switch': 18318, 'total_timesteps': 194470}
|
||||||
[2026-04-15 07:16:03] [Wave4] Launching trial 6: {'learning_rate': 0.0011062087200910864, 'steps_per_switch': 18318, 'total_timesteps': 194470}
|
[2026-04-15 07:16:03] [Wave4] Launching trial 6: {'learning_rate': 0.0011062087200910864, 'steps_per_switch': 18318, 'total_timesteps': 194470}
|
||||||
[2026-04-15 07:16:03] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 194470 --steps-per-switch 18318 --learning-rate 0.0011062087200910864 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006
|
[2026-04-15 07:16:03] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 194470 --steps-per-switch 18318 --learning-rate 0.0011062087200910864 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006
|
||||||
|
[2026-04-15 09:06:26] =================================================================
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Multi-Track Autoresearch — GP+UCB Generalization Search
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Training tracks : generated_track, mountain_track (no generated_road, no warm-start)
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition)
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Max trials : 25 | kappa=2.0 | push every 5
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase4.jsonl
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-champion
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Warm start : NONE (training from scratch each trial)
|
||||||
|
[2026-04-15 09:06:26] =================================================================
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Loaded 0 existing Phase 3 results.
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Starting from trial 1.
|
||||||
|
[2026-04-15 09:06:26]
|
||||||
|
[Wave4] ========== Trial 1/25 ==========
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-15 09:06:26] [Wave4] Proposed params: {'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
||||||
|
[2026-04-15 09:06:28] [Wave4] Launching trial 1: {'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
||||||
|
[2026-04-15 09:06:28] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 80000 --steps-per-switch 6000 --learning-rate 0.0003 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0001
|
||||||
|
|
|
||||||
|
|
@ -543,3 +543,48 @@ def test_health_check_callback_stops_on_stuck():
|
||||||
}
|
}
|
||||||
result = cb._on_step()
|
result = cb._on_step()
|
||||||
assert result is False, 'Callback should stop training when sim is stuck'
|
assert result is False, 'Callback should stop training when sim is stuck'
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# StuckTerminationWrapper tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_stuck_termination_fires_when_position_frozen():
    """Episode must terminate if car position doesn't change for stuck_steps."""
    from multitrack_runner import StuckTerminationWrapper

    class FrozenEnv(MockGymEnv):
        # Always reports the exact same position: a fully stuck car.
        def step(self, action):
            frame = np.zeros((80, 160, 3), dtype=np.uint8)
            info = {'pos': [5.0, 0.0, 3.0], 'speed': 0.01, 'cte': 0.1}
            return frame, 0.5, False, False, info

    wrapper = StuckTerminationWrapper(
        FrozenEnv(), stuck_steps=10, min_displacement=0.5)
    wrapper.reset()

    terminated = False
    info = {}
    # Step past the window length; termination should fire at step 10.
    for _ in range(15):
        _, _, terminated, _, info = wrapper.step(np.array([0.0, 0.5]))
        if terminated:
            break

    assert terminated, 'StuckTerminationWrapper should terminate after stuck_steps'
    assert info.get('stuck_termination') is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_stuck_termination_does_not_fire_when_moving():
    """Episode must NOT terminate if car is making positional progress."""
    from multitrack_runner import StuckTerminationWrapper

    class MovingEnv(MockGymEnv):
        # Advances 0.3 units along x every step — steady forward progress.
        def __init__(self):
            super().__init__()
            self._s = 0

        def step(self, action):
            info = {'pos': [float(self._s) * 0.3, 0.0, 0.0],
                    'speed': 2.0, 'cte': 0.1}
            self._s += 1
            frame = np.zeros((80, 160, 3), dtype=np.uint8)
            return frame, 0.5, False, False, info

    wrapper = StuckTerminationWrapper(
        MovingEnv(), stuck_steps=10, min_displacement=0.5)
    wrapper.reset()

    terminated = False
    # Run past the window; the flag from the final step is what matters.
    for _ in range(12):
        *_, terminated, _, _ = wrapper.step(np.array([0.0, 0.5]))

    assert not terminated, 'Moving car should not trigger stuck termination'
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue