fix: efficiency gate now TERMINATES after 20 low-efficiency steps (was zero-reward only)

Previously, circling episodes ran for 20+ seconds because the efficiency gate
only returned 0 reward without terminating. After 20 consecutive steps with
efficiency < 0.15 (~0.7 seconds at 27 steps/sec), the episode now terminates
with a reward of -1.0.

Also confirmed from the telemetry diagnostic: CTE does report correctly when
the car goes off-track (it rises steadily to 6.2m before the tree collision).
The grass exploit runs long only when the open grass area has no obstacles.
Efficiency gate termination is the most reliable catch for both circles
and open-grass driving (straight-line grass driving has high efficiency and
so passes this gate, but the active_node progress terminator catches that case).
This commit is contained in:
Paul Huliganga 2026-04-19 17:26:38 -04:00
parent 813f888502
commit 9ffe1c5d40
1 changed files with 27 additions and 15 deletions

View File

@ -96,7 +96,8 @@ class SpeedRewardWrapper(gym.Wrapper):
min_lap_time: float = 5.0,
max_cte_terminate: float = 4.0,
cte_patience: int = 20,
progress_patience: int = 60, # ~3.3s at 18 steps/sec
progress_patience: int = 60,
efficiency_patience: int = 20, # steps of low efficiency before termination
):
super().__init__(env)
self.speed_scale = speed_scale
@ -107,11 +108,13 @@ class SpeedRewardWrapper(gym.Wrapper):
self.max_cte_terminate = max_cte_terminate
self.cte_patience = cte_patience
self.progress_patience = progress_patience
self.efficiency_patience = efficiency_patience
self._pos_history = deque(maxlen=window_size + 1)
self._last_lap_count = 0
self._high_cte_steps = 0
self._max_node_seen = -1 # highest active_node reached this episode
self._no_progress_steps = 0 # steps since max_node last increased
self._max_node_seen = -1
self._no_progress_steps = 0
self._low_eff_steps = 0
def reset(self, **kwargs):
result = self.env.reset(**kwargs)
@ -120,6 +123,7 @@ class SpeedRewardWrapper(gym.Wrapper):
self._high_cte_steps = 0
self._max_node_seen = -1
self._no_progress_steps = 0
self._low_eff_steps = 0
return result
def step(self, action):
@ -232,9 +236,17 @@ class SpeedRewardWrapper(gym.Wrapper):
return penalty, True
# --- Efficiency gate: detect circular driving ---
# Count consecutive steps of low efficiency. After patience steps, terminate.
# Previously this just returned 0 reward (no termination) which let circles
# run for 20+ seconds. Now we terminate after ~20 steps (~0.7s).
efficiency = self._compute_efficiency()
if efficiency < self.min_efficiency:
return 0.0, False
self._low_eff_steps += 1
if self._low_eff_steps >= self.efficiency_patience:
return -1.0, True # circle too long — terminate
return 0.0, False # still accumulating — zero reward
else:
self._low_eff_steps = 0
# --- CTE quality ---
cte_quality = 1.0 - min(abs(cte) / self.max_cte, 1.0)