fix: efficiency gate now TERMINATES after 20 low-efficiency steps (was zero-reward only)
Previously, circular driving could run for 20+ seconds because the efficiency gate only returned zero reward without terminating the episode. Now, after 20 consecutive steps with efficiency < 0.15 (~0.7 seconds at 27 steps/sec), the episode terminates with a reward of -1.0. The telemetry diagnostic also confirmed that CTE is reported correctly when the car goes off-track (it rises steadily to 6.2m before the tree collision); the grass exploit runs long only when the open grass area has no obstacles. Efficiency-gate termination is the most reliable catch for both circles and open-grass driving. (Straight-line driving on grass has high efficiency and is therefore not caught by this gate; the active_node progress terminator catches that case instead.)
This commit is contained in:
parent
813f888502
commit
9ffe1c5d40
|
|
@ -96,7 +96,8 @@ class SpeedRewardWrapper(gym.Wrapper):
|
|||
min_lap_time: float = 5.0,
|
||||
max_cte_terminate: float = 4.0,
|
||||
cte_patience: int = 20,
|
||||
progress_patience: int = 60, # ~3.3s at 18 steps/sec
|
||||
progress_patience: int = 60,
|
||||
efficiency_patience: int = 20, # steps of low efficiency before termination
|
||||
):
|
||||
super().__init__(env)
|
||||
self.speed_scale = speed_scale
|
||||
|
|
@ -107,11 +108,13 @@ class SpeedRewardWrapper(gym.Wrapper):
|
|||
self.max_cte_terminate = max_cte_terminate
|
||||
self.cte_patience = cte_patience
|
||||
self.progress_patience = progress_patience
|
||||
self.efficiency_patience = efficiency_patience
|
||||
self._pos_history = deque(maxlen=window_size + 1)
|
||||
self._last_lap_count = 0
|
||||
self._high_cte_steps = 0
|
||||
self._max_node_seen = -1 # highest active_node reached this episode
|
||||
self._no_progress_steps = 0 # steps since max_node last increased
|
||||
self._max_node_seen = -1
|
||||
self._no_progress_steps = 0
|
||||
self._low_eff_steps = 0
|
||||
|
||||
def reset(self, **kwargs):
|
||||
result = self.env.reset(**kwargs)
|
||||
|
|
@ -120,6 +123,7 @@ class SpeedRewardWrapper(gym.Wrapper):
|
|||
self._high_cte_steps = 0
|
||||
self._max_node_seen = -1
|
||||
self._no_progress_steps = 0
|
||||
self._low_eff_steps = 0
|
||||
return result
|
||||
|
||||
def step(self, action):
|
||||
|
|
@ -232,9 +236,17 @@ class SpeedRewardWrapper(gym.Wrapper):
|
|||
return penalty, True
|
||||
|
||||
# --- Efficiency gate: detect circular driving ---
|
||||
# Count consecutive steps of low efficiency. After patience steps, terminate.
|
||||
# Previously this just returned 0 reward (no termination) which let circles
|
||||
# run for 20+ seconds. Now we terminate after ~20 steps (~0.7s).
|
||||
efficiency = self._compute_efficiency()
|
||||
if efficiency < self.min_efficiency:
|
||||
return 0.0, False
|
||||
self._low_eff_steps += 1
|
||||
if self._low_eff_steps >= self.efficiency_patience:
|
||||
return -1.0, True # circle too long — terminate
|
||||
return 0.0, False # still accumulating — zero reward
|
||||
else:
|
||||
self._low_eff_steps = 0
|
||||
|
||||
# --- CTE quality ---
|
||||
cte_quality = 1.0 - min(abs(cte) / self.max_cte, 1.0)
|
||||
|
|
|
|||
Loading…
Reference in New Issue