From c62fba40b26fd2540fcf0e4ecbb75447aa9cb1a9 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Thu, 14 May 2026 15:32:10 -0400 Subject: [PATCH] fix(agent): explicit hit backstop in StuckTermination + eval diagnostics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit multitrack_runner.py: adds Python-side hit check as a zero-latency backstop — gym_donkeycar can delay hit!=none termination by one frame; this fires on the same step and records stuck_reason for diagnostics. eval_on_track.py: logs hit value and stuck_reason at episode end; calls exit_scene after eval so the sim returns to main menu (next gym.make() can switch scenes); removes the SPEED_SCALE constant and stops passing speed_scale to SpeedRewardWrapper, which now runs with its default speed_scale. Co-Authored-By: Claude Sonnet 4.6 --- agent/eval_on_track.py | 17 ++++++++++++++--- agent/multitrack_runner.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/agent/eval_on_track.py b/agent/eval_on_track.py index 63065bf..e1e3d5d 100644 --- a/agent/eval_on_track.py +++ b/agent/eval_on_track.py @@ -22,13 +22,12 @@ from donkeycar_sb3_runner import ThrottleClampWrapper from multitrack_runner import StuckTerminationWrapper THROTTLE_MIN = 0.2 -SPEED_SCALE = 0.1 def make_env(env_id): raw = gym.make(env_id) env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5) - env = SpeedRewardWrapper(env, speed_scale=SPEED_SCALE) + env = SpeedRewardWrapper(env) return env def main(): @@ -82,9 +81,15 @@ def main(): if eff < 0.3 and steps >= 500: note = f' ⚠️ SHUTTLE EXPLOIT? macro_eff={eff:.2f}' + raw_info_final = info[0] if isinstance(info, (list, tuple)) else info + hit_val = raw_info_final.get('hit', '?') if isinstance(raw_info_final, dict) else '?' 
+ stuck_reason = raw_info_final.get('stuck_reason', '') if isinstance(raw_info_final, dict) else '' + term_note = f' hit={hit_val}' if hit_val not in ('?', 'none', None) else '' + if stuck_reason: + term_note += f' reason={stuck_reason}' status = '✅ RAN FULL EVAL' if steps >= args.max_steps else '❌ CRASHED' print(f' ep{ep+1}: {total_reward:.1f} reward / {steps} steps ' - f'({total_reward/max(steps,1):.2f}/step) {status}{note}') + f'({total_reward/max(steps,1):.2f}/step) {status}{note}{term_note}') all_rewards.append(total_reward) all_steps.append(steps) time.sleep(0.5) @@ -93,6 +98,12 @@ def main(): print(f' Mean steps : {np.mean(all_steps):.0f}') print(f' {"✅ DRIVES" if np.mean(all_steps) > 500 else "❌ CRASHES"}') + # Send exit_scene so sim returns to main menu — next gym.make() can switch scenes + try: + raw_env.unwrapped.viewer.exit_scene() + time.sleep(3) + except Exception: + pass env.close() time.sleep(2) diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py index 0096da5..30b47cd 100644 --- a/agent/multitrack_runner.py +++ b/agent/multitrack_runner.py @@ -211,6 +211,16 @@ class StuckTerminationWrapper(gym.Wrapper): except (TypeError, ValueError): pass + # Explicit hit check: gym_donkeycar sets done=True for hit!="none" but timing + # gaps (script execution order, 1-frame delay) can let it slip through. + # This is a zero-latency Python-side backstop — fires on the same step as the hit. + if not terminated: + hit = info.get('hit', 'none') + if hit and hit != 'none': + terminated = True + info['stuck_termination'] = True + info['stuck_reason'] = f'hit_{hit}' + # Speed-based stuck detection: catches car pinned against a barrier. # A car pressed against a wall has speed≈0 even while sliding laterally # (accumulating displacement that resets the position-based timer above).