fix(agent): explicit hit backstop in StuckTerminationWrapper + eval diagnostics

multitrack_runner.py: adds a Python-side hit check to StuckTerminationWrapper as a zero-latency backstop — gym_donkeycar can delay termination on hit != "none" by one frame; the new check terminates on the same step as the hit and records stuck_reason (hit_<name>) for diagnostics. eval_on_track.py: logs the hit value and stuck_reason at episode end; calls exit_scene after eval so the sim returns to the main menu (so the next gym.make() can switch scenes); drops the speed_scale=SPEED_SCALE argument to SpeedRewardWrapper, so the SPEED_SCALE constant is no longer used.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-14 15:32:10 -04:00 · 2026-05-14 15:32:10 -04:00 · c62fba40b2
parent 3c2b678771
commit c62fba40b2
2 changed files with 24 additions and 3 deletions
--- a/agent/eval_on_track.py
+++ b/agent/eval_on_track.py
@@ -22,13 +22,12 @@ from donkeycar_sb3_runner import ThrottleClampWrapper
 from multitrack_runner import StuckTerminationWrapper
 THROTTLE_MIN = 0.2
 SPEED_SCALE  = 0.1
 def make_env(env_id):
    raw = gym.make(env_id)
    env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
    env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
-    env = SpeedRewardWrapper(env, speed_scale=SPEED_SCALE)
+    env = SpeedRewardWrapper(env)
    return env
 def main():
@@ -82,9 +81,15 @@ def main():
            if eff < 0.3 and steps >= 500:
                note = f'  ⚠️  SHUTTLE EXPLOIT? macro_eff={eff:.2f}'
        raw_info_final = info[0] if isinstance(info, (list, tuple)) else info
        hit_val = raw_info_final.get('hit', '?') if isinstance(raw_info_final, dict) else '?'
        stuck_reason = raw_info_final.get('stuck_reason', '') if isinstance(raw_info_final, dict) else ''
        term_note = f'  hit={hit_val}' if hit_val not in ('?', 'none', None) else ''
        if stuck_reason:
            term_note += f'  reason={stuck_reason}'
        status = '✅ RAN FULL EVAL' if steps >= args.max_steps else '❌ CRASHED'
        print(f'  ep{ep+1}: {total_reward:.1f} reward / {steps} steps '
-              f'({total_reward/max(steps,1):.2f}/step)  {status}{note}')
+              f'({total_reward/max(steps,1):.2f}/step)  {status}{note}{term_note}')
        all_rewards.append(total_reward)
        all_steps.append(steps)
        time.sleep(0.5)
@@ -93,6 +98,12 @@ def main():
    print(f'  Mean steps  : {np.mean(all_steps):.0f}')
    print(f'  {"✅ DRIVES" if np.mean(all_steps) > 500 else "❌ CRASHES"}')
    # Send exit_scene so sim returns to main menu — next gym.make() can switch scenes
    try:
        raw_env.unwrapped.viewer.exit_scene()
        time.sleep(3)
    except Exception:
        pass
    env.close()
    time.sleep(2)
--- a/agent/multitrack_runner.py
+++ b/agent/multitrack_runner.py
@@ -211,6 +211,16 @@ class StuckTerminationWrapper(gym.Wrapper):
            except (TypeError, ValueError):
                pass
        # Explicit hit check: gym_donkeycar sets done=True for hit!="none" but timing
        # gaps (script execution order, 1-frame delay) can let it slip through.
        # This is a zero-latency Python-side backstop — fires on the same step as the hit.
        if not terminated:
            hit = info.get('hit', 'none')
            if hit and hit != 'none':
                terminated = True
                info['stuck_termination'] = True
                info['stuck_reason'] = f'hit_{hit}'
        # Speed-based stuck detection: catches car pinned against a barrier.
        # A car pressed against a wall has speed≈0 even while sliding laterally
        # (accumulating displacement that resets the position-based timer above).