fix(agent): explicit hit backstop in StuckTermination + eval diagnostics
multitrack_runner.py: adds a Python-side hit check as a zero-latency backstop — gym_donkeycar can delay termination on hit != "none" by one frame; this check fires on the same step as the hit and records stuck_reason for diagnostics. eval_on_track.py: logs the hit value and stuck_reason at episode end; calls exit_scene after eval so the sim returns to the main menu (so the next gym.make() can switch scenes); removes the SPEED_SCALE constant and no longer passes speed_scale to SpeedRewardWrapper. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3c2b678771
commit
c62fba40b2
|
|
@ -22,13 +22,12 @@ from donkeycar_sb3_runner import ThrottleClampWrapper
|
||||||
from multitrack_runner import StuckTerminationWrapper
|
from multitrack_runner import StuckTerminationWrapper
|
||||||
|
|
||||||
THROTTLE_MIN = 0.2
|
THROTTLE_MIN = 0.2
|
||||||
SPEED_SCALE = 0.1
|
|
||||||
|
|
||||||
def make_env(env_id):
|
def make_env(env_id):
|
||||||
raw = gym.make(env_id)
|
raw = gym.make(env_id)
|
||||||
env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
|
env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
|
||||||
env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
|
env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
|
||||||
env = SpeedRewardWrapper(env, speed_scale=SPEED_SCALE)
|
env = SpeedRewardWrapper(env)
|
||||||
return env
|
return env
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
@ -82,9 +81,15 @@ def main():
|
||||||
if eff < 0.3 and steps >= 500:
|
if eff < 0.3 and steps >= 500:
|
||||||
note = f' ⚠️ SHUTTLE EXPLOIT? macro_eff={eff:.2f}'
|
note = f' ⚠️ SHUTTLE EXPLOIT? macro_eff={eff:.2f}'
|
||||||
|
|
||||||
|
raw_info_final = info[0] if isinstance(info, (list, tuple)) else info
|
||||||
|
hit_val = raw_info_final.get('hit', '?') if isinstance(raw_info_final, dict) else '?'
|
||||||
|
stuck_reason = raw_info_final.get('stuck_reason', '') if isinstance(raw_info_final, dict) else ''
|
||||||
|
term_note = f' hit={hit_val}' if hit_val not in ('?', 'none', None) else ''
|
||||||
|
if stuck_reason:
|
||||||
|
term_note += f' reason={stuck_reason}'
|
||||||
status = '✅ RAN FULL EVAL' if steps >= args.max_steps else '❌ CRASHED'
|
status = '✅ RAN FULL EVAL' if steps >= args.max_steps else '❌ CRASHED'
|
||||||
print(f' ep{ep+1}: {total_reward:.1f} reward / {steps} steps '
|
print(f' ep{ep+1}: {total_reward:.1f} reward / {steps} steps '
|
||||||
f'({total_reward/max(steps,1):.2f}/step) {status}{note}')
|
f'({total_reward/max(steps,1):.2f}/step) {status}{note}{term_note}')
|
||||||
all_rewards.append(total_reward)
|
all_rewards.append(total_reward)
|
||||||
all_steps.append(steps)
|
all_steps.append(steps)
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
@ -93,6 +98,12 @@ def main():
|
||||||
print(f' Mean steps : {np.mean(all_steps):.0f}')
|
print(f' Mean steps : {np.mean(all_steps):.0f}')
|
||||||
print(f' {"✅ DRIVES" if np.mean(all_steps) > 500 else "❌ CRASHES"}')
|
print(f' {"✅ DRIVES" if np.mean(all_steps) > 500 else "❌ CRASHES"}')
|
||||||
|
|
||||||
|
# Send exit_scene so sim returns to main menu — next gym.make() can switch scenes
|
||||||
|
try:
|
||||||
|
raw_env.unwrapped.viewer.exit_scene()
|
||||||
|
time.sleep(3)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
env.close()
|
env.close()
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -211,6 +211,16 @@ class StuckTerminationWrapper(gym.Wrapper):
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Explicit hit check: gym_donkeycar sets done=True for hit!="none" but timing
|
||||||
|
# gaps (script execution order, 1-frame delay) can let it slip through.
|
||||||
|
# This is a zero-latency Python-side backstop — fires on the same step as the hit.
|
||||||
|
if not terminated:
|
||||||
|
hit = info.get('hit', 'none')
|
||||||
|
if hit and hit != 'none':
|
||||||
|
terminated = True
|
||||||
|
info['stuck_termination'] = True
|
||||||
|
info['stuck_reason'] = f'hit_{hit}'
|
||||||
|
|
||||||
# Speed-based stuck detection: catches car pinned against a barrier.
|
# Speed-based stuck detection: catches car pinned against a barrier.
|
||||||
# A car pressed against a wall has speed≈0 even while sliding laterally
|
# A car pressed against a wall has speed≈0 even while sliding laterally
|
||||||
# (accumulating displacement that resets the position-based timer above).
|
# (accumulating displacement that resets the position-based timer above).
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue