feat: eval_on_track.py — proper zero-shot eval on any track

The goal is a model that generalises to ANY road-surface track, not
specifically mini_monaco.  mini_monaco (tight barriers, hairpins) was
a bad proxy for this.  Generated_road is a much better zero-shot test:
same visual category, never seen during Wave 4 training.

eval_on_track.py lets us run the Wave 4 champion on any track with
the same wrappers used during training, plus shuttle-exploit detection.

Run after Trial 25 finishes:
  python3 agent/eval_on_track.py \
      --model agent/models/wave4-champion/model.zip \
      --track donkey-generated-roads-v0 \
      --episodes 3 --max-steps 3000

Agent: pi
Tests: 102 passed
Tests-Added: 0
TypeScript: N/A
This commit is contained in:
Paul Huliganga 2026-04-16 19:47:56 -04:00
parent a5577fb3e7
commit a3a49fbcaf
3 changed files with 141 additions and 0 deletions

95
agent/eval_on_track.py Normal file
View File

@ -0,0 +1,95 @@
"""
eval_on_track.py — Evaluate a saved model on any track, zero-shot.
Usage:
python3 eval_on_track.py --model models/wave4-champion/model.zip \
--track donkey-generated-roads-v0 \
--episodes 3 --max-steps 3000
This is the proper zero-shot evaluation: load a trained model, connect
to a track it has never seen, run N episodes, report reward and steps.
"""
import argparse, os, sys, time
import numpy as np
sys.path.insert(0, os.path.dirname(__file__))
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
import gymnasium as gym
from reward_wrapper import SpeedRewardWrapper
from donkeycar_sb3_runner import ThrottleClampWrapper
from multitrack_runner import StuckTerminationWrapper
# Lower throttle bound handed to ThrottleClampWrapper (``throttle_min``) in
# make_env().  NOTE(review): presumably must match the value used during
# training for the evaluation to be comparable -- confirm against the trainer.
THROTTLE_MIN = 0.2
# Scale factor handed to SpeedRewardWrapper (``speed_scale``) in make_env().
# NOTE(review): same caveat -- verify it matches the training configuration.
SPEED_SCALE = 0.1
def make_env(env_id):
    """Create the gym environment ``env_id`` and apply the evaluation wrappers.

    The wrappers are applied innermost-first in this order:
    ThrottleClampWrapper -> StuckTerminationWrapper -> SpeedRewardWrapper.

    Args:
        env_id: Environment id passed straight to ``gym.make``.

    Returns:
        The fully wrapped (non-vectorised) environment.
    """
    throttled = ThrottleClampWrapper(
        gym.make(env_id),
        throttle_min=THROTTLE_MIN,
    )
    stuck_guarded = StuckTerminationWrapper(
        throttled,
        stuck_steps=80,
        min_displacement=0.5,
    )
    return SpeedRewardWrapper(stuck_guarded, speed_scale=SPEED_SCALE)
def _macro_efficiency(pos_samples):
    """Return net displacement divided by path length over sampled positions.

    Args:
        pos_samples: Sequence of numpy position vectors in sampling order.

    Returns:
        ``None`` when fewer than three samples are available; otherwise the
        ratio ``|last - first| / sum(|p[i+1] - p[i]|)``.  When the summed path
        length is not greater than 0.1 the ratio is reported as 1.0 so that a
        car that barely moved is not flagged as shuttling.
    """
    if len(pos_samples) < 3:
        return None
    net = np.linalg.norm(pos_samples[-1] - pos_samples[0])
    path = sum(
        np.linalg.norm(nxt - cur)
        for cur, nxt in zip(pos_samples, pos_samples[1:])
    )
    return net / path if path > 0.1 else 1.0


def main():
    """Load a saved PPO model and evaluate it for N episodes on one track.

    Command-line arguments:
        --model      Path to the saved ``model.zip`` (required).
        --track      Gym environment id (default ``donkey-generated-roads-v0``).
        --episodes   Number of evaluation episodes (default 3).
        --max-steps  Per-episode step cap (default 3000).

    For every episode the total reward, step count and reward per step are
    printed, together with a warning when the sampled positions suggest the
    car is shuttling back and forth instead of making progress.  A mean
    summary is printed at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True, help='Path to model.zip')
    parser.add_argument('--track', default='donkey-generated-roads-v0')
    parser.add_argument('--episodes', type=int, default=3)
    parser.add_argument('--max-steps', type=int, default=3000)
    args = parser.parse_args()

    print('\n=== Zero-Shot Eval ===')
    print(f'Model : {args.model}')
    print(f'Track : {args.track}')
    print(f'Episodes: {args.episodes} x max {args.max_steps} steps\n')

    raw_env = make_env(args.track)
    env = VecTransposeImage(DummyVecEnv([lambda: raw_env]))

    all_rewards, all_steps = [], []
    try:
        model = PPO.load(args.model, env=env, device='auto')

        for ep in range(args.episodes):
            # VecEnv API: reset() returns only the batched observation
            # (no info dict), unlike a plain Gymnasium env.
            obs = env.reset()
            total_reward, steps, done = 0.0, 0, False
            pos_samples = []

            while not done and steps < args.max_steps:
                action, _ = model.predict(obs, deterministic=True)
                # VecEnv API: step() returns a 4-tuple; ``dones`` already
                # merges terminated and truncated.
                obs, rewards, dones, infos = env.step(action)
                total_reward += float(rewards[0])
                steps += 1
                done = bool(dones[0])

                # Sample the car position every 100 steps for the
                # shuttle-exploit check below.
                if steps % 100 == 0:
                    raw_info = (
                        infos[0] if isinstance(infos, (list, tuple)) else infos
                    )
                    pos = (
                        raw_info.get('pos')
                        if isinstance(raw_info, dict) else None
                    )
                    if pos is not None:
                        pos_samples.append(np.array(list(pos)[:3]))

            # Shuttle detection: many steps but little net displacement
            # relative to the distance covered.
            note = ''
            eff = _macro_efficiency(pos_samples)
            if eff is not None and eff < 0.3 and steps >= 500:
                note = f' ⚠️ SHUTTLE EXPLOIT? macro_eff={eff:.2f}'

            status = (
                '✅ RAN FULL EVAL' if steps >= args.max_steps else '❌ CRASHED'
            )
            print(f' ep{ep+1}: {total_reward:.1f} reward / {steps} steps '
                  f'({total_reward/max(steps,1):.2f}/step) {status}{note}')
            all_rewards.append(total_reward)
            all_steps.append(steps)
            time.sleep(0.5)

        if all_steps:
            mean_steps = np.mean(all_steps)
            print(f'\n Mean reward : {np.mean(all_rewards):.1f}')
            print(f' Mean steps : {mean_steps:.0f}')
            print(f' {"✅ DRIVES" if mean_steps > 500 else "❌ CRASHES"}')
        else:
            # --episodes <= 0: avoid np.mean([]) (NaN plus RuntimeWarning).
            print('\n No episodes were run.')
    finally:
        # Always release the environment, even if loading or stepping failed.
        env.close()
        # NOTE(review): presumably gives the simulator time to drop the
        # connection before the process exits -- confirm.
        time.sleep(2)


if __name__ == '__main__':
    main()

View File

@ -766,3 +766,47 @@
[2026-04-16 17:13:24] [Wave4] Proposed params: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
[2026-04-16 17:13:26] [Wave4] Launching trial 23: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
[2026-04-16 17:13:26] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 66965 --steps-per-switch 4507 --learning-rate 0.0003194840316570061 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0023
[2026-04-16 18:26:11] [Wave4] Trial 23 finished in 4365.0s, rc=0
[2026-04-16 18:26:11] [Wave4] Parsed: combined=150.773 mini_monaco=150.773
[2026-04-16 18:26:11] [Wave4] ===== Trial 23 Summary =====
[2026-04-16 18:26:11] GP data points : 23
[2026-04-16 18:26:11] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
[2026-04-16 18:26:11] Top 5:
[2026-04-16 18:26:11] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000}
[2026-04-16 18:26:11] score=1573.07 params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359}
[2026-04-16 18:26:11] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
[2026-04-16 18:26:11] score=230.98 params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
[2026-04-16 18:26:11] score=193.23 params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
[2026-04-16 18:26:13]
[Wave4] ========== Trial 24/25 ==========
[2026-04-16 18:26:13] [Wave4] GP UCB top-5 proposals:
[2026-04-16 18:26:13] UCB=7.5247 mu=6.2951 σ=0.6148 params={'learning_rate': 0.0003596012644521025, 'steps_per_switch': 8013, 'total_timesteps': 60054}
[2026-04-16 18:26:13] UCB=6.0685 mu=4.6817 σ=0.6934 params={'learning_rate': 0.0001643952288780874, 'steps_per_switch': 9023, 'total_timesteps': 61369}
[2026-04-16 18:26:13] UCB=5.8942 mu=5.3342 σ=0.2800 params={'learning_rate': 0.00036103199241236257, 'steps_per_switch': 6908, 'total_timesteps': 64716}
[2026-04-16 18:26:13] UCB=5.6180 mu=4.5299 σ=0.5441 params={'learning_rate': 0.0006214434613010804, 'steps_per_switch': 7433, 'total_timesteps': 61101}
[2026-04-16 18:26:13] UCB=5.2906 mu=4.8505 σ=0.2200 params={'learning_rate': 0.000363303441736895, 'steps_per_switch': 6643, 'total_timesteps': 65509}
[2026-04-16 18:26:13] [Wave4] Proposed params: {'learning_rate': 0.0003596012644521025, 'steps_per_switch': 8013, 'total_timesteps': 60054}
[2026-04-16 18:26:15] [Wave4] Launching trial 24: {'learning_rate': 0.0003596012644521025, 'steps_per_switch': 8013, 'total_timesteps': 60054}
[2026-04-16 18:26:15] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 60054 --steps-per-switch 8013 --learning-rate 0.0003596012644521025 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0024
[2026-04-16 19:09:39] [Wave4] Trial 24 finished in 2604.0s, rc=0
[2026-04-16 19:09:39] [Wave4] Parsed: combined=37.824 mini_monaco=37.824
[2026-04-16 19:09:39] [Wave4] ===== Trial 24 Summary =====
[2026-04-16 19:09:39] GP data points : 24
[2026-04-16 19:09:39] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
[2026-04-16 19:09:39] Top 5:
[2026-04-16 19:09:39] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000}
[2026-04-16 19:09:39] score=1573.07 params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359}
[2026-04-16 19:09:39] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
[2026-04-16 19:09:39] score=230.98 params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
[2026-04-16 19:09:39] score=193.23 params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
[2026-04-16 19:09:41]
[Wave4] ========== Trial 25/25 ==========
[2026-04-16 19:09:41] [Wave4] GP UCB top-5 proposals:
[2026-04-16 19:09:41] UCB=2.8514 mu=2.5523 σ=0.1495 params={'learning_rate': 0.0003128257557719074, 'steps_per_switch': 6836, 'total_timesteps': 62683}
[2026-04-16 19:09:41] UCB=2.6831 mu=2.3606 σ=0.1612 params={'learning_rate': 0.0004069124594262667, 'steps_per_switch': 6035, 'total_timesteps': 62228}
[2026-04-16 19:09:41] UCB=2.5561 mu=2.1065 σ=0.2248 params={'learning_rate': 0.00011678403675177319, 'steps_per_switch': 6295, 'total_timesteps': 60168}
[2026-04-16 19:09:41] UCB=2.4849 mu=2.1243 σ=0.1803 params={'learning_rate': 0.0002227611562953955, 'steps_per_switch': 6484, 'total_timesteps': 60292}
[2026-04-16 19:09:41] UCB=2.3964 mu=2.0203 σ=0.1880 params={'learning_rate': 0.0004890619502018291, 'steps_per_switch': 6956, 'total_timesteps': 63892}
[2026-04-16 19:09:41] [Wave4] Proposed params: {'learning_rate': 0.0003128257557719074, 'steps_per_switch': 6836, 'total_timesteps': 62683}
[2026-04-16 19:09:43] [Wave4] Launching trial 25: {'learning_rate': 0.0003128257557719074, 'steps_per_switch': 6836, 'total_timesteps': 62683}
[2026-04-16 19:09:43] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 62683 --steps-per-switch 6836 --learning-rate 0.0003128257557719074 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0025

View File

@ -20,3 +20,5 @@
{"trial": 20, "timestamp": "2026-04-16T14:10:06.493399", "params": {"learning_rate": 0.0007104769870597334, "steps_per_switch": 5822, "total_timesteps": 73160}, "combined_test_score": 20.9587, "mini_monaco_reward": 20.9587, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0020/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3578.8967142105103}
{"trial": 21, "timestamp": "2026-04-16T15:41:17.902221", "params": {"learning_rate": 0.0006240263317913531, "steps_per_switch": 6248, "total_timesteps": 81430}, "combined_test_score": 86.1012, "mini_monaco_reward": 86.1012, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0021/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5465.586006164551}
{"trial": 22, "timestamp": "2026-04-16T17:13:22.634461", "params": {"learning_rate": 0.0010680884757571496, "steps_per_switch": 3269, "total_timesteps": 81912}, "combined_test_score": 193.2341, "mini_monaco_reward": 193.2341, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0022/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5520.664301395416}
{"trial": 23, "timestamp": "2026-04-16T18:26:11.713980", "params": {"learning_rate": 0.0003194840316570061, "steps_per_switch": 4507, "total_timesteps": 66965}, "combined_test_score": 150.773, "mini_monaco_reward": 150.773, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0023/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4365.0226583480835}
{"trial": 24, "timestamp": "2026-04-16T19:09:39.779549", "params": {"learning_rate": 0.0003596012644521025, "steps_per_switch": 8013, "total_timesteps": 60054}, "combined_test_score": 37.824, "mini_monaco_reward": 37.824, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0024/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 2604.015403985977}