feat: eval_on_track.py — proper zero-shot eval on any track
The goal is a model that generalises to ANY road-surface track, not specifically mini_monaco. mini_monaco (tight barriers, hairpins) was a bad proxy for this. Generated_road is a much better zero-shot test: it is in the same visual category but was never seen during Wave 4 training.

eval_on_track.py lets us run the Wave 4 champion on any track with the same wrappers used during training, plus shuttle-exploit detection.

Run after Trial 25 finishes:

    python3 agent/eval_on_track.py --model agent/models/wave4-champion/model.zip --track donkey-generated-roads-v0 --episodes 3 --max-steps 3000

Agent: pi
Tests: 102 passed
Tests-Added: 0
TypeScript: N/A
This commit is contained in:
parent
a5577fb3e7
commit
a3a49fbcaf
|
|
@ -0,0 +1,95 @@
|
|||
"""
|
||||
eval_on_track.py — Evaluate a saved model on any track, zero-shot.
|
||||
|
||||
Usage:
|
||||
python3 eval_on_track.py --model models/wave4-champion/model.zip \
|
||||
--track donkey-generated-roads-v0 \
|
||||
--episodes 3 --max-steps 3000
|
||||
|
||||
This is the proper zero-shot evaluation: load a trained model, connect
|
||||
to a track it has never seen, run N episodes, report reward and steps.
|
||||
"""
|
||||
import argparse, os, sys, time
|
||||
import numpy as np
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||
import gymnasium as gym
|
||||
|
||||
from reward_wrapper import SpeedRewardWrapper
|
||||
from donkeycar_sb3_runner import ThrottleClampWrapper
|
||||
from multitrack_runner import StuckTerminationWrapper
|
||||
|
||||
# Lower throttle bound handed to ThrottleClampWrapper (as ``throttle_min``).
THROTTLE_MIN = 0.2
# Scale factor handed to SpeedRewardWrapper (as ``speed_scale``).
SPEED_SCALE = 0.1


def make_env(env_id):
    """Build the wrapped evaluation environment for ``env_id``.

    The base environment comes from ``gym.make(env_id)`` and is wrapped,
    innermost first, by:

    1. ``ThrottleClampWrapper`` with ``throttle_min=THROTTLE_MIN``
    2. ``StuckTerminationWrapper`` with ``stuck_steps=80`` and
       ``min_displacement=0.5``
    3. ``SpeedRewardWrapper`` with ``speed_scale=SPEED_SCALE``

    Args:
        env_id: Gymnasium environment id, e.g. ``donkey-generated-roads-v0``.

    Returns:
        The outermost wrapper (a ``SpeedRewardWrapper`` instance).
    """
    base_env = gym.make(env_id)
    clamped = ThrottleClampWrapper(base_env, throttle_min=THROTTLE_MIN)
    guarded = StuckTerminationWrapper(
        clamped,
        stuck_steps=80,
        min_displacement=0.5,
    )
    return SpeedRewardWrapper(guarded, speed_scale=SPEED_SCALE)
|
||||
|
||||
def _macro_efficiency(pos_samples):
    """Return net displacement divided by path length over sampled positions.

    Args:
        pos_samples: Sequence of at least two numpy position vectors, in the
            order they were sampled.

    Returns:
        ``net / total`` where ``net`` is the distance between the first and
        last sample and ``total`` is the sum of distances between consecutive
        samples. When ``total`` is not above 0.1 the ratio is meaningless, so
        1.0 is returned instead.
    """
    net = np.linalg.norm(pos_samples[-1] - pos_samples[0])
    total = sum(
        np.linalg.norm(nxt - cur)
        for cur, nxt in zip(pos_samples, pos_samples[1:])
    )
    return net / total if total > 0.1 else 1.0


def main():
    """Run a saved PPO model on one track and print per-episode results.

    Command-line arguments:
        --model      Path to the saved model zip (required).
        --track      Environment id (default ``donkey-generated-roads-v0``).
        --episodes   Number of evaluation episodes (default 3).
        --max-steps  Step cap per episode (default 3000).

    For each episode the accumulated reward, step count and reward per step
    are printed. An episode that reaches the step cap is reported as a full
    run; one that ends earlier is reported as crashed. Positions sampled every
    100 steps feed a shuttle check that flags episodes with a lot of travel
    but little net displacement.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True, help='Path to model.zip')
    parser.add_argument('--track', default='donkey-generated-roads-v0')
    parser.add_argument('--episodes', type=int, default=3)
    parser.add_argument('--max-steps', type=int, default=3000)
    args = parser.parse_args()

    print('\n=== Zero-Shot Eval ===')
    print(f'Model   : {args.model}')
    print(f'Track   : {args.track}')
    print(f'Episodes: {args.episodes} x max {args.max_steps} steps\n')

    raw_env = make_env(args.track)
    env = VecTransposeImage(DummyVecEnv([lambda: raw_env]))

    all_rewards, all_steps = [], []
    try:
        model = PPO.load(args.model, env=env, device='auto')

        for ep in range(args.episodes):
            # VecEnv API: reset() returns only the batched observation
            # (no info dict), unlike a bare Gymnasium env.
            obs = env.reset()
            total_reward, steps, done = 0.0, 0, False
            pos_samples = []

            while not done and steps < args.max_steps:
                action, _ = model.predict(obs, deterministic=True)
                # VecEnv API: step() returns a 4-tuple with one merged
                # ``dones`` array instead of terminated/truncated.
                obs, rewards, dones, infos = env.step(action)
                total_reward += float(rewards[0])
                steps += 1
                done = bool(dones[0])

                if steps % 100 == 0:
                    raw_info = (
                        infos[0] if isinstance(infos, (list, tuple)) else infos
                    )
                    pos = (
                        raw_info.get('pos')
                        if isinstance(raw_info, dict) else None
                    )
                    if pos is not None:
                        pos_samples.append(np.array(list(pos)[:3]))

            # Shuttle detection: long path but small net displacement.
            note = ''
            if len(pos_samples) >= 3:
                eff = _macro_efficiency(pos_samples)
                if eff < 0.3 and steps >= 500:
                    note = f'  ⚠️ SHUTTLE EXPLOIT? macro_eff={eff:.2f}'

            status = (
                '✅ RAN FULL EVAL' if steps >= args.max_steps else '❌ CRASHED'
            )
            print(f'  ep{ep+1}: {total_reward:.1f} reward / {steps} steps '
                  f'({total_reward/max(steps,1):.2f}/step)  {status}{note}')
            all_rewards.append(total_reward)
            all_steps.append(steps)
            time.sleep(0.5)

        if all_steps:
            mean_steps = np.mean(all_steps)
            print(f'\n  Mean reward : {np.mean(all_rewards):.1f}')
            print(f'  Mean steps  : {mean_steps:.0f}')
            print(f'  {"✅ DRIVES" if mean_steps > 500 else "❌ CRASHES"}')
        else:
            # Avoid np.mean([]) (NaN + RuntimeWarning) when --episodes <= 0.
            print('\n  No episodes were run.')
    finally:
        # Always release the environment, even if loading or stepping fails.
        env.close()
        time.sleep(2)


if __name__ == '__main__':
    main()
|
||||
|
|
@ -766,3 +766,47 @@
|
|||
[2026-04-16 17:13:24] [Wave4] Proposed params: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
|
||||
[2026-04-16 17:13:26] [Wave4] Launching trial 23: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
|
||||
[2026-04-16 17:13:26] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 66965 --steps-per-switch 4507 --learning-rate 0.0003194840316570061 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0023
|
||||
[2026-04-16 18:26:11] [Wave4] Trial 23 finished in 4365.0s, rc=0
|
||||
[2026-04-16 18:26:11] [Wave4] Parsed: combined=150.773 mini_monaco=150.773
|
||||
[2026-04-16 18:26:11] [Wave4] ===== Trial 23 Summary =====
|
||||
[2026-04-16 18:26:11] GP data points : 23
|
||||
[2026-04-16 18:26:11] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
|
||||
[2026-04-16 18:26:11] Top 5:
|
||||
[2026-04-16 18:26:11] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000}
|
||||
[2026-04-16 18:26:11] score=1573.07 params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359}
|
||||
[2026-04-16 18:26:11] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
|
||||
[2026-04-16 18:26:11] score=230.98 params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
|
||||
[2026-04-16 18:26:11] score=193.23 params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
|
||||
[2026-04-16 18:26:13]
|
||||
[Wave4] ========== Trial 24/25 ==========
|
||||
[2026-04-16 18:26:13] [Wave4] GP UCB top-5 proposals:
|
||||
[2026-04-16 18:26:13] UCB=7.5247 mu=6.2951 σ=0.6148 params={'learning_rate': 0.0003596012644521025, 'steps_per_switch': 8013, 'total_timesteps': 60054}
|
||||
[2026-04-16 18:26:13] UCB=6.0685 mu=4.6817 σ=0.6934 params={'learning_rate': 0.0001643952288780874, 'steps_per_switch': 9023, 'total_timesteps': 61369}
|
||||
[2026-04-16 18:26:13] UCB=5.8942 mu=5.3342 σ=0.2800 params={'learning_rate': 0.00036103199241236257, 'steps_per_switch': 6908, 'total_timesteps': 64716}
|
||||
[2026-04-16 18:26:13] UCB=5.6180 mu=4.5299 σ=0.5441 params={'learning_rate': 0.0006214434613010804, 'steps_per_switch': 7433, 'total_timesteps': 61101}
|
||||
[2026-04-16 18:26:13] UCB=5.2906 mu=4.8505 σ=0.2200 params={'learning_rate': 0.000363303441736895, 'steps_per_switch': 6643, 'total_timesteps': 65509}
|
||||
[2026-04-16 18:26:13] [Wave4] Proposed params: {'learning_rate': 0.0003596012644521025, 'steps_per_switch': 8013, 'total_timesteps': 60054}
|
||||
[2026-04-16 18:26:15] [Wave4] Launching trial 24: {'learning_rate': 0.0003596012644521025, 'steps_per_switch': 8013, 'total_timesteps': 60054}
|
||||
[2026-04-16 18:26:15] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 60054 --steps-per-switch 8013 --learning-rate 0.0003596012644521025 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0024
|
||||
[2026-04-16 19:09:39] [Wave4] Trial 24 finished in 2604.0s, rc=0
|
||||
[2026-04-16 19:09:39] [Wave4] Parsed: combined=37.824 mini_monaco=37.824
|
||||
[2026-04-16 19:09:39] [Wave4] ===== Trial 24 Summary =====
|
||||
[2026-04-16 19:09:39] GP data points : 24
|
||||
[2026-04-16 19:09:39] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
|
||||
[2026-04-16 19:09:39] Top 5:
|
||||
[2026-04-16 19:09:39] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000}
|
||||
[2026-04-16 19:09:39] score=1573.07 params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359}
|
||||
[2026-04-16 19:09:39] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
|
||||
[2026-04-16 19:09:39] score=230.98 params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
|
||||
[2026-04-16 19:09:39] score=193.23 params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
|
||||
[2026-04-16 19:09:41]
|
||||
[Wave4] ========== Trial 25/25 ==========
|
||||
[2026-04-16 19:09:41] [Wave4] GP UCB top-5 proposals:
|
||||
[2026-04-16 19:09:41] UCB=2.8514 mu=2.5523 σ=0.1495 params={'learning_rate': 0.0003128257557719074, 'steps_per_switch': 6836, 'total_timesteps': 62683}
|
||||
[2026-04-16 19:09:41] UCB=2.6831 mu=2.3606 σ=0.1612 params={'learning_rate': 0.0004069124594262667, 'steps_per_switch': 6035, 'total_timesteps': 62228}
|
||||
[2026-04-16 19:09:41] UCB=2.5561 mu=2.1065 σ=0.2248 params={'learning_rate': 0.00011678403675177319, 'steps_per_switch': 6295, 'total_timesteps': 60168}
|
||||
[2026-04-16 19:09:41] UCB=2.4849 mu=2.1243 σ=0.1803 params={'learning_rate': 0.0002227611562953955, 'steps_per_switch': 6484, 'total_timesteps': 60292}
|
||||
[2026-04-16 19:09:41] UCB=2.3964 mu=2.0203 σ=0.1880 params={'learning_rate': 0.0004890619502018291, 'steps_per_switch': 6956, 'total_timesteps': 63892}
|
||||
[2026-04-16 19:09:41] [Wave4] Proposed params: {'learning_rate': 0.0003128257557719074, 'steps_per_switch': 6836, 'total_timesteps': 62683}
|
||||
[2026-04-16 19:09:43] [Wave4] Launching trial 25: {'learning_rate': 0.0003128257557719074, 'steps_per_switch': 6836, 'total_timesteps': 62683}
|
||||
[2026-04-16 19:09:43] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 62683 --steps-per-switch 6836 --learning-rate 0.0003128257557719074 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0025
|
||||
|
|
|
|||
|
|
@ -20,3 +20,5 @@
|
|||
{"trial": 20, "timestamp": "2026-04-16T14:10:06.493399", "params": {"learning_rate": 0.0007104769870597334, "steps_per_switch": 5822, "total_timesteps": 73160}, "combined_test_score": 20.9587, "mini_monaco_reward": 20.9587, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0020/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3578.8967142105103}
|
||||
{"trial": 21, "timestamp": "2026-04-16T15:41:17.902221", "params": {"learning_rate": 0.0006240263317913531, "steps_per_switch": 6248, "total_timesteps": 81430}, "combined_test_score": 86.1012, "mini_monaco_reward": 86.1012, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0021/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5465.586006164551}
|
||||
{"trial": 22, "timestamp": "2026-04-16T17:13:22.634461", "params": {"learning_rate": 0.0010680884757571496, "steps_per_switch": 3269, "total_timesteps": 81912}, "combined_test_score": 193.2341, "mini_monaco_reward": 193.2341, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0022/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5520.664301395416}
|
||||
{"trial": 23, "timestamp": "2026-04-16T18:26:11.713980", "params": {"learning_rate": 0.0003194840316570061, "steps_per_switch": 4507, "total_timesteps": 66965}, "combined_test_score": 150.773, "mini_monaco_reward": 150.773, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0023/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4365.0226583480835}
|
||||
{"trial": 24, "timestamp": "2026-04-16T19:09:39.779549", "params": {"learning_rate": 0.0003596012644521025, "steps_per_switch": 8013, "total_timesteps": 60054}, "combined_test_score": 37.824, "mini_monaco_reward": 37.824, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0024/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 2604.015403985977}
|
||||
|
|
|
|||
Loading…
Reference in New Issue