diff --git a/agent/eval_on_track.py b/agent/eval_on_track.py
new file mode 100644
index 0000000..c377090
--- /dev/null
+++ b/agent/eval_on_track.py
@@ -0,0 +1,110 @@
+"""
+eval_on_track.py — Evaluate a saved model on any track, zero-shot.
+
+Usage:
+    python3 eval_on_track.py --model models/wave4-champion/model.zip \
+        --track donkey-generated-roads-v0 \
+        --episodes 3 --max-steps 3000
+
+This is the proper zero-shot evaluation: load a trained model, connect
+to a track it has never seen, run N episodes, report reward and steps.
+"""
+import argparse, os, sys, time
+import numpy as np
+
+sys.path.insert(0, os.path.dirname(__file__))
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
+import gymnasium as gym
+
+from reward_wrapper import SpeedRewardWrapper
+from donkeycar_sb3_runner import ThrottleClampWrapper
+from multitrack_runner import StuckTerminationWrapper
+
+THROTTLE_MIN = 0.2
+SPEED_SCALE = 0.1
+
+def make_env(env_id):
+    """Create the gymnasium env ``env_id`` wrapped for evaluation.
+
+    Wrappers are applied innermost-first: throttle clamp, stuck
+    termination, speed reward.
+    """
+    raw = gym.make(env_id)
+    env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
+    env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
+    env = SpeedRewardWrapper(env, speed_scale=SPEED_SCALE)
+    return env
+
+def main():
+    """Parse CLI arguments, run the evaluation episodes, print a summary."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', required=True, help='Path to model.zip')
+    parser.add_argument('--track', default='donkey-generated-roads-v0')
+    parser.add_argument('--episodes', type=int, default=3)
+    parser.add_argument('--max-steps', type=int, default=3000)
+    args = parser.parse_args()
+
+    print(f'\n=== Zero-Shot Eval ===')
+    print(f'Model : {args.model}')
+    print(f'Track : {args.track}')
+    print(f'Episodes: {args.episodes} x max {args.max_steps} steps\n')
+
+    raw_env = make_env(args.track)
+    env = VecTransposeImage(DummyVecEnv([lambda: raw_env]))
+
+    all_rewards, all_steps = [], []
+    try:
+        model = PPO.load(args.model, env=env, device='auto')
+
+        for ep in range(args.episodes):
+            # SB3 VecEnv.reset() returns only the batched observation,
+            # not the gymnasium (obs, info) pair.
+            obs = env.reset()
+            total_reward, steps, done = 0.0, 0, False
+            pos_samples = []
+
+            while not done and steps < args.max_steps:
+                action, _ = model.predict(obs, deterministic=True)
+                # SB3 VecEnv.step() returns (obs, rewards, dones, infos);
+                # termination and truncation are merged into `dones`.
+                obs, rewards, dones, infos = env.step(action)
+                total_reward += float(rewards[0])
+                steps += 1
+                done = bool(dones[0])
+                if steps % 100 == 0:
+                    # Sample the car position every 100 steps.
+                    raw_info = infos[0] if isinstance(infos, (list, tuple)) else infos
+                    pos = raw_info.get('pos') if isinstance(raw_info, dict) else None
+                    if pos is not None:
+                        pos_samples.append(np.array(list(pos)[:3]))
+
+            # Shuttle detection: compare net displacement between the first
+            # and last samples with the summed sample-to-sample distance.
+            note = ''
+            if len(pos_samples) >= 3:
+                net = np.linalg.norm(pos_samples[-1] - pos_samples[0])
+                tot = sum(np.linalg.norm(b - a)
+                          for a, b in zip(pos_samples, pos_samples[1:]))
+                eff = net/tot if tot > 0.1 else 1.0
+                if eff < 0.3 and steps >= 500:
+                    note = f' ⚠️ SHUTTLE EXPLOIT? macro_eff={eff:.2f}'
+
+            status = '✅ RAN FULL EVAL' if steps >= args.max_steps else '❌ CRASHED'
+            print(f' ep{ep+1}: {total_reward:.1f} reward / {steps} steps '
+                  f'({total_reward/max(steps,1):.2f}/step) {status}{note}')
+            all_rewards.append(total_reward)
+            all_steps.append(steps)
+            time.sleep(0.5)
+
+        if all_steps:  # skip the summary when --episodes is 0
+            print(f'\n Mean reward : {np.mean(all_rewards):.1f}')
+            print(f' Mean steps : {np.mean(all_steps):.0f}')
+            print(f' {"✅ DRIVES" if np.mean(all_steps) > 500 else "❌ CRASHES"}')
+    finally:
+        # Release the simulator connection even if loading or an episode fails.
+        env.close()
+    time.sleep(2)
+
+if __name__ == '__main__':
+    main()
diff --git a/agent/outerloop-results/autoresearch_phase4_log.txt b/agent/outerloop-results/autoresearch_phase4_log.txt
index 8850b45..2c94c20 100644
--- a/agent/outerloop-results/autoresearch_phase4_log.txt
+++ b/agent/outerloop-results/autoresearch_phase4_log.txt
@@ -766,3 +766,47 @@
 [2026-04-16 17:13:24] [Wave4] Proposed params: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
 [2026-04-16 17:13:26] [Wave4] Launching trial 23: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
 [2026-04-16
17:13:26] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 66965 --steps-per-switch 4507 --learning-rate 0.0003194840316570061 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0023 +[2026-04-16 18:26:11] [Wave4] Trial 23 finished in 4365.0s, rc=0 +[2026-04-16 18:26:11] [Wave4] Parsed: combined=150.773 mini_monaco=150.773 +[2026-04-16 18:26:11] [Wave4] ===== Trial 23 Summary ===== +[2026-04-16 18:26:11] GP data points : 23 +[2026-04-16 18:26:11] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-16 18:26:11] Top 5: +[2026-04-16 18:26:11] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000} +[2026-04-16 18:26:11] score=1573.07 params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359} +[2026-04-16 18:26:11] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893} +[2026-04-16 18:26:11] score=230.98 params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179} +[2026-04-16 18:26:11] score=193.23 params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912} +[2026-04-16 18:26:13] +[Wave4] ========== Trial 24/25 ========== +[2026-04-16 18:26:13] [Wave4] GP UCB top-5 proposals: +[2026-04-16 18:26:13] UCB=7.5247 mu=6.2951 σ=0.6148 params={'learning_rate': 0.0003596012644521025, 'steps_per_switch': 8013, 'total_timesteps': 60054} +[2026-04-16 18:26:13] UCB=6.0685 mu=4.6817 σ=0.6934 params={'learning_rate': 0.0001643952288780874, 'steps_per_switch': 9023, 'total_timesteps': 61369} +[2026-04-16 18:26:13] UCB=5.8942 mu=5.3342 σ=0.2800 params={'learning_rate': 0.00036103199241236257, 'steps_per_switch': 6908, 'total_timesteps': 
64716} +[2026-04-16 18:26:13] UCB=5.6180 mu=4.5299 σ=0.5441 params={'learning_rate': 0.0006214434613010804, 'steps_per_switch': 7433, 'total_timesteps': 61101} +[2026-04-16 18:26:13] UCB=5.2906 mu=4.8505 σ=0.2200 params={'learning_rate': 0.000363303441736895, 'steps_per_switch': 6643, 'total_timesteps': 65509} +[2026-04-16 18:26:13] [Wave4] Proposed params: {'learning_rate': 0.0003596012644521025, 'steps_per_switch': 8013, 'total_timesteps': 60054} +[2026-04-16 18:26:15] [Wave4] Launching trial 24: {'learning_rate': 0.0003596012644521025, 'steps_per_switch': 8013, 'total_timesteps': 60054} +[2026-04-16 18:26:15] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 60054 --steps-per-switch 8013 --learning-rate 0.0003596012644521025 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0024 +[2026-04-16 19:09:39] [Wave4] Trial 24 finished in 2604.0s, rc=0 +[2026-04-16 19:09:39] [Wave4] Parsed: combined=37.824 mini_monaco=37.824 +[2026-04-16 19:09:39] [Wave4] ===== Trial 24 Summary ===== +[2026-04-16 19:09:39] GP data points : 24 +[2026-04-16 19:09:39] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-16 19:09:39] Top 5: +[2026-04-16 19:09:39] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000} +[2026-04-16 19:09:39] score=1573.07 params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359} +[2026-04-16 19:09:39] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893} +[2026-04-16 19:09:39] score=230.98 params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179} +[2026-04-16 19:09:39] score=193.23 params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 
3269, 'total_timesteps': 81912} +[2026-04-16 19:09:41] +[Wave4] ========== Trial 25/25 ========== +[2026-04-16 19:09:41] [Wave4] GP UCB top-5 proposals: +[2026-04-16 19:09:41] UCB=2.8514 mu=2.5523 σ=0.1495 params={'learning_rate': 0.0003128257557719074, 'steps_per_switch': 6836, 'total_timesteps': 62683} +[2026-04-16 19:09:41] UCB=2.6831 mu=2.3606 σ=0.1612 params={'learning_rate': 0.0004069124594262667, 'steps_per_switch': 6035, 'total_timesteps': 62228} +[2026-04-16 19:09:41] UCB=2.5561 mu=2.1065 σ=0.2248 params={'learning_rate': 0.00011678403675177319, 'steps_per_switch': 6295, 'total_timesteps': 60168} +[2026-04-16 19:09:41] UCB=2.4849 mu=2.1243 σ=0.1803 params={'learning_rate': 0.0002227611562953955, 'steps_per_switch': 6484, 'total_timesteps': 60292} +[2026-04-16 19:09:41] UCB=2.3964 mu=2.0203 σ=0.1880 params={'learning_rate': 0.0004890619502018291, 'steps_per_switch': 6956, 'total_timesteps': 63892} +[2026-04-16 19:09:41] [Wave4] Proposed params: {'learning_rate': 0.0003128257557719074, 'steps_per_switch': 6836, 'total_timesteps': 62683} +[2026-04-16 19:09:43] [Wave4] Launching trial 25: {'learning_rate': 0.0003128257557719074, 'steps_per_switch': 6836, 'total_timesteps': 62683} +[2026-04-16 19:09:43] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 62683 --steps-per-switch 6836 --learning-rate 0.0003128257557719074 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0025 diff --git a/agent/outerloop-results/autoresearch_results_phase4.jsonl b/agent/outerloop-results/autoresearch_results_phase4.jsonl index 94359c7..6f94b2c 100644 --- a/agent/outerloop-results/autoresearch_results_phase4.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase4.jsonl @@ -20,3 +20,5 @@ {"trial": 20, "timestamp": "2026-04-16T14:10:06.493399", "params": {"learning_rate": 0.0007104769870597334, "steps_per_switch": 5822, "total_timesteps": 73160}, 
"combined_test_score": 20.9587, "mini_monaco_reward": 20.9587, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0020/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3578.8967142105103} {"trial": 21, "timestamp": "2026-04-16T15:41:17.902221", "params": {"learning_rate": 0.0006240263317913531, "steps_per_switch": 6248, "total_timesteps": 81430}, "combined_test_score": 86.1012, "mini_monaco_reward": 86.1012, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0021/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5465.586006164551} {"trial": 22, "timestamp": "2026-04-16T17:13:22.634461", "params": {"learning_rate": 0.0010680884757571496, "steps_per_switch": 3269, "total_timesteps": 81912}, "combined_test_score": 193.2341, "mini_monaco_reward": 193.2341, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0022/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5520.664301395416} +{"trial": 23, "timestamp": "2026-04-16T18:26:11.713980", "params": {"learning_rate": 0.0003194840316570061, "steps_per_switch": 4507, "total_timesteps": 66965}, "combined_test_score": 150.773, "mini_monaco_reward": 150.773, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0023/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4365.0226583480835} +{"trial": 24, "timestamp": "2026-04-16T19:09:39.779549", "params": {"learning_rate": 0.0003596012644521025, "steps_per_switch": 8013, "total_timesteps": 60054}, "combined_test_score": 37.824, "mini_monaco_reward": 37.824, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0024/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 2604.015403985977}