"""Evaluate best models from exp24/25/26 across 10 truly different random roads.

Uses the regen_road TCP message (rand_seed) to get genuinely different roads —
NOT the same road-2 that all prior training used.

Road style 0 = default generated road geometry.
10 different seeds per model, 2000 steps per road.
"""

import json
import logging
import os
import random
import sys
import time
from datetime import datetime

# The project-local wrappers below are imported by bare module name, so the
# agent directory has to be on sys.path before those imports execute.
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')

import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage

from discretize_action import DiscretizedActionWrapper
from donkeycar_sb3_runner import ThrottleClampWrapper
from multitrack_runner import StuckTerminationWrapper
from reward_wrapper import SpeedRewardWrapper

HOST = 'localhost'
PORT = 9091
THROTTLE_MIN = 0.2
N_STEER = 7
N_THROTTLE = 1
TRACK_ID = 'donkey-generated-roads-v0'
MAX_EVAL_STEPS = 2000
REGEN_WAIT = 3.0  # seconds after regen_road before reset

MODELS = {
    'exp24': '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp24-discrete/best_model.zip',
    'exp25': '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp25-wheel-fix/best_model.zip',
    'exp26': '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/best_model.zip',
}

# 10 fixed seeds so every model is evaluated on the same set of roads
EVAL_SEEDS = [1001, 2002, 3003, 4004, 5005, 6006, 7007, 8008, 9009, 1234]

# Derived from the seed list so the "x/N" figures in the log can never drift
# away from the number of roads that are actually evaluated.
N_ROADS = len(EVAL_SEEDS)

LOG_PATH = f'/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/eval_best_models_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'

# Mirror every message to a timestamped log file and to stdout. Propagation is
# disabled so a configured root logger does not print each line a second time.
_fh = logging.FileHandler(LOG_PATH)
_fh.setFormatter(logging.Formatter('%(message)s'))
_sh = logging.StreamHandler(sys.stdout)
_sh.setFormatter(logging.Formatter('%(message)s'))
log = logging.getLogger('eval')
log.setLevel(logging.INFO)
log.propagate = False
log.addHandler(_fh)
log.addHandler(_sh)


def ts():
    """Return the current wall-clock time formatted as HH:MM:SS."""
    return datetime.now().strftime('%H:%M:%S')


def flog(msg):
    """Log *msg* at INFO level, prefixed with the current time in brackets."""
    log.info(f'[{ts()}] {msg}')


def make_env():
    """Return a zero-argument factory that builds the wrapped sim environment.

    The factory form is what DummyVecEnv expects: it calls each factory itself
    to construct the underlying environments.
    """
    def _init():
        raw = gym.make(TRACK_ID, conf={'host': HOST, 'port': PORT})
        env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
        env = DiscretizedActionWrapper(env, n_steer=N_STEER, n_throttle=N_THROTTLE)
        # NOTE(review): max_episode_seconds=30.0 looks like it could end an
        # episode before MAX_EVAL_STEPS is reached, depending on the sim step
        # rate — confirm against StuckTerminationWrapper.
        env = StuckTerminationWrapper(
            env,
            stuck_steps=40,
            min_displacement=0.5,
            max_stuck_seconds=5.0,
            max_episode_seconds=30.0,
            low_speed_threshold=1.0,
            max_low_speed_seconds=1.5,
            max_cte=3.0,
            max_high_cte_seconds=1.0,
        )
        env = SpeedRewardWrapper(
            env,
            window_size=30,
            min_efficiency=0.15,
            max_cte=8.0,
            min_lap_time=12.0,
            progress_patience=100,
        )
        return env

    return _init


def get_handler(vec_env):
    """Navigate wrapper stack to reach DonkeyUnitySimHandler.

    Path taken: outer vec wrapper -> ``.venv`` -> first sub-env ->
    ``.unwrapped`` -> ``.viewer.handler``.
    """
    return vec_env.venv.envs[0].unwrapped.viewer.handler


def regen_road(vec_env, seed, road_style=0):
    """Send regen_road message to sim with the given seed.

    Blocks for REGEN_WAIT seconds after queueing the message so the caller
    does not reset the environment immediately afterwards.
    """
    msg = {
        'msg_type': 'regen_road',
        'road_style': str(road_style),
        'rand_seed': str(seed),
        'turn_increment': '0.0',
    }
    get_handler(vec_env).queue_message(msg)
    time.sleep(REGEN_WAIT)


def _scalar(value):
    """Return ``value[0]`` when *value* is sized, otherwise *value* itself."""
    return value[0] if hasattr(value, '__len__') else value


def run_episode(model, env):
    """Run one deterministic episode, return (steps, reward).

    The episode stops when the environment reports done or after
    MAX_EVAL_STEPS steps, whichever happens first.
    """
    obs = env.reset()
    total_r = 0.0
    total_s = 0
    done = False
    while not done and total_s < MAX_EVAL_STEPS:
        action, _ = model.predict(obs, deterministic=True)
        obs, r, d, _ = env.step(action)
        # step() may hand back per-env sequences; only env 0 is in use here.
        total_r += float(_scalar(r))
        total_s += 1
        done = bool(_scalar(d))
    return total_s, total_r


def _evaluate_model(model_name, model_path, env):
    """Evaluate one model on every seed in EVAL_SEEDS.

    Returns a summary dict with keys ``full``, ``mean_steps``, ``mean_reward``
    and ``per_road``, or ``None`` when the model could not be loaded.
    """
    flog('')
    flog(f'── {model_name} ──────────────────────────────────────')
    flog(f' Model: {model_path}')
    try:
        model = PPO.load(model_path, env=env, device='cpu')
    except Exception as e:
        # Deliberate best-effort: one unloadable checkpoint must not abort
        # the evaluation of the remaining models.
        flog(f' LOAD ERROR: {e}')
        return None

    steps_list = []
    reward_list = []
    for road_no, seed in enumerate(EVAL_SEEDS, 1):
        flog(f' Road {road_no:2d}/{N_ROADS} (seed={seed}) — regenerating...')
        regen_road(env, seed)
        steps, reward = run_episode(model, env)
        status = '✅' if steps >= MAX_EVAL_STEPS else f'❌@{steps}'
        flog(f' → {reward:.1f}r / {steps}s {status}')
        steps_list.append(steps)
        reward_list.append(reward)

    mean_steps = np.mean(steps_list)
    mean_reward = np.mean(reward_list)
    # An episode counts as "full" when it ran the whole step budget.
    full_eps = sum(1 for s in steps_list if s >= MAX_EVAL_STEPS)
    flog(f' {model_name} SUMMARY: {full_eps}/{N_ROADS} full | mean {mean_steps:.0f}s / {mean_reward:.1f}r')
    return {
        'full': full_eps,
        'mean_steps': mean_steps,
        'mean_reward': mean_reward,
        'per_road': list(zip(EVAL_SEEDS, steps_list, reward_list)),
    }


flog('=' * 70)
flog(f'Evaluating best models on {N_ROADS} genuinely different random roads')
flog(f'Seeds: {EVAL_SEEDS}')
flog(f'Log: {LOG_PATH}')
flog('=' * 70)

# Connect once — reuse env for all models/roads
flog('Connecting to sim...')
env = DummyVecEnv([make_env()])

results = {}
try:
    env = VecTransposeImage(env)
    flog(f' Connected. obs={env.observation_space.shape}, action={env.action_space}')

    for model_name, model_path in MODELS.items():
        summary = _evaluate_model(model_name, model_path, env)
        if summary is not None:
            results[model_name] = summary
finally:
    # Always release the simulator connection, even if evaluation blew up
    # part-way through.
    env.close()

flog('')
flog('=' * 70)
flog('FINAL RANKING')
flog('=' * 70)
# Rank by number of full-length episodes first, mean steps as the tie-breaker.
ranked = sorted(
    results.items(),
    key=lambda x: (x[1]['full'], x[1]['mean_steps']),
    reverse=True,
)
for rank, (name, r) in enumerate(ranked, 1):
    flog(f' #{rank} {name:8s} {r["full"]}/{N_ROADS} full mean {r["mean_steps"]:.0f}s / {r["mean_reward"]:.1f}r')
flog('')
flog('Evaluation complete.')