"""
Evaluate best models from exp24/25/26 across 10 truly different random roads.

Uses the regen_road TCP message (rand_seed) to get genuinely different roads —
NOT the same road-2 that all prior training used.

Road style 0 = default generated road geometry.
10 different seeds per model, up to 2000 steps per road.
"""
|
|
import os
|
|
import sys
|
|
import time
|
|
import random
|
|
import json
|
|
from datetime import datetime
|
|
|
|
# Put the project's agent/ directory first on the import path.
# NOTE(review): presumably this is where the local modules imported further
# down (discretize_action, donkeycar_sb3_runner, multitrack_runner,
# reward_wrapper) live — confirm. The path is absolute and machine-specific.
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
|
|
|
import gymnasium as gym
|
|
import numpy as np
|
|
from stable_baselines3 import PPO
|
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
|
|
|
from discretize_action import DiscretizedActionWrapper
|
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
|
from multitrack_runner import StuckTerminationWrapper
|
|
from reward_wrapper import SpeedRewardWrapper
|
|
|
|
# --- Simulator connection ----------------------------------------------------
HOST = 'localhost'
PORT = 9091

# --- Environment / action-space settings -------------------------------------
TRACK_ID = 'donkey-generated-roads-v0'
THROTTLE_MIN = 0.2
N_STEER = 7
N_THROTTLE = 1

# --- Evaluation protocol -----------------------------------------------------
MAX_EVAL_STEPS = 2000
REGEN_WAIT = 3.0  # seconds after regen_road before reset
N_ROADS = 10

# Directory holding the trained checkpoints; the log file is written here too.
_MODELS_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models'

# Experiment name -> checkpoint path, in evaluation order.
MODELS = {
    name: f'{_MODELS_DIR}/{subdir}/best_model.zip'
    for name, subdir in (
        ('exp24', 'exp24-discrete'),
        ('exp25', 'exp25-wheel-fix'),
        ('exp26', 'exp26-warmstart'),
    )
}

# 10 fixed seeds so every model is evaluated on the same set of roads
EVAL_SEEDS = [1001, 2002, 3003, 4004, 5005, 6006, 7007, 8008, 9009, 1234]

# One log file per run, named after the start time (YYYYmmdd_HHMMSS).
LOG_PATH = f'{_MODELS_DIR}/eval_best_models_{datetime.now():%Y%m%d_%H%M%S}.log'
|
|
|
|
import logging
|
|
# Evaluation output goes to two sinks: the per-run log file and stdout.
_fh = logging.FileHandler(LOG_PATH)
_sh = logging.StreamHandler(sys.stdout)

log = logging.getLogger('eval')
log.setLevel(logging.INFO)
# Do not hand records on to ancestor loggers; only the two handlers below
# receive them.
log.propagate = False

# Both sinks emit the bare message text (callers add their own timestamp).
for _handler in (_fh, _sh):
    _handler.setFormatter(logging.Formatter('%(message)s'))
    log.addHandler(_handler)
|
|
|
|
|
|
def ts():
    """Return the current time from ``datetime.now()`` formatted as ``HH:MM:SS``."""
    now = datetime.now()
    return f'{now:%H:%M:%S}'
|
|
|
|
|
|
def flog(msg):
    """Emit *msg* at INFO level on ``log``, prefixed with the ``ts()`` stamp in brackets."""
    stamp = ts()
    log.info(f'[{stamp}] {msg}')
|
|
|
|
|
|
def make_env():
    """Return a zero-argument factory that builds the wrapped evaluation env.

    Nothing is connected or constructed until the returned callable is
    invoked. When invoked it creates the ``TRACK_ID`` environment against
    ``HOST``/``PORT`` and applies the wrappers, innermost first:
    ``ThrottleClampWrapper`` -> ``DiscretizedActionWrapper`` ->
    ``StuckTerminationWrapper`` -> ``SpeedRewardWrapper``.

    Returns:
        A callable taking no arguments and returning the fully wrapped env.
    """
    # Keyword sets are gathered here so the wrapper chain below reads as a
    # plain pipeline.
    stuck_kwargs = dict(
        stuck_steps=40,
        min_displacement=0.5,
        max_stuck_seconds=5.0,
        max_episode_seconds=30.0,
        low_speed_threshold=1.0,
        max_low_speed_seconds=1.5,
        max_cte=3.0,
        max_high_cte_seconds=1.0,
    )
    reward_kwargs = dict(
        window_size=30,
        min_efficiency=0.15,
        max_cte=8.0,
        min_lap_time=12.0,
        progress_patience=100,
    )

    def _init():
        wrapped = gym.make(TRACK_ID, conf={'host': HOST, 'port': PORT})
        wrapped = ThrottleClampWrapper(wrapped, throttle_min=THROTTLE_MIN)
        wrapped = DiscretizedActionWrapper(
            wrapped, n_steer=N_STEER, n_throttle=N_THROTTLE
        )
        wrapped = StuckTerminationWrapper(wrapped, **stuck_kwargs)
        return SpeedRewardWrapper(wrapped, **reward_kwargs)

    return _init
|
|
|
|
|
|
def get_handler(vec_env):
    """Navigate wrapper stack to reach DonkeyUnitySimHandler.

    Every vectorised-env wrapper layer that exposes the wrapped env as
    ``.venv`` is peeled off, so this works for a bare vec env, for a single
    wrapper, and for several stacked wrappers. The handler is then read from
    the first sub-environment via ``envs[0].unwrapped.viewer.handler``.

    Args:
        vec_env: Vectorised environment, optionally wrapped in any number of
            layers that each expose ``.venv``.

    Returns:
        The ``handler`` object of the first sub-environment's viewer.

    Raises:
        AttributeError: If the innermost env lacks ``envs`` or the base env
            lacks ``viewer``/``handler``.
        IndexError: If the innermost env's ``envs`` list is empty.
    """
    base = vec_env
    # Do not assume exactly one wrapper layer: descend until the innermost
    # vec env, which is the one that owns the ``envs`` list.
    while hasattr(base, 'venv'):
        base = base.venv
    return base.envs[0].unwrapped.viewer.handler
|
|
|
|
|
|
def regen_road(vec_env, seed, road_style=0):
    """Queue a ``regen_road`` message for the sim, then pause ``REGEN_WAIT`` seconds.

    Args:
        vec_env: Vectorised env passed to ``get_handler`` to find the sim handler.
        seed: Value sent (as a string) in the ``rand_seed`` field.
        road_style: Value sent (as a string) in the ``road_style`` field.
    """
    payload = dict(
        msg_type='regen_road',
        road_style=str(road_style),
        rand_seed=str(seed),
        turn_increment='0.0',
    )
    get_handler(vec_env).queue_message(payload)
    # The message is only queued; no acknowledgement is read back here, so a
    # fixed pause is used before the caller resets the env.
    time.sleep(REGEN_WAIT)
|
|
|
|
|
|
def _first_scalar(value):
    """Return the first element of *value*, accepting scalars and arrays of any rank."""
    # reshape(-1) turns Python scalars, 0-d arrays and 1-element batches into
    # a flat array, so [0] is valid for all of them.
    return np.asarray(value).reshape(-1)[0]


def run_episode(model, env, max_steps=None):
    """Run one deterministic episode, return (steps, reward).

    Args:
        model: Object whose ``predict(obs, deterministic=True)`` returns an
            ``(action, state)`` pair.
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns a 4-tuple ``(obs, reward, done, info)``.
            ``reward`` and ``done`` may be scalars or arrays; only the first
            element is used.
        max_steps: Upper bound on the number of steps. ``None`` (the default)
            means ``MAX_EVAL_STEPS``.

    Returns:
        ``(steps, total_reward)``: the number of steps taken (int) and the
        summed reward (float).
    """
    if max_steps is None:
        # Resolved at call time so the default tracks the module constant.
        max_steps = MAX_EVAL_STEPS

    obs = env.reset()
    total_reward = 0.0
    steps = 0
    done = False

    while not done and steps < max_steps:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done_flag, _ = env.step(action)
        total_reward += float(_first_scalar(reward))
        steps += 1
        done = bool(_first_scalar(done_flag))

    return steps, total_reward
|
|
|
|
|
|
# Number of roads each model is evaluated on; taken from the seed list so the
# log messages stay correct if EVAL_SEEDS is edited.
n_roads = len(EVAL_SEEDS)

flog('=' * 70)
flog(f'Evaluating best models on {n_roads} genuinely different random roads')
flog(f'Seeds: {EVAL_SEEDS}')
flog(f'Log: {LOG_PATH}')
flog('=' * 70)

# Connect once — reuse env for all models/roads
flog('Connecting to sim...')
env = DummyVecEnv([make_env()])

results = {}

# try/finally so the env is closed even if an episode or the sim raises
# part-way through the evaluation.
try:
    env = VecTransposeImage(env)
    flog(f' Connected. obs={env.observation_space.shape}, action={env.action_space}')

    for model_name, model_path in MODELS.items():
        flog('')
        flog(f'── {model_name} ──────────────────────────────────────')
        flog(f' Model: {model_path}')

        # A missing or incompatible checkpoint should not abort the whole run:
        # report it and move on to the next model.
        try:
            model = PPO.load(model_path, env=env, device='cpu')
        except Exception as e:
            flog(f' LOAD ERROR: {e}')
            continue

        steps_list = []
        reward_list = []

        for i, seed in enumerate(EVAL_SEEDS, 1):
            flog(f' Road {i:2d}/{n_roads} (seed={seed}) — regenerating...')
            regen_road(env, seed)

            steps, reward = run_episode(model, env)
            # An episode counts as "full" only if it reached the step cap.
            status = '✅' if steps >= MAX_EVAL_STEPS else f'❌@{steps}'
            flog(f' → {reward:.1f}r / {steps}s {status}')
            steps_list.append(steps)
            reward_list.append(reward)

        # Plain floats rather than numpy scalars, so the results dict holds
        # only built-in types.
        mean_steps = float(np.mean(steps_list))
        mean_reward = float(np.mean(reward_list))
        full_eps = sum(1 for s in steps_list if s >= MAX_EVAL_STEPS)

        flog(f' {model_name} SUMMARY: {full_eps}/{n_roads} full | mean {mean_steps:.0f}s / {mean_reward:.1f}r')
        results[model_name] = {
            'full': full_eps,
            'mean_steps': mean_steps,
            'mean_reward': mean_reward,
            'per_road': list(zip(EVAL_SEEDS, steps_list, reward_list)),
        }
finally:
    env.close()

flog('')
flog('=' * 70)
flog('FINAL RANKING')
flog('=' * 70)
# Rank by number of full-length episodes first, then by mean episode length.
ranked = sorted(results.items(), key=lambda x: (x[1]['full'], x[1]['mean_steps']), reverse=True)
for rank, (name, r) in enumerate(ranked, 1):
    flog(f' #{rank} {name:8s} {r["full"]}/{n_roads} full mean {r["mean_steps"]:.0f}s / {r["mean_reward"]:.1f}r')

flog('')
flog('Evaluation complete.')
|