save: all experiment scripts moved from /tmp to agent/experiments/

Scripts in /tmp are lost on reboot and not reproducible.
All experiment scripts now committed to git with README.

Exp5 script was already gone (lost before this fix).
All others (Exp6-Exp10, overnight, wave5, etc.) now preserved.

Rule going forward: scripts saved to agent/experiments/ and committed
BEFORE running, not after.

Agent: pi
Tests: 102 passed
Tests-Added: 0
TypeScript: N/A
This commit is contained in:
Paul Huliganga 2026-04-18 21:30:08 -04:00
parent de7b9bc302
commit 6e9546cd22
11 changed files with 1021 additions and 0 deletions

View File

@ -0,0 +1,30 @@
# Experiment Scripts
These scripts were used to run individual training experiments.
Each corresponds to an entry in docs/TEST_HISTORY.md.
| Script | Experiment | Key change |
|---|---|---|
| mountain_v5.py | Exp 5 | v5 reward + throttle_min=0.5, direct model.learn() |
| mountain_continue.py | Exp 4 | Continued Exp3 training |
| mountain_high_throttle.py | Exp 3 | throttle_min=0.5, old v4 reward |
| exp6_mountain_v5_proper.py | Exp 6 | v5 + termination, wrong steps_per_switch (=total) |
| exp7_mountain_proper.py | Exp 7 | v5 + termination, correct steps_per_switch=6000, had phantom car issue |
| exp8_mountain_clean.py | Exp 8 | v5 + throttle_min=0.5, single connection, correct checkpointing |
| exp9_mountain_v5_throttle02.py | Exp 9 | v5 + throttle_min=0.2, OUR BEST MODEL |
| exp10_two_tracks.py | Exp 10 | Two tracks via custom script (abandoned — used multitrack_runner.py instead) |
| overnight.py | Overnight runs | mountain-only and Trial9-repeat experiments |
| wave5_train.py | Wave 5 | generated_track only with throttle_min=0.2 |
## Rule going forward
ALL experiment scripts must be saved here and committed to git
BEFORE running. Scripts in /tmp are lost on reboot.
## Running experiments
Use multitrack_runner.py directly for two-track training:
python3 multitrack_runner.py --total-timesteps 90000 --steps-per-switch 6000 ...
For single-track experiments, use the pattern from exp8/exp9:
- VecTransposeImage(DummyVecEnv([make_env])) for env creation
- Direct model.learn() loop with manual checkpointing
- No close_and_switch() for single track

View File

@ -0,0 +1,68 @@
"""
Exp 10 (fixed): generated_track + mountain_track, v5 reward, throttle_min=0.2
Fix: pass plain wrap_env() to train_multitrack(), NOT VecTransposeImage.
SB3 applies VecTransposeImage internally. close_and_switch() then works
on all segments because env.unwrapped.viewer is accessible via gym.Wrapper.
"""
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import (log, _send_exit_scene, train_multitrack,
wrap_env, TRAINING_TRACKS, THROTTLE_MIN)
from stable_baselines3 import PPO
import gymnasium as gym, numpy as np
LR = 0.000725
TOTAL_STEPS = 90000
STEPS_PER_SEG = 6000
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp10-two-tracks'
os.makedirs(SAVE_DIR, exist_ok=True)
log('='*60)
log('Exp 10 (fixed): generated_track + mountain_track, v5, throttle_min=0.2')
log(f' Tracks: {[t[0] for t in TRAINING_TRACKS]}')
log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total={TOTAL_STEPS:,}')
log(f' FIX: passing gym.Wrapper to train_multitrack (not VecTransposeImage)')
log(f' SB3 applies VecEnv wrapping internally — exit_scene works on all segments')
log('='*60)
# Clear sim state
log('Clearing sim...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
# Create first env as plain gym.Wrapper — SB3 wraps it internally
first_track_id = TRAINING_TRACKS[0][1] # generated_track
log(f'Connecting to {TRAINING_TRACKS[0][0]}...')
raw = gym.make(first_track_id)
first_env = wrap_env(raw) # gym.Wrapper chain — close_and_switch works on this
# PPO.__init__ calls _wrap_env() → DummyVecEnv + VecTransposeImage automatically
model = PPO('CnnPolicy', first_env, learning_rate=LR, verbose=1, device='cpu')
log(f'Model env type: {type(model.env).__name__}') # Should show VecTransposeImage
log('Connected. Starting two-track training.')
env, segment_rewards = train_multitrack(
model, first_env, # <-- gym.Wrapper, not VecTransposeImage
total_timesteps=TOTAL_STEPS,
steps_per_switch=STEPS_PER_SEG,
save_dir=SAVE_DIR,
)
env.close(); time.sleep(3)
log(f'\nTraining complete. Checkpoints in {SAVE_DIR}:')
for f in sorted(os.listdir(SAVE_DIR)):
size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024)
log(f' {f} ({size}MB)')
log('\nRunning standard 3-set eval on all tracks...')
import subprocess
subprocess.run([
'python3',
'/home/paulh/projects/donkeycar-rl-autoresearch/agent/run_eval.py',
'--model', os.path.join(SAVE_DIR, 'best_model.zip'),
'--sets', '3', '--steps', '2000'
], cwd='/home/paulh/projects/donkeycar-rl-autoresearch/agent')
log('\n=== Exp 10 COMPLETE ===')

View File

@ -0,0 +1,110 @@
"""
Exp 6: mountain_track, v5 reward, throttle_min=0.5, lr=0.000725, 90k steps
PROPERLY CHECKPOINTED - numbered saves every segment, best_model.zip kept.
This re-runs Exp5 with the checkpointing fix. The model doing 20-second
laps at step ~30k will be captured in checkpoint_0027404.zip (or similar).
"""
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import (wrap_env, log, _send_exit_scene,
StuckTerminationWrapper, close_and_switch,
TRAINING_TRACKS, train_multitrack,
evaluate_test_tracks)
from donkeycar_sb3_runner import ThrottleClampWrapper
from reward_wrapper import SpeedRewardWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
import gymnasium as gym
THROTTLE_MIN = 0.5
LR = 0.000725
TOTAL_STEPS = 90000
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp6-mountain-v5-checkpointed'
os.makedirs(SAVE_DIR, exist_ok=True)
def make_env(env_id):
    """Build the Exp-6 training env: throttle clamp → stuck cutoff → v5 reward."""
    wrapped = ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN)
    wrapped = StuckTerminationWrapper(wrapped, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(wrapped)
def switch_to(current_id, next_id, name):
    """Return the sim to its menu, then reconnect on *next_id* as a SB3 vec env."""
    log(f' Switching to {name}...')
    menu_conn = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(menu_conn, verbose=False)
    menu_conn.close()
    time.sleep(5)
    vec = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)]))
    log(f' Connected to {name}')
    return vec
log('='*60)
log('Exp 6: mountain_track, v5 reward, throttle_min=0.5')
log(f' lr={LR}, steps={TOTAL_STEPS:,}')
log(f' Save dir: {SAVE_DIR}')
log(' Checkpointing: numbered saves every segment + best_model.zip')
log('='*60)
# Switch sim to mountain_track
log('Connecting to mountain_track...')
tmp = gym.make('donkey-generated-roads-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
# ── TRAINING with full checkpointing ──
# We temporarily override TRAINING_TRACKS to mountain_track only
# (train_multitrack reads the module-level list, so monkey-patching is the
# only way to force a single track without changing its signature).
import multitrack_runner as mr
_orig_tracks = mr.TRAINING_TRACKS
mr.TRAINING_TRACKS = [('mountain_track', 'donkey-mountain-track-v0')]
env, segment_rewards = train_multitrack(
    model, env,
    total_timesteps=TOTAL_STEPS,
    steps_per_switch=TOTAL_STEPS,  # one long segment — no switching
    save_dir=SAVE_DIR,
)
mr.TRAINING_TRACKS = _orig_tracks  # restore
log(f'\nTraining complete. Files saved in {SAVE_DIR}:')
for f in sorted(os.listdir(SAVE_DIR)):
    size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024)
    log(f' {f} ({size}MB)')
env.close(); time.sleep(3)
# ── EVAL on all tracks using best_model ──
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
log(f'\nEvaluating best_model.zip on all tracks...')
def eval_track(current_id, track_id, name, n=3):
    """Run *n* deterministic episodes of best_model on *track_id* and log results.

    Returns track_id so the caller can thread it as the next `current_id`.
    Handles both the 5-tuple (gymnasium) and 4-tuple (legacy gym) step APIs.
    """
    log(f'\n--- EVAL: {name} ---')
    ev = switch_to(current_id, track_id, name)
    m = PPO.load(best_path, env=ev, device='cpu')
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # VecEnv returns arrays — index [0] picks the single sub-env.
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        # 2000 survived steps counts as a full run; anything less is a crash.
        status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close(); time.sleep(3)
    return track_id
# Evaluate on the training track first, then three zero-shot tracks.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')
log('\n' + '='*60)
log('Exp 6 COMPLETE. Check results above.')
log(f'Best model: {best_path}')
log('All checkpoints saved in: ' + SAVE_DIR)
log('='*60)

View File

@ -0,0 +1,96 @@
"""
Exp 7: mountain_track, v5 reward, throttle_min=0.5, lr=0.000725, 90k steps
- steps_per_switch=6000 → 15 checkpoints saved across the run
- best_model.zip saved whenever a new best segment score is achieved
- Single track: TRAINING_TRACKS overridden to mountain_track only
"""
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
import multitrack_runner as mr
from multitrack_runner import (log, _send_exit_scene, StuckTerminationWrapper,
train_multitrack)
from donkeycar_sb3_runner import ThrottleClampWrapper
from reward_wrapper import SpeedRewardWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
import gymnasium as gym
THROTTLE_MIN = 0.5
LR = 0.000725
TOTAL_STEPS = 90000
STEPS_PER_SEG = 6000 # 15 checkpoints across the run, NOT 1
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp7-mountain-checkpointed'
os.makedirs(SAVE_DIR, exist_ok=True)
def make_env(env_id):
    """Build the Exp-7 training env: throttle clamp → stuck cutoff → v5 reward."""
    chain = ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN)
    chain = StuckTerminationWrapper(chain, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(chain)
log('='*60)
log('Exp 7: mountain_track ONLY, v5 reward, throttle_min=0.5')
log(f' lr={LR}, total_steps={TOTAL_STEPS:,}, steps_per_seg={STEPS_PER_SEG:,}')
log(f' Checkpoints: {TOTAL_STEPS // STEPS_PER_SEG} saves across the run')
log(f' Save dir: {SAVE_DIR}')
log('='*60)
# Switch sim to mountain_track
# (connect briefly just to send exit_scene, then reconnect cleanly).
log('Switching to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
# Override to single track — segments still run every 6k steps for checkpointing
_orig = mr.TRAINING_TRACKS
mr.TRAINING_TRACKS = [('mountain_track', 'donkey-mountain-track-v0')]
env, segment_rewards = train_multitrack(
    model, env,
    total_timesteps=TOTAL_STEPS,
    steps_per_switch=STEPS_PER_SEG,
    save_dir=SAVE_DIR,
)
mr.TRAINING_TRACKS = _orig  # restore the module-level track list
log(f'\nCheckpoints saved in {SAVE_DIR}:')
for f in sorted(os.listdir(SAVE_DIR)):
    size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024)
    log(f' {f} ({size}MB)')
env.close(); time.sleep(3)
# Eval best_model on all tracks
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
def eval_track(current_id, track_id, name, n=3):
    """Run *n* deterministic episodes of best_model on *track_id* and log results.

    Cycles the sim through its menu first, then builds a fresh vec env on the
    target track. Returns track_id to thread as the next `current_id`.
    """
    log(f'\n--- EVAL: {name} ---')
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    ev = VecTransposeImage(DummyVecEnv([lambda: make_env(track_id)]))
    m = PPO.load(best_path, env=ev, device='cpu')
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both gymnasium 5-tuple and legacy 4-tuple step APIs.
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close(); time.sleep(3)
    return track_id
# Training track first, then three zero-shot tracks.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')
log('\n=== Exp 7 COMPLETE ===')

View File

@ -0,0 +1,133 @@
"""
Exp 8: mountain_track, v5 reward (speed x CTE), throttle_min=0.5
- Single TCP connection for the entire run (no disconnect/reconnect)
- Saves numbered checkpoint every 6000 steps
- Saves best_model.zip whenever a new best is found
- Circle-exploit fix: episode terminates immediately on a short lap
"""
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper
from donkeycar_sb3_runner import ThrottleClampWrapper
from reward_wrapper import SpeedRewardWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.utils import get_schedule_fn
import gymnasium as gym
THROTTLE_MIN = 0.5
LR = 0.000725
TOTAL_STEPS = 90000
STEPS_PER_SEG = 6000 # checkpoint frequency — NOT track switching
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp8-mountain-clean'
os.makedirs(SAVE_DIR, exist_ok=True)
def make_env():
    """Factory for the Exp-8 mountain_track env (single persistent connection)."""
    base = gym.make('donkey-mountain-track-v0')
    base = ThrottleClampWrapper(base, throttle_min=THROTTLE_MIN)
    base = StuckTerminationWrapper(base, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(base)
log('='*60)
log('Exp 8: mountain_track ONLY — single connection throughout')
log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total_steps={TOTAL_STEPS:,}')
log(f' Checkpoint every {STEPS_PER_SEG:,} steps ({TOTAL_STEPS//STEPS_PER_SEG} checkpoints)')
log(f' Reward: v5 (speed x CTE-quality)')
log(f' Circle fix: short lap terminates episode immediately')
log(f' NO disconnect/reconnect between chunks')
log('='*60)
# Connect ONCE — stay connected for the entire run
log('Connecting to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
env = VecTransposeImage(DummyVecEnv([make_env]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
log('Connected. Training begins — sim will NOT go to main menu between segments.')
log('You will see: car runs → crashes/stuck → resets to start → runs again.')
best_reward = float('-inf')
steps_done = 0
seg_num = 0
# Manual segmented training loop: learn 6k steps at a time, checkpoint, and
# run a quick deterministic eval after each segment to track best_model.
while steps_done < TOTAL_STEPS:
    seg_steps = min(STEPS_PER_SEG, TOTAL_STEPS - steps_done)
    seg_num += 1
    # NOTE(review): this log string looks like it lost a '→' between the two
    # numbers in extraction — confirm against the original script.
    log(f'\n[Seg {seg_num}] steps {steps_done:,}{steps_done+seg_steps:,}')
    model.learn(total_timesteps=seg_steps, reset_num_timesteps=False)
    steps_done += seg_steps
    # Numbered checkpoint — never overwritten
    ckpt = os.path.join(SAVE_DIR, f'checkpoint_{steps_done:07d}')
    model.save(ckpt)
    log(f'[Seg {seg_num}] Checkpoint saved: {ckpt}.zip')
    # Quick 1-episode deterministic eval to measure quality
    try:
        obs = env.reset()
        ep_reward, ep_steps, done = 0.0, 0, False
        while not done and ep_steps < 2000:
            action, _ = model.predict(obs, deterministic=True)
            result = env.step(action)
            # Handle both 5-tuple (gymnasium) and 4-tuple (legacy) step APIs.
            if len(result)==5: obs,r,t,tr,_ = result; done=bool(t[0] or tr[0])
            else: obs,r,d,_ = result; done=bool(d[0])
            ep_reward += float(r[0]); ep_steps += 1
        log(f'[Seg {seg_num}] Eval: {ep_reward:.1f} reward / {ep_steps} steps (deterministic)')
        if ep_reward > best_reward:
            best_reward = ep_reward
            best_path = os.path.join(SAVE_DIR, 'best_model')
            model.save(best_path)
            log(f'[Seg {seg_num}] ⭐ NEW BEST: {best_reward:.1f} → best_model.zip')
    except Exception as e:
        # Best-effort eval: a failed segment eval must not abort training.
        log(f'[Seg {seg_num}] Eval failed: {e}')
env.close()
time.sleep(2)
log(f'\nTraining complete. Best reward: {best_reward:.1f}')
log(f'Checkpoints: {SAVE_DIR}/')
for f in sorted(os.listdir(SAVE_DIR)):
    log(f' {f}')
def eval_track(current_id, track_id, name, n=3):
    """Run *n* deterministic episodes of best_model on *track_id* and log results.

    Rebuilds the full training wrapper chain around the eval track so the
    observation/reward pipeline matches what the model was trained on.
    Returns track_id to thread as the next `current_id`.
    """
    log(f'\n--- EVAL: {name} ---')
    tmp2 = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp2, verbose=False); tmp2.close(); time.sleep(5)
    ev = VecTransposeImage(DummyVecEnv([lambda: (
        SpeedRewardWrapper(
            StuckTerminationWrapper(
                ThrottleClampWrapper(gym.make(track_id), throttle_min=THROTTLE_MIN),
                stuck_steps=80, min_displacement=0.5
            )
        )
    )]))
    m = PPO.load(best_path, env=ev, device='cpu')
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both 5-tuple (gymnasium) and 4-tuple (legacy) step APIs.
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close(); time.sleep(3)
    return track_id
# Training track first, then three zero-shot tracks.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')
log('\n=== Exp 8 COMPLETE ===')

View File

@ -0,0 +1,128 @@
"""
Exp 9: mountain_track, v5 reward, throttle_min=0.2
ONE VARIABLE CHANGED from Exp8: throttle_min 0.5 → 0.2
Hypothesis: v5 reward (speed × CTE) has non-zero gradient on hill.
Model can learn to output high throttle when needed even with 0.2 floor.
Full throttle range [0.2, 1.0] allows model to also slow for corners.
If this works: can drive mountain_track AND potentially mini_monaco corners.
If this fails: car stalls on hill, confirming 0.5 minimum is physically required.
"""
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper
from donkeycar_sb3_runner import ThrottleClampWrapper
from reward_wrapper import SpeedRewardWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
import gymnasium as gym, numpy as np
THROTTLE_MIN = 0.2 # ← ONLY CHANGE from Exp8
LR = 0.000725 # same
TOTAL_STEPS = 90000 # same
STEPS_PER_SEG = 6000 # same — 15 checkpoints
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp9-mountain-v5-throttle02'
os.makedirs(SAVE_DIR, exist_ok=True)
def make_env():
    """Factory for the Exp-9 mountain_track env (throttle_min=0.2 variant)."""
    stack = gym.make('donkey-mountain-track-v0')
    stack = ThrottleClampWrapper(stack, throttle_min=THROTTLE_MIN)
    stack = StuckTerminationWrapper(stack, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(stack)
log('='*60)
log('Exp 9: mountain_track, v5 reward, throttle_min=0.2')
log('ONE CHANGE from Exp8: throttle_min 0.5 → 0.2')
log(f' lr={LR}, total_steps={TOTAL_STEPS:,}, steps_per_seg={STEPS_PER_SEG:,}')
log(f' Hypothesis: v5 gradient non-zero on hill → model learns high throttle')
log(f' Save: {SAVE_DIR}')
log('='*60)
# Clear previous sim state
log('Clearing sim state...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
# Single connection for entire run
env = VecTransposeImage(DummyVecEnv([make_env]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
log('Connected. Training begins on mountain_track with throttle_min=0.2')
log('Watch: does model get over the hill?')
best_reward = float('-inf')
steps_done, seg_num = 0, 0
# Segmented training loop (same shape as Exp 8): learn, checkpoint, quick eval.
while steps_done < TOTAL_STEPS:
    seg_steps = min(STEPS_PER_SEG, TOTAL_STEPS - steps_done)
    seg_num += 1
    # NOTE(review): this log string looks like it lost a '→' between the two
    # numbers in extraction — confirm against the original script.
    log(f'\n[Seg {seg_num}] steps {steps_done:,}{steps_done+seg_steps:,}')
    model.learn(total_timesteps=seg_steps, reset_num_timesteps=False)
    steps_done += seg_steps
    ckpt = os.path.join(SAVE_DIR, f'checkpoint_{steps_done:07d}')
    model.save(ckpt)
    log(f'[Seg {seg_num}] Checkpoint: {ckpt}.zip')
    # One deterministic episode to score this segment; best score wins best_model.
    try:
        obs = env.reset()
        ep_reward, ep_steps, done = 0.0, 0, False
        while not done and ep_steps < 2000:
            action, _ = model.predict(obs, deterministic=True)
            result = env.step(action)
            # Handle both 5-tuple (gymnasium) and 4-tuple (legacy) step APIs.
            if len(result)==5: obs,r,t,tr,_ = result; done=bool(t[0] or tr[0])
            else: obs,r,d,_ = result; done=bool(d[0])
            ep_reward += float(r[0]); ep_steps += 1
        log(f'[Seg {seg_num}] Eval: {ep_reward:.1f} reward / {ep_steps} steps (deterministic)')
        if ep_reward > best_reward:
            best_reward = ep_reward
            model.save(os.path.join(SAVE_DIR, 'best_model'))
            log(f'[Seg {seg_num}] ⭐ NEW BEST: {best_reward:.1f}')
    except Exception as e:
        # Best-effort eval: a failed segment eval must not abort training.
        log(f'[Seg {seg_num}] Eval error: {e}')
env.close(); time.sleep(2)
log(f'\nTraining complete. Best reward: {best_reward:.1f}')
# Eval best_model on all tracks
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
def eval_track(current_id, track_id, name, n=3):
    """Run *n* deterministic episodes of best_model on *track_id* and log results.

    Rebuilds the full training wrapper chain for the eval track and also logs
    mean episode length. Returns track_id to thread as the next `current_id`.
    """
    log(f'\n--- EVAL: {name} ---')
    tmp2 = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp2, verbose=False); tmp2.close(); time.sleep(5)
    ev = VecTransposeImage(DummyVecEnv([lambda: (
        SpeedRewardWrapper(StuckTerminationWrapper(
            ThrottleClampWrapper(gym.make(track_id), throttle_min=THROTTLE_MIN),
            80, 0.5)))]))
    m = PPO.load(best_path, env=ev, device='cpu')
    results = []
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both 5-tuple (gymnasium) and 4-tuple (legacy) step APIs.
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        results.append(steps)
        time.sleep(1)
    log(f' Mean steps: {np.mean(results):.0f}')
    ev.close(); time.sleep(3)
    return track_id
# Training track first, then three zero-shot tracks.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')
log('\n=== Exp 9 COMPLETE ===')
log(f'Compare with Exp8 best_model results:')
log(f' mountain_track: 382/529/182 (mean=364)')
log(f' mini_monaco: 154/155/104 (mean=138) ← crashed at one corner')

View File

@ -0,0 +1,86 @@
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import log, _send_exit_scene
from donkeycar_sb3_runner import ThrottleClampWrapper
from reward_wrapper import SpeedRewardWrapper
from multitrack_runner import StuckTerminationWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym
THROTTLE_MIN = 0.5
EXTRA_STEPS = 110000 # already did 90k, adding 110k = 200k total
BASE_MODEL = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp3-mountain-throttle05/model.zip'
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp4-mountain-200k/model'
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
def make_env(env_id):
    """Build the Exp-4 env: throttle clamp → stuck cutoff → scaled speed reward."""
    layered = ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN)
    layered = StuckTerminationWrapper(layered, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(layered, speed_scale=0.1)
def switch_to(current_id, next_id, name):
    """Return the sim to its main menu, then reconnect on *next_id* as a vec env.

    Bug fix: the original body did ``raw = gym.make(next_id)`` and never used
    or closed it — the DummyVecEnv factory below calls make_env(next_id)
    itself, so that extra gym.make opened a second, leaked simulator
    connection. The stray call is removed.
    """
    log(f' → Switching to {name}...')
    # Briefly reconnect to the current scene only to send the exit-scene message.
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    # make_env(next_id) creates the one and only connection to the new track.
    env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)]))
    log(f' Connected to {name}'); return env
class ProgressCB(BaseCallback):
    """SB3 callback that logs a progress line every 10,000 training timesteps."""

    def __init__(self, extra):
        super().__init__(verbose=0)
        self._last = 0
        self._extra = extra

    def _on_step(self):
        # Only emit a log line once at least 10k steps passed since the last one.
        if self.num_timesteps - self._last >= 10000:
            log(f' +{self.num_timesteps:,} steps (of {self._extra:,} extra)')
            self._last = self.num_timesteps
        return True
log('='*60)
log(f'Exp 4: CONTINUE mountain_track from exp3 — adding {EXTRA_STEPS:,} steps')
log(f' Total training will be 200,000 steps on mountain_track')
log(f' throttle_min={THROTTLE_MIN}, loading: {os.path.basename(BASE_MODEL)}')
log('='*60)
# Switch to mountain_track
log('Switching to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')]))
log(f'Loading exp3 model and continuing training...')
model = PPO.load(BASE_MODEL, env=env, device='cpu')
# NOTE(review): reset_num_timesteps=True restarts the internal step counter
# even though this run CONTINUES a previous model — confirm that was intended.
model.learn(total_timesteps=EXTRA_STEPS, callback=ProgressCB(EXTRA_STEPS),
            reset_num_timesteps=True)
model.save(SAVE_PATH)
log(f'Saved: {SAVE_PATH}.zip')
env.close(); time.sleep(3)
def eval_track(current_id, track_id, name, n=3):
    """Run *n* deterministic episodes of the saved Exp-4 model on *track_id*.

    Returns track_id so the caller can thread it as the next `current_id`.
    """
    log(f'\n--- EVAL: {name} ---')
    ev = switch_to(current_id, track_id, name)
    m = PPO.load(SAVE_PATH, env=ev, device='cpu')
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both 5-tuple (gymnasium) and 4-tuple (legacy) step APIs.
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close(); time.sleep(3)
    return track_id
log('\nEvaluating deterministic policy on all tracks...')
# Training track first, then three zero-shot tracks.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training track)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')
log('\n=== Exp 4 COMPLETE ===')

View File

@ -0,0 +1,106 @@
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import wrap_env, log, _send_exit_scene
from donkeycar_sb3_runner import ThrottleClampWrapper
from reward_wrapper import SpeedRewardWrapper
from multitrack_runner import StuckTerminationWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym, numpy as np
LR = 0.000725
TOTAL_STEPS = 90000
THROTTLE_MIN = 0.5 # raised from 0.2
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp3-mountain-throttle05/model'
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
def make_env_high_throttle(env_id):
    """Build the Exp-3 env: high throttle floor → stuck cutoff → scaled speed reward."""
    pipeline = ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN)
    pipeline = StuckTerminationWrapper(pipeline, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(pipeline, speed_scale=0.1)
def switch_to(current_id, next_id, name, throttle_min=THROTTLE_MIN):
    """Exit the current scene, then reconnect on *next_id* with the full wrapper stack.

    Bug fix: the original built a nested lambda that constructed THREE
    overlapping wrapper chains around the same raw env and kept only the last
    element of a tuple — two discarded ThrottleClampWrapper /
    StuckTerminationWrapper instances were created for nothing. The single
    chain actually used (clamp → stuck cutoff → SpeedRewardWrapper(..., 0.1))
    is now built once, with identical arguments.
    """
    log(f' → Switching to {name} (throttle_min={throttle_min})...')
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    raw = gym.make(next_id)
    def _wrap(e=raw, t=throttle_min):
        # Same chain as the original's surviving tuple element.
        return SpeedRewardWrapper(
            StuckTerminationWrapper(ThrottleClampWrapper(e, throttle_min=t), 80, 0.5),
            0.1)
    env = VecTransposeImage(DummyVecEnv([_wrap]))
    log(f' Connected to {name}'); return env
def simple_switch(current_id, next_id, name):
    """Exit the current scene, then connect to *next_id* via make_env_high_throttle.

    Bug fix: the original also did ``raw = gym.make(next_id)`` whose result
    the factory lambda ignored (it called make_env_high_throttle(next_id),
    which gym.makes its own env) — so TWO simulator connections were opened
    and one leaked. The stray gym.make is removed.
    """
    log(f' → Switching to {name}...')
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    env = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle(next_id)]))
    log(f' Connected to {name}'); return env
class ProgressCB(BaseCallback):
    """SB3 callback that logs training progress every 10,000 timesteps."""

    def __init__(self, total):
        super().__init__(verbose=0)
        self._last = 0
        self._total = total

    def _on_step(self):
        # Emit one line per 10k-step window; never interrupt training.
        if self.num_timesteps - self._last >= 10000:
            log(f' step {self.num_timesteps:,}/{self._total:,}')
            self._last = self.num_timesteps
        return True
log('='*60)
log(f'Exp 3: mountain_track ONLY, throttle_min={THROTTLE_MIN}, lr={LR}')
log('Hypothesis: higher min throttle gets car over the hill,')
log(' allowing it to learn the full track.')
log('='*60)
# Connect - sim is on mountain_track from last test
log('Connecting to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
# Bug fix: the original also created an unused `raw = gym.make(...)` here,
# opening a second, never-closed simulator connection — the DummyVecEnv
# factory below makes its own env, so the stray call is removed.
train_env = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle('donkey-mountain-track-v0')]))
log('Connected. Training...')
model = PPO('CnnPolicy', train_env, learning_rate=LR, verbose=1, device='cpu')
model.learn(total_timesteps=TOTAL_STEPS, callback=ProgressCB(TOTAL_STEPS),
            reset_num_timesteps=True)
model.save(SAVE_PATH)
log(f'Saved: {SAVE_PATH}.zip')
train_env.close(); time.sleep(3)
# Eval on all 4 tracks
def eval_track(current_id, track_id, track_name, model_path, n=3, max_steps=2000):
    """Run *n* deterministic episodes of *model_path* on *track_id* and log results.

    Bug fix: the original created an unused ``raw = gym.make(track_id)`` whose
    connection was never used or closed (the DummyVecEnv factory builds its
    own env) — the stray gym.make is removed. Returns track_id so the caller
    can thread it as the next `current_id`.
    """
    log(f'\n--- EVAL: {track_name} ({n} episodes) ---')
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    ev = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle(track_id)]))
    m = PPO.load(model_path, env=ev, device='cpu')
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < max_steps:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both 5-tuple (gymnasium) and 4-tuple (legacy) step APIs.
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        status='✅ FULL' if steps>=max_steps else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close(); time.sleep(3)
    return track_id
# Training track first, then three zero-shot tracks.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training track)', SAVE_PATH)
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)', SAVE_PATH)
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)', SAVE_PATH)
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)', SAVE_PATH)
log('\n=== Exp 3 COMPLETE ===')

View File

@ -0,0 +1,79 @@
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper
from donkeycar_sb3_runner import ThrottleClampWrapper
from reward_wrapper import SpeedRewardWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym
THROTTLE_MIN = 0.5
LR = 0.000725
TOTAL_STEPS = 90000
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp5-mountain-v5reward/model'
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
def make_env(env_id):
    """Build the Exp-5 env: throttle clamp → stuck cutoff → v5 speed reward."""
    stacked = ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN)
    stacked = StuckTerminationWrapper(stacked, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(stacked)  # v5 reward
def switch_to(current_id, next_id, name):
    """Tell the sim to leave its current scene, then attach to `next_id`.

    A throwaway connection delivers the exit-scene message; the real
    vectorised env is created only after the sim has had time to settle.
    """
    log(f'{name}...')
    tmp = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(tmp, verbose=False)
    tmp.close()
    time.sleep(5)
    vec = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)]))
    log(f'  Connected to {name}')
    return vec
class ProgressCB(BaseCallback):
    """Logs a progress line roughly every 10k training timesteps."""

    def __init__(self, total):
        super().__init__(verbose=0)
        self._last = 0        # timesteps at the previous progress line
        self._total = total   # planned total, for the "x/y" display

    def _on_step(self):
        now = self.num_timesteps
        if now - self._last >= 10000:
            log(f'  step {now:,}/{self._total:,}')
            self._last = now
        return True
# ── Experiment banner ──
log('='*60)
log('Exp 5: mountain_track, v5 reward (speed×CTE), throttle_min=0.5')
log('v5 reward gives direct gradient signal for hill: slow=low reward')
log('='*60)
# Switch sim to mountain_track
log('Switching to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')]))
# Train a fresh PPO policy from scratch, then save and disconnect.
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
model.learn(total_timesteps=TOTAL_STEPS, callback=ProgressCB(TOTAL_STEPS),
            reset_num_timesteps=True)
model.save(SAVE_PATH); log(f'Saved.')
env.close(); time.sleep(3)
def eval_track(current_id, track_id, name, n=3):
    """Run `n` deterministic episodes of the saved Exp5 model on `track_id`.

    Episodes are capped at 2000 steps; reaching the cap is reported as a
    full run.  Returns `track_id` so the caller can chain evaluations.
    """
    log(f'\n--- EVAL: {name} ---')
    ev = switch_to(current_id, track_id, name)
    m = PPO.load(SAVE_PATH, env=ev, device='cpu')
    for ep in range(1, n + 1):
        obs = ev.reset()
        total = 0.0
        steps = 0
        done = False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both 5-tuple (terminated/truncated) and 4-tuple step APIs.
            if len(result) == 5:
                obs, r, t, tr, info = result
                done = bool(t[0] or tr[0])
            else:
                obs, r, d, info = result
                done = bool(d[0])
            total += float(r[0])
            steps += 1
        status = '✅ FULL' if steps >= 2000 else f'❌ crash@{steps}'
        log(f'  ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close()
    time.sleep(3)
    return track_id
# Evaluate the freshly trained model on the training track plus three
# zero-shot tracks, threading the sim's current scene through each call.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')
log('\n=== Exp 5 COMPLETE ===')

View File

@ -0,0 +1,111 @@
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import wrap_env, log, _send_exit_scene
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym, numpy as np
LR = 0.000725
def exit_connect(current_id, next_id, name):
    """Exit the sim's current scene, then return a vectorised env on `next_id`.

    A temporary connection is opened only to send the exit-scene message;
    the sleeps give the simulator time to tear down and come back up.
    """
    log(f'  → Switching to {name}...')
    tmp = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(tmp, verbose=False)
    tmp.close()
    time.sleep(5)
    raw = gym.make(next_id)
    vec = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)]))
    log(f'  Connected to {name}')
    return vec
class ProgressCB(BaseCallback):
    """Prints training progress about every 10k timesteps."""

    def __init__(self, total):
        super().__init__(verbose=0)
        self._last = 0        # timestep count at the last log line
        self._total = total   # planned total timesteps

    def _on_step(self):
        current = self.num_timesteps
        if current - self._last >= 10000:
            log(f'  step {current:,}/{self._total:,}')
            self._last = current
        return True
def train(current_id, track_id, track_name, steps, save_path):
    """Train a fresh PPO model on a single track, save it, return track_id.

    `current_id` is the scene the sim is currently on (needed for the
    exit-scene handshake); the returned `track_id` becomes the next call's
    `current_id`.
    """
    log(f'\nTRAINING on {track_name}{steps:,} steps, lr={LR}')
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    env = exit_connect(current_id, track_id, track_name)
    model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
    model.learn(total_timesteps=steps, callback=ProgressCB(steps), reset_num_timesteps=True)
    model.save(save_path); log(f'  Saved: {save_path}.zip')
    env.close(); time.sleep(3)
    return track_id
def train_two_tracks(current_id, steps, switch, save_path):
    """Round-robin training: generated_track + mountain_track.

    Trains a fresh PPO model, alternating tracks every `switch` timesteps
    (the final segment is truncated so exactly `steps` total are run).
    A checkpoint is written after every segment; checkpoint failures are
    logged but never abort the run.  Returns the env id of the track the
    sim is left on.
    """
    from multitrack_runner import close_and_switch
    log(f'\nTRAINING generated_track+mountain_track — {steps:,} steps, switch={switch}, lr={LR}')
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    TRACKS = [('generated_track','donkey-generated-track-v0'),
              ('mountain_track', 'donkey-mountain-track-v0')]
    env = exit_connect(current_id, TRACKS[0][1], TRACKS[0][0])
    model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
    steps_done, idx = 0, 0
    while steps_done < steps:
        seg = min(switch, steps - steps_done)
        log(f'  Segment: {TRACKS[idx][0]} | {steps_done:,}/{steps:,}')
        # reset_num_timesteps=False keeps one continuous learning curve
        # across track switches.
        model.learn(total_timesteps=seg, reset_num_timesteps=False)
        steps_done += seg
        # Best-effort checkpoint after each segment.  The original used a
        # bare `except: pass`, which hid the failure reason; keep the
        # best-effort semantics but log what went wrong.
        try:
            model.save(save_path)
        except Exception as e:
            log(f'  checkpoint save failed: {e!r}')
        if steps_done < steps:
            nxt = (idx + 1) % 2
            env = close_and_switch(env, TRACKS[nxt][1])
            model.set_env(env)
            idx = nxt
    model.save(save_path); log(f'  Saved: {save_path}.zip')
    env.close(); time.sleep(3)
    return TRACKS[idx][1]
def eval_all(current_id, model_path, label):
    """Zero-shot evaluation of `model_path` on three held-out tracks.

    Runs 3 deterministic episodes per track (capped at 2000 steps each)
    and returns the env id of the last track visited.
    """
    log(f'\n{"="*60}')
    log(f'EVAL: {label}')
    log(f'{"="*60}')
    tests = [
        ('generated_track', 'donkey-generated-track-v0'),
        ('mini_monaco', 'donkey-minimonaco-track-v0'),
        ('generated_road', 'donkey-generated-roads-v0'),
    ]
    cur = current_id
    for tname, tid in tests:
        ev = exit_connect(cur, tid, tname)
        m = PPO.load(model_path, env=ev, device='cpu')
        log(f'  --- {tname} (3 episodes) ---')
        for ep in range(1, 4):
            obs = ev.reset()
            total, steps, done = 0.0, 0, False
            while not done and steps < 2000:
                action, _ = m.predict(obs, deterministic=True)
                result = ev.step(action)
                # Accept both 5-tuple and 4-tuple vec-env step returns.
                if len(result) == 5:
                    obs, r, t, tr, info = result
                    done = bool(t[0] or tr[0])
                else:
                    obs, r, d, info = result
                    done = bool(d[0])
                total += float(r[0])
                steps += 1
            status = '✅ FULL 2000' if steps >= 2000 else f'❌ crash@{steps}'
            log(f'    ep{ep}: {total:.0f} reward/{steps} steps — {status}')
            time.sleep(1)
        ev.close()
        time.sleep(3)
        cur = tid
    return cur
# ── START ── sim is on mini_monaco
current = 'donkey-minimonaco-track-v0'
log('\n'+'#'*60)
log('EXPERIMENT 1: mountain_track ONLY, 90k steps')
log('#'*60)
SAVE1 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp1-mountain-only/model'
current = train(current, 'donkey-mountain-track-v0', 'mountain_track', 90000, SAVE1)
current = eval_all(current, SAVE1, 'Exp 1: mountain_track only model')
log('\n'+'#'*60)
log('EXPERIMENT 2: Trial 9 REPEAT — generated_track+mountain_track, 90k, switch=6851')
log('#'*60)
SAVE2 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp2-trial9-repeat/model'
# Round-robin two-track run: switches tracks every 6851 steps within 90k total.
current = train_two_tracks(current, steps=90000, switch=6851, save_path=SAVE2)
current = eval_all(current, SAVE2, 'Exp 2: Trial 9 repeat model')
log('\n'+'='*60)
log('ALL OVERNIGHT EXPERIMENTS COMPLETE')
log('='*60)

View File

@ -0,0 +1,74 @@
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import wrap_env, log, _send_exit_scene
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym
TRAIN_TRACK = 'donkey-generated-track-v0'  # only track used for Wave 5 training
LR = 0.000725        # PPO learning rate
TOTAL_STEPS = 90000  # total training timesteps
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave5-gentrack-only/model'
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)  # ensure the model directory exists
def exit_and_connect(current_track_id, next_track_id, next_name):
    """Leave the sim's current scene, then return a vec env on the next track.

    A temporary connection delivers the exit-scene message only; the
    generous sleeps give the simulator time to tear down and come back up
    before the real (vectorised) connection is made.
    """
    log(f'Exiting current scene, connecting to {next_name}...')
    tmp = gym.make(current_track_id)
    time.sleep(2)
    _send_exit_scene(tmp, verbose=False)
    tmp.close()
    time.sleep(5)
    fresh = gym.make(next_track_id)
    vec = VecTransposeImage(DummyVecEnv([lambda e=fresh: wrap_env(e)]))
    log(f'Connected to {next_name}')
    return vec
# ── Experiment banner and environment setup ──
log('='*60)
log('Wave 5: generated_track ONLY, from scratch, lr=0.000725')
log('='*60)
# The sim is currently on generated_road — exit and switch to generated_track
env = exit_and_connect('donkey-generated-roads-v0', TRAIN_TRACK, 'generated_track')
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
log(f'Training from scratch on generated_track, {TOTAL_STEPS:,} steps...')
class ProgressCB(BaseCallback):
    """Emits a progress log line roughly every 10k timesteps."""

    def __init__(self):
        super().__init__(verbose=0)
        self._last = 0  # timestep count at the previous log line

    def _on_step(self):
        now = self.num_timesteps
        if now - self._last >= 10000:
            log(f'  step {now:,}/{TOTAL_STEPS:,}')
            self._last = now
        return True
# Run the full training loop, save the final model, and disconnect.
model.learn(total_timesteps=TOTAL_STEPS, callback=ProgressCB(), reset_num_timesteps=True)
model.save(SAVE_PATH)
log(f'Model saved.')
env.close()
time.sleep(3)
def eval_track(current_id, eval_id, track_name, n_eps=3):
    """Zero-shot evaluation: run `n_eps` deterministic episodes on `eval_id`.

    Episodes are capped at 2000 steps; reaching the cap is logged as a
    full run.  Returns `eval_id` so the caller can chain evaluations.
    """
    log(f'\n--- EVAL: {track_name} (zero-shot) ---')
    ev = exit_and_connect(current_id, eval_id, track_name)
    m = PPO.load(SAVE_PATH, env=ev, device='cpu')
    for ep in range(1, n_eps + 1):
        obs = ev.reset()
        total = 0.0
        steps = 0
        done = False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Accept both 5-tuple and 4-tuple vec-env step returns.
            if len(result) == 5:
                obs, r, t, tr, info = result
                done = bool(t[0] or tr[0])
            else:
                obs, r, d, info = result
                done = bool(d[0])
            total += float(r[0])
            steps += 1
        status = '✅ FULL' if steps >= 2000 else f'❌ crash@{steps}'
        log(f'  ep{ep}: {total:.0f} reward / {steps} steps — {status}')
    ev.close()
    time.sleep(3)
    return eval_id
# Zero-shot evals on the two unseen tracks; the sim starts on the training track.
current = eval_track(TRAIN_TRACK, 'donkey-minimonaco-track-v0', 'mini_monaco')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road')
log('\n=== Wave 5 COMPLETE ===')