111 lines
4.3 KiB
Python
111 lines
4.3 KiB
Python
"""
|
|
Exp 6: mountain_track, v5 reward, throttle_min=0.5, lr=0.000725, 90k steps
|
|
PROPERLY CHECKPOINTED - numbered saves every segment, best_model.zip kept.
|
|
|
|
This re-runs Exp5 with the checkpointing fix. The model doing 20-second
|
|
laps at step ~30k will be captured in checkpoint_0027404.zip (or similar).
|
|
"""
|
|
import sys, os, time
|
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
|
|
|
from multitrack_runner import (wrap_env, log, _send_exit_scene,
|
|
StuckTerminationWrapper, close_and_switch,
|
|
TRAINING_TRACKS, train_multitrack,
|
|
evaluate_test_tracks)
|
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
|
from reward_wrapper import SpeedRewardWrapper
|
|
from stable_baselines3 import PPO
|
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
|
import gymnasium as gym
|
|
|
|
# Experiment hyper-parameters (see the module docstring for the run summary).
THROTTLE_MIN = 0.5      # passed to ThrottleClampWrapper as throttle_min
LR = 7.25e-4            # PPO learning rate
TOTAL_STEPS = 90_000    # total training timesteps

# Directory that receives every checkpoint of this run.
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp6-mountain-v5-checkpointed'
|
|
# Create the checkpoint directory (and any missing parents) up front;
# exist_ok=True makes this a no-op when the directory already exists.
os.makedirs(SAVE_DIR, exist_ok=True)
|
|
|
|
def make_env(env_id):
    """Build the environment ``env_id`` with this experiment's wrapper stack.

    The environment is created with ``gym.make`` and then wrapped, innermost
    first, by:

    1. ``ThrottleClampWrapper`` with ``throttle_min=THROTTLE_MIN``
    2. ``StuckTerminationWrapper`` with ``stuck_steps=80`` and
       ``min_displacement=0.5``
    3. ``SpeedRewardWrapper``

    Returns the outermost wrapper.
    """
    clamped = ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN)
    guarded = StuckTerminationWrapper(clamped, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(guarded)
|
|
|
|
def switch_to(current_id, next_id, name):
    """Leave the scene of ``current_id`` and return a vectorised env for ``next_id``.

    A throwaway environment is opened on ``current_id`` only so that
    ``_send_exit_scene`` can be issued through it; it is then closed and a
    fresh ``VecTransposeImage(DummyVecEnv([...]))`` around
    ``make_env(next_id)`` is created.

    Args:
        current_id: Gym id of the track used for the throwaway connection.
        next_id: Gym id of the track to connect to.
        name: Human-readable track name, used only in log messages.

    Returns:
        The new vectorised environment for ``next_id``.
    """
    log(f' Switching to {name}...')
    tmp = gym.make(current_id)
    try:
        # NOTE(review): the pauses presumably give the simulator time to
        # settle before/after the scene exit — confirm against the sim.
        time.sleep(2)
        _send_exit_scene(tmp, verbose=False)
    finally:
        # Always release the throwaway connection, even if the exit request
        # fails, so it is not leaked.
        tmp.close()
    time.sleep(5)
    env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)]))
    log(f' Connected to {name}')
    return env
|
|
|
|
# Start-of-run banner: experiment name, hyper-parameters and output location.
_banner_rule = '=' * 60
for _banner_line in (
    _banner_rule,
    'Exp 6: mountain_track, v5 reward, throttle_min=0.5',
    f' lr={LR}, steps={TOTAL_STEPS:,}',
    f' Save dir: {SAVE_DIR}',
    ' Checkpointing: numbered saves every segment + best_model.zip',
    _banner_rule,
):
    log(_banner_line)
|
|
|
|
# Switch sim to mountain_track
log('Connecting to mountain_track...')

# Open a throwaway connection on generated_roads, ask the simulator to exit
# its scene, then drop the connection again.
tmp = gym.make('donkey-generated-roads-v0')
time.sleep(2)
_send_exit_scene(tmp, verbose=False)
tmp.close()
time.sleep(5)


def _make_mountain_env():
    """Factory handed to DummyVecEnv: the wrapped mountain_track environment."""
    return make_env('donkey-mountain-track-v0')


# Single-env vectorised training environment and a fresh PPO CNN policy on CPU.
env = VecTransposeImage(DummyVecEnv([_make_mountain_env]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
|
|
|
|
# ── TRAINING with full checkpointing ──
# We temporarily override TRAINING_TRACKS to mountain_track only.
# NOTE(review): this relies on train_multitrack reading
# multitrack_runner.TRAINING_TRACKS at call time — confirm in multitrack_runner.
import multitrack_runner as mr

_orig_tracks = mr.TRAINING_TRACKS
mr.TRAINING_TRACKS = [('mountain_track', 'donkey-mountain-track-v0')]
try:
    env, segment_rewards = train_multitrack(
        model, env,
        total_timesteps=TOTAL_STEPS,
        steps_per_switch=TOTAL_STEPS,  # one long segment — no switching
        save_dir=SAVE_DIR,
    )
finally:
    # Restore the module-level track list even if training raises, so the
    # override never outlives this block.
    mr.TRAINING_TRACKS = _orig_tracks
|
|
|
|
# List everything the run wrote, with sizes floored to whole mebibytes.
log(f'\nTraining complete. Files saved in {SAVE_DIR}:')
_bytes_per_mb = 1024 * 1024
for _entry in sorted(os.listdir(SAVE_DIR)):
    _entry_path = os.path.join(SAVE_DIR, _entry)
    _whole_mb = os.path.getsize(_entry_path) // _bytes_per_mb
    log(f' {_entry} ({_whole_mb}MB)')

# Release the training environment before the evaluation phase opens new ones.
env.close()
time.sleep(3)
|
|
|
|
# ── EVAL on all tracks using best_model ──
# Path of the model file that eval_track loads for every evaluation track.
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
log('\nEvaluating best_model.zip on all tracks...')
|
|
|
|
def eval_track(current_id, track_id, name, n=3, max_steps=2000):
    """Evaluate the saved best model on one track and log per-episode results.

    Switches the simulator from ``current_id`` to ``track_id`` via
    ``switch_to``, loads the model at ``best_path`` against the new
    environment, and runs ``n`` episodes with deterministic actions. Each
    episode stops when the environment reports done or after ``max_steps``
    steps, whichever comes first.

    Args:
        current_id: Gym id of the track the simulator is currently on.
        track_id: Gym id of the track to evaluate on.
        name: Human-readable track name, used only in log messages.
        n: Number of evaluation episodes.
        max_steps: Per-episode step cap; an episode that reaches the cap
            without the environment reporting done is logged as "FULL".

    Returns:
        ``track_id``, so the caller can pass it as the next ``current_id``.
    """
    log(f'\n--- EVAL: {name} ---')
    ev = switch_to(current_id, track_id, name)
    try:
        m = PPO.load(best_path, env=ev, device='cpu')
        for ep in range(1, n + 1):
            obs = ev.reset()
            total, steps, done = 0.0, 0, False
            while not done and steps < max_steps:
                action, _ = m.predict(obs, deterministic=True)
                result = ev.step(action)
                # Accept both a 5-tuple (terminated, truncated) and a
                # 4-tuple (done) step result; values are indexed at [0]
                # because the env is vectorised with a single sub-env.
                if len(result) == 5:
                    obs, r, t, tr, info = result
                    done = bool(t[0] or tr[0])
                else:
                    obs, r, d, info = result
                    done = bool(d[0])
                total += float(r[0])
                steps += 1
            # The loop exits either on done or on the step cap. Only the
            # latter is a full run: an episode that ends with done on the
            # very last allowed step is still an early termination.
            status = '✅ FULL' if not done else f'❌ crash@{steps}'
            log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
            time.sleep(1)
    finally:
        # Always release the simulator connection, even if loading or
        # stepping fails, so the next track switch does not find it open.
        ev.close()
    time.sleep(3)
    return track_id
|
|
|
|
# Evaluate on the training track first, then on three unseen tracks. Each call
# returns the track it ended on, which becomes the next call's starting track.
current = 'donkey-mountain-track-v0'
for _eval_id, _eval_label in (
    ('donkey-mountain-track-v0', 'mountain_track (training)'),
    ('donkey-generated-track-v0', 'generated_track (zero-shot)'),
    ('donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)'),
    ('donkey-generated-roads-v0', 'generated_road (zero-shot)'),
):
    current = eval_track(current, _eval_id, _eval_label)
|
|
|
|
# End-of-run banner pointing at the best model and the checkpoint directory.
_closing_rule = '=' * 60
for _closing_line in (
    f'\n{_closing_rule}',
    'Exp 6 COMPLETE. Check results above.',
    f'Best model: {best_path}',
    f'All checkpoints saved in: {SAVE_DIR}',
    _closing_rule,
):
    log(_closing_line)
|