# Source: donkeycar-rl-autoresearch/agent/experiments/exp10_two_tracks.py (69 lines, 2.7 KiB, Python)

"""
Exp 10 (fixed): generated_track + mountain_track, v5 reward, throttle_min=0.2
Fix: pass plain wrap_env() to train_multitrack(), NOT VecTransposeImage.
SB3 applies VecTransposeImage internally. close_and_switch() then works
on all segments because env.unwrapped.viewer is accessible via gym.Wrapper.
"""
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import (log, _send_exit_scene, train_multitrack,
wrap_env, TRAINING_TRACKS, THROTTLE_MIN)
from stable_baselines3 import PPO
import gymnasium as gym, numpy as np
# --- Experiment configuration -------------------------------------------
# PPO learning rate (handed to the PPO constructor below).
LR = 7.25e-4
# Overall training budget, passed to train_multitrack as total_timesteps.
TOTAL_STEPS = 90_000
# Passed to train_multitrack as steps_per_switch
# (90,000 / 6,000 = 15 intervals, assuming a switch happens every interval).
STEPS_PER_SEG = 6_000
# Directory used as save_dir for training and scanned for checkpoints afterwards.
SAVE_DIR = (
    '/home/paulh/projects/donkeycar-rl-autoresearch/agent'
    '/models/exp10-two-tracks'
)
# Ensure the checkpoint directory exists; exist_ok makes re-runs harmless.
os.makedirs(SAVE_DIR, exist_ok=True)

# Run banner: summarise the experiment configuration in the log.
separator = '=' * 60
track_names = [track[0] for track in TRAINING_TRACKS]
banner_lines = (
    separator,
    'Exp 10 (fixed): generated_track + mountain_track, v5, throttle_min=0.2',
    f' Tracks: {track_names}',
    f' throttle_min={THROTTLE_MIN}, lr={LR}, total={TOTAL_STEPS:,}',
    ' FIX: passing gym.Wrapper to train_multitrack (not VecTransposeImage)',
    ' SB3 applies VecEnv wrapping internally — exit_scene works on all segments',
    separator,
)
for banner_line in banner_lines:
    log(banner_line)
# Clear sim state: open a throwaway connection, send the exit-scene request
# (presumably returning the simulator to its menu — see multitrack_runner),
# then disconnect so the training env starts from a known state.
log('Clearing sim...')
tmp = gym.make('donkey-mountain-track-v0')
try:
    time.sleep(2)  # same pause as before between connecting and the exit request
    _send_exit_scene(tmp, verbose=False)
finally:
    # Always release the temporary env, even if the exit request raises,
    # so a failed reset does not leave a dangling simulator connection.
    tmp.close()
time.sleep(5)  # pause after disconnecting, before the training env connects
# Build the first environment as a plain gym.Wrapper chain. Per the fix
# described in the module docstring, it must NOT be pre-wrapped in
# VecTransposeImage: SB3 adds its own VecEnv wrapping, and close_and_switch
# needs the gym.Wrapper chain to reach the underlying viewer.
first_track = TRAINING_TRACKS[0]
first_track_id = first_track[1]  # expected to be the generated_track env id
log(f'Connecting to {first_track[0]}...')
first_env = wrap_env(gym.make(first_track_id))

# PPO.__init__ wraps the env itself (DummyVecEnv + VecTransposeImage), so
# model.env is the vectorised view while first_env stays a gym.Wrapper.
ppo_settings = {'learning_rate': LR, 'verbose': 1, 'device': 'cpu'}
model = PPO('CnnPolicy', first_env, **ppo_settings)

# Sanity check in the log: this should report VecTransposeImage.
model_env_type = type(model.env).__name__
log(f'Model env type: {model_env_type}')
log('Connected. Starting two-track training.')
# Run the multi-track training loop. first_env is deliberately the
# gym.Wrapper (not the VecTransposeImage held by model.env).
training_options = {
    'total_timesteps': TOTAL_STEPS,
    'steps_per_switch': STEPS_PER_SEG,
    'save_dir': SAVE_DIR,
}
# train_multitrack returns a pair: an env (presumably the one still open
# when training ends — confirm in multitrack_runner) and the segment rewards.
env, segment_rewards = train_multitrack(model, first_env, **training_options)

# Shut the returned env down and pause before the eval step.
env.close()
time.sleep(3)
# Report what training left in the checkpoint directory, one entry per
# line in name order, with its size floored to whole mebibytes.
log(f'\nTraining complete. Checkpoints in {SAVE_DIR}:')
bytes_per_mib = 1024 * 1024
for checkpoint_name in sorted(os.listdir(SAVE_DIR)):
    checkpoint_path = os.path.join(SAVE_DIR, checkpoint_name)
    size_mb = os.path.getsize(checkpoint_path) // bytes_per_mib
    log(f' {checkpoint_name} ({size_mb}MB)')
# Evaluate the best checkpoint with the standard eval script, run as a
# separate process from the agent directory.
log('\nRunning standard 3-set eval on all tracks...')
import subprocess  # only needed for this final step

agent_dir = '/home/paulh/projects/donkeycar-rl-autoresearch/agent'
eval_command = [
    # Reuse the interpreter running this script so the eval sees the same
    # environment/virtualenv; fall back to PATH lookup if it is unknown.
    sys.executable or 'python3',
    f'{agent_dir}/run_eval.py',
    '--model', os.path.join(SAVE_DIR, 'best_model.zip'),
    '--sets', '3', '--steps', '2000',
]
eval_result = subprocess.run(eval_command, cwd=agent_dir)
if eval_result.returncode != 0:
    # Surface a failed eval in the log instead of silently ignoring it.
    # Training itself has finished, so the script still runs to the end.
    log(f'WARNING: run_eval.py exited with code {eval_result.returncode}; '
        'eval results may be missing or incomplete')
log('\n=== Exp 10 COMPLETE ===')