""" Exp 10 (fixed): generated_track + mountain_track, v5 reward, throttle_min=0.2 Fix: pass plain wrap_env() to train_multitrack(), NOT VecTransposeImage. SB3 applies VecTransposeImage internally. close_and_switch() then works on all segments because env.unwrapped.viewer is accessible via gym.Wrapper. """ import sys, os, time sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') from multitrack_runner import (log, _send_exit_scene, train_multitrack, wrap_env, TRAINING_TRACKS, THROTTLE_MIN) from stable_baselines3 import PPO import gymnasium as gym, numpy as np LR = 0.000725 TOTAL_STEPS = 90000 STEPS_PER_SEG = 6000 SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp10-two-tracks' os.makedirs(SAVE_DIR, exist_ok=True) log('='*60) log('Exp 10 (fixed): generated_track + mountain_track, v5, throttle_min=0.2') log(f' Tracks: {[t[0] for t in TRAINING_TRACKS]}') log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total={TOTAL_STEPS:,}') log(f' FIX: passing gym.Wrapper to train_multitrack (not VecTransposeImage)') log(f' SB3 applies VecEnv wrapping internally — exit_scene works on all segments') log('='*60) # Clear sim state log('Clearing sim...') tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2) _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) # Create first env as plain gym.Wrapper — SB3 wraps it internally first_track_id = TRAINING_TRACKS[0][1] # generated_track log(f'Connecting to {TRAINING_TRACKS[0][0]}...') raw = gym.make(first_track_id) first_env = wrap_env(raw) # gym.Wrapper chain — close_and_switch works on this # PPO.__init__ calls _wrap_env() → DummyVecEnv + VecTransposeImage automatically model = PPO('CnnPolicy', first_env, learning_rate=LR, verbose=1, device='cpu') log(f'Model env type: {type(model.env).__name__}') # Should show VecTransposeImage log('Connected. Starting two-track training.') env, segment_rewards = train_multitrack( model, first_env, # <-- gym.Wrapper, not VecTransposeImage total_timesteps=TOTAL_STEPS, steps_per_switch=STEPS_PER_SEG, save_dir=SAVE_DIR, ) env.close(); time.sleep(3) log(f'\nTraining complete. Checkpoints in {SAVE_DIR}:') for f in sorted(os.listdir(SAVE_DIR)): size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024) log(f' {f} ({size}MB)') log('\nRunning standard 3-set eval on all tracks...') import subprocess subprocess.run([ 'python3', '/home/paulh/projects/donkeycar-rl-autoresearch/agent/run_eval.py', '--model', os.path.join(SAVE_DIR, 'best_model.zip'), '--sets', '3', '--steps', '2000' ], cwd='/home/paulh/projects/donkeycar-rl-autoresearch/agent') log('\n=== Exp 10 COMPLETE ===')