"""Wave 5: train PPO from scratch on generated_track only, then evaluate.

The script runs top to bottom at import time:

1. Switch the simulator from generated_road to generated_track.
2. Train a ``CnnPolicy`` PPO model for ``TOTAL_STEPS`` steps and save it.
3. Evaluate the saved model on mini_monaco and generated_road (logged as
   "zero-shot" runs).
"""
import sys
import os
import time

# The project-local import below only resolves once the agent directory is on
# sys.path, so this insert must stay ahead of it.
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')

from multitrack_runner import wrap_env, log, _send_exit_scene
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym

TRAIN_TRACK = 'donkey-generated-track-v0'
LR = 0.000725
TOTAL_STEPS = 90000
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave5-gentrack-only/model'

os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)


def exit_and_connect(current_track_id, next_track_id, next_name):
    """Exit current scene cleanly then connect to next track.

    A throwaway environment is created for ``current_track_id`` solely so
    that ``_send_exit_scene`` can be called on it; it is closed again before
    the environment for ``next_track_id`` is created.

    Args:
        current_track_id: Gym id of the track the simulator is on now.
        next_track_id: Gym id of the track to connect to.
        next_name: Human-readable track name, used only in log messages.

    Returns:
        The new environment, passed through ``wrap_env`` and wrapped in
        ``DummyVecEnv`` + ``VecTransposeImage``.
    """
    log(f'Exiting current scene, connecting to {next_name}...')
    tmp = gym.make(current_track_id)
    try:
        # NOTE(review): the pauses presumably give the simulator time to
        # settle before/after the scene exit — confirm the required values.
        time.sleep(2)
        _send_exit_scene(tmp, verbose=False)
    finally:
        # Close the temporary env even if the exit request fails, so the
        # connection is not left dangling.
        tmp.close()
    time.sleep(5)

    raw = gym.make(next_track_id)
    # Bind `raw` as a default argument so the factory does not depend on a
    # late-bound closure variable.
    env = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)]))
    log(f'Connected to {next_name}')
    return env


class ProgressCB(BaseCallback):
    """Log training progress every ``interval`` timesteps."""

    def __init__(self, interval=10000):
        """Create the callback.

        Args:
            interval: Minimum number of timesteps between two progress logs.
        """
        super().__init__(verbose=0)
        self._interval = interval
        self._last = 0  # num_timesteps at the most recent progress log

    def _on_step(self):
        """Emit a progress line when due; always return True to keep training."""
        if self.num_timesteps - self._last >= self._interval:
            log(f' step {self.num_timesteps:,}/{TOTAL_STEPS:,}')
            self._last = self.num_timesteps
        return True


def eval_track(current_id, eval_id, track_name, n_eps=3, max_steps=2000):
    """Evaluate the saved model on one track and log per-episode results.

    Args:
        current_id: Gym id of the track the simulator is currently on.
        eval_id: Gym id of the track to evaluate on.
        track_name: Human-readable track name for log messages.
        n_eps: Number of evaluation episodes.
        max_steps: Step cap per episode; an episode that reaches the cap is
            reported as FULL, anything shorter as a crash.

    Returns:
        ``eval_id``, so the caller can pass it as ``current_id`` next time.
    """
    log(f'\n--- EVAL: {track_name} (zero-shot) ---')
    ev = exit_and_connect(current_id, eval_id, track_name)
    try:
        m = PPO.load(SAVE_PATH, env=ev, device='cpu')
        for ep in range(1, n_eps + 1):
            obs = ev.reset()
            total, steps, done = 0.0, 0, False
            while not done and steps < max_steps:
                action, _ = m.predict(obs, deterministic=True)
                result = ev.step(action)
                # Accept both step() shapes: 5 values
                # (obs, reward, terminated, truncated, info) or 4 values
                # (obs, reward, done, info). Values are indexed with [0]
                # because the vectorized env holds a single sub-environment.
                if len(result) == 5:
                    obs, r, t, tr, info = result
                    done = bool(t[0] or tr[0])
                else:
                    obs, r, d, info = result
                    done = bool(d[0])
                total += float(r[0])
                steps += 1
            # NOTE(review): an episode that ends exactly on the final allowed
            # step is still reported as FULL, matching the original behavior.
            status = '✅ FULL' if steps >= max_steps else f'❌ crash@{steps}'
            log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}')
    finally:
        # Always release the simulator connection, even when loading or
        # stepping raises.
        ev.close()
    time.sleep(3)
    return eval_id


log('='*60)
log('Wave 5: generated_track ONLY, from scratch, lr=0.000725')
log('='*60)

# The sim is currently on generated_road — exit and switch to generated_track
env = exit_and_connect('donkey-generated-roads-v0', TRAIN_TRACK, 'generated_track')
try:
    model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
    log(f'Training from scratch on generated_track, {TOTAL_STEPS:,} steps...')
    model.learn(total_timesteps=TOTAL_STEPS, callback=ProgressCB(), reset_num_timesteps=True)
    model.save(SAVE_PATH)
    log('Model saved.')
finally:
    # Close the training env even if training or saving fails.
    env.close()
time.sleep(3)

current = eval_track(TRAIN_TRACK, 'donkey-minimonaco-track-v0', 'mini_monaco')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road')
log('\n=== Wave 5 COMPLETE ===')