"""Overnight PPO experiments on DonkeyCar simulator tracks.

Runs two experiments back to back and evaluates each resulting model:

1. Train on ``mountain_track`` only.
2. Round-robin training on ``generated_track`` + ``mountain_track``.

The simulator is assumed to be sitting on ``mini_monaco`` when the script
starts (see the START section at the bottom).
"""
import sys
import os
import time

# Must run before the ``multitrack_runner`` import below so it can be found.
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import wrap_env, log, _send_exit_scene
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym
import numpy as np

# Learning rate shared by every PPO model built in this script.
LR = 0.000725


def _ensure_parent_dir(save_path):
    """Create the directory that will contain *save_path*, if it has one.

    ``os.makedirs('')`` raises, so a bare filename (no directory part) is
    left alone and the file ends up in the current working directory.
    """
    parent = os.path.dirname(save_path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def exit_connect(current_id, next_id, name):
    """Leave the scene for *current_id* and connect to *next_id*.

    A throwaway env is created on ``current_id`` only to send the exit-scene
    message through it, then closed. After a pause a fresh env for
    ``next_id`` is created and wrapped with ``wrap_env``, ``DummyVecEnv`` and
    ``VecTransposeImage``.

    Args:
        current_id: Gym id of the track the simulator is currently on.
        next_id: Gym id of the track to switch to.
        name: Human-readable track name, used only in log messages.

    Returns:
        The vectorised, image-transposed env connected to ``next_id``.
    """
    log(f' → Switching to {name}...')
    tmp = gym.make(current_id)
    try:
        time.sleep(2)
        _send_exit_scene(tmp, verbose=False)
    finally:
        # Release the temporary connection even if the exit message fails.
        tmp.close()
    # NOTE(review): presumably gives the simulator time to unload the scene
    # before the next connection is opened — confirm.
    time.sleep(5)
    raw = gym.make(next_id)
    # ``e=raw`` binds the env as a default argument so the factory does not
    # depend on the enclosing variable at call time.
    env = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)]))
    log(f' Connected to {name}')
    return env


class ProgressCB(BaseCallback):
    """Callback that logs training progress at a fixed timestep interval."""

    def __init__(self, total, every=10000):
        """
        Args:
            total: Total number of timesteps planned (shown in the log line).
            every: Minimum number of timesteps between two log lines.
        """
        super().__init__(verbose=0)
        self._last = 0
        self._total = total
        self._every = every

    def _on_step(self):
        """Log once ``every`` timesteps have elapsed; never stops training."""
        if self.num_timesteps - self._last >= self._every:
            log(f' step {self.num_timesteps:,}/{self._total:,}')
            self._last = self.num_timesteps
        # stable-baselines3 aborts training when a callback returns False.
        return True


def train(current_id, track_id, track_name, steps, save_path):
    """Train a fresh PPO model on a single track and save it.

    Args:
        current_id: Gym id of the track the simulator is currently on.
        track_id: Gym id of the track to train on.
        track_name: Human-readable track name for log messages.
        steps: Number of timesteps passed to ``model.learn``.
        save_path: Model path without the ``.zip`` suffix.

    Returns:
        ``track_id`` — the track the simulator is on when training ends.
    """
    log(f'\nTRAINING on {track_name} — {steps:,} steps, lr={LR}')
    _ensure_parent_dir(save_path)
    env = exit_connect(current_id, track_id, track_name)
    try:
        model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
        model.learn(
            total_timesteps=steps,
            callback=ProgressCB(steps),
            reset_num_timesteps=True,
        )
        model.save(save_path)
        log(f' Saved: {save_path}.zip')
    finally:
        # Always release the simulator connection, even when training fails.
        env.close()
        time.sleep(3)
    return track_id


def train_two_tracks(current_id, steps, switch, save_path):
    """Round-robin training: generated_track + mountain_track.

    Trains one PPO model in segments of at most ``switch`` timesteps,
    alternating between the two tracks and checkpointing after each segment.

    Args:
        current_id: Gym id of the track the simulator is currently on.
        steps: Total number of timesteps to request across all segments.
        switch: Timesteps requested per segment before switching track.
        save_path: Model path without the ``.zip`` suffix.

    Returns:
        Gym id of the track that was active for the last segment.

    Raises:
        ValueError: If ``switch`` is not positive (the loop would never end).
    """
    # Imported lazily so the single-track experiment does not depend on it.
    from multitrack_runner import close_and_switch

    if switch <= 0:
        raise ValueError(f'switch must be a positive step count, got {switch!r}')

    log(f'\nTRAINING generated_track+mountain_track — {steps:,} steps, switch={switch}, lr={LR}')
    _ensure_parent_dir(save_path)
    TRACKS = [
        ('generated_track', 'donkey-generated-track-v0'),
        ('mountain_track', 'donkey-mountain-track-v0'),
    ]
    env = exit_connect(current_id, TRACKS[0][1], TRACKS[0][0])
    try:
        model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
        steps_done, idx = 0, 0
        while steps_done < steps:
            seg = min(switch, steps - steps_done)
            log(f' Segment: {TRACKS[idx][0]} | {steps_done:,}/{steps:,}')
            # NOTE(review): ``steps_done`` counts requested timesteps; PPO may
            # collect more per call if it only stops on rollout boundaries —
            # compare with ``model.num_timesteps`` to confirm.
            model.learn(total_timesteps=seg, reset_num_timesteps=False)
            steps_done += seg
            try:
                model.save(save_path)  # checkpoint
            except Exception as exc:
                # Best effort: a failed checkpoint must not abort the run,
                # but it should be visible in the log. KeyboardInterrupt and
                # SystemExit are intentionally not caught.
                log(f' WARNING: checkpoint save failed: {exc!r}')
            if steps_done < steps:
                nxt = (idx + 1) % len(TRACKS)
                env = close_and_switch(env, TRACKS[nxt][1])
                model.set_env(env)
                idx = nxt
        model.save(save_path)
        log(f' Saved: {save_path}.zip')
    finally:
        # Always release the simulator connection, even when training fails.
        env.close()
        time.sleep(3)
    return TRACKS[idx][1]


def eval_all(current_id, model_path, label, episodes=3, max_steps=2000):
    """Evaluate a saved model on each evaluation track and log the results.

    For every track the model is reloaded against a fresh env and run
    deterministically for ``episodes`` episodes, each capped at ``max_steps``
    steps. An episode that reaches the cap is logged as a full run; one that
    ends earlier is logged as a crash.

    Args:
        current_id: Gym id of the track the simulator is currently on.
        model_path: Path of the saved model (as given to ``model.save``).
        label: Heading printed at the top of the evaluation log.
        episodes: Number of episodes per track.
        max_steps: Step cap per episode.

    Returns:
        Gym id of the last track evaluated (``current_id`` if none ran).
    """
    log(f'\n{"="*60}')
    log(f'EVAL: {label}')
    log(f'{"="*60}')
    tests = [
        ('generated_track', 'donkey-generated-track-v0'),
        ('mini_monaco', 'donkey-minimonaco-track-v0'),
        ('generated_road', 'donkey-generated-roads-v0'),
    ]
    cur = current_id
    for tname, tid in tests:
        ev = exit_connect(cur, tid, tname)
        try:
            m = PPO.load(model_path, env=ev, device='cpu')
            log(f' --- {tname} ({episodes} episodes) ---')
            for ep in range(1, episodes + 1):
                obs = ev.reset()
                total, steps, done = 0.0, 0, False
                while not done and steps < max_steps:
                    action, _ = m.predict(obs, deterministic=True)
                    result = ev.step(action)
                    # Accept both 5-tuple (terminated/truncated) and 4-tuple
                    # (done) step results; index 0 is the single sub-env.
                    if len(result) == 5:
                        obs, r, t, tr, info = result
                        done = bool(t[0] or tr[0])
                    else:
                        obs, r, d, info = result
                        done = bool(d[0])
                    total += float(r[0])
                    steps += 1
                status = f'✅ FULL {max_steps}' if steps >= max_steps else f'❌ crash@{steps}'
                log(f' ep{ep}: {total:.0f} reward/{steps} steps — {status}')
                time.sleep(1)
        finally:
            # Always release the simulator connection before moving on.
            ev.close()
            time.sleep(3)
        cur = tid
    return cur


# ── START ── sim is on mini_monaco
current = 'donkey-minimonaco-track-v0'

log('\n'+'#'*60)
log('EXPERIMENT 1: mountain_track ONLY, 90k steps')
log('#'*60)
SAVE1 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp1-mountain-only/model'
current = train(current, 'donkey-mountain-track-v0', 'mountain_track', 90000, SAVE1)
current = eval_all(current, SAVE1, 'Exp 1: mountain_track only model')

log('\n'+'#'*60)
log('EXPERIMENT 2: Trial 9 REPEAT — generated_track+mountain_track, 90k, switch=6851')
log('#'*60)
SAVE2 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp2-trial9-repeat/model'
current = train_two_tracks(current, steps=90000, switch=6851, save_path=SAVE2)
current = eval_all(current, SAVE2, 'Exp 2: Trial 9 repeat model')

log('\n'+'='*60)
log('ALL OVERNIGHT EXPERIMENTS COMPLETE')
log('='*60)