# donkeycar-rl-autoresearch/agent/experiments/overnight.py
# (112 lines · 4.7 KiB · Python — file-browser header kept as a comment
# so the module remains valid Python)
import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import wrap_env, log, _send_exit_scene
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym, numpy as np
LR = 0.000725
def exit_connect(current_id, next_id, name):
    """Leave the simulator's current scene, then connect to *next_id*.

    Briefly re-attaches to the current scene so the exit command can be
    sent, waits for the sim to unload, and returns a fresh
    VecTransposeImage(DummyVecEnv(...)) wrapping the new track.
    """
    log(f' → Switching to {name}...')
    # Re-attach just long enough to tell the sim to exit the scene.
    tmp = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(tmp, verbose=False)
    tmp.close()
    time.sleep(5)  # give the simulator time to tear the scene down
    raw = gym.make(next_id)
    # Default-arg binding pins `raw` so the factory is safe inside the list.
    env = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)]))
    log(f' Connected to {name}')
    return env
class ProgressCB(BaseCallback):
    """SB3 callback that logs progress roughly every 10k timesteps."""

    def __init__(self, total):
        super().__init__(verbose=0)
        self._last = 0      # timestep count at the last log line
        self._total = total  # planned total, for the "x/y" display

    def _on_step(self):
        # Emit one line whenever ~10k new steps have accumulated.
        elapsed = self.num_timesteps - self._last
        if elapsed >= 10000:
            log(f' step {self.num_timesteps:,}/{self._total:,}')
            self._last = self.num_timesteps
        return True  # never request early termination
def train(current_id, track_id, track_name, steps, save_path):
    """Train a fresh PPO model on a single track and save it.

    Args:
        current_id: gym env id of the scene the sim is currently on.
        track_id: gym env id of the track to train on.
        track_name: human-readable track name (log output only).
        steps: total PPO timesteps to train for.
        save_path: model save path (SB3 appends ``.zip``).

    Returns:
        ``track_id`` — the scene the sim is left on, for chaining calls.
    """
    # Fix: the log line was missing the separator between the track name
    # and the step count (cf. the matching message in train_two_tracks).
    log(f'\nTRAINING on {track_name} — {steps:,} steps, lr={LR}')
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    env = exit_connect(current_id, track_id, track_name)
    model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
    model.learn(total_timesteps=steps, callback=ProgressCB(steps),
                reset_num_timesteps=True)
    model.save(save_path)
    log(f' Saved: {save_path}.zip')
    env.close()
    time.sleep(3)  # let the sim settle before the next scene switch
    return track_id
def train_two_tracks(current_id, steps, switch, save_path):
    """Round-robin training: generated_track + mountain_track.

    Trains a single PPO model, alternating between the two tracks every
    ``switch`` timesteps until ``steps`` total steps are done.

    Args:
        current_id: gym env id of the scene the sim is currently on.
        steps: total PPO timesteps across both tracks.
        switch: segment length (timesteps) before hopping tracks.
        save_path: model save path (SB3 appends ``.zip``).

    Returns:
        The gym env id of the track the sim is left on.
    """
    from multitrack_runner import close_and_switch
    log(f'\nTRAINING generated_track+mountain_track — {steps:,} steps, switch={switch}, lr={LR}')
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    TRACKS = [('generated_track', 'donkey-generated-track-v0'),
              ('mountain_track', 'donkey-mountain-track-v0')]
    env = exit_connect(current_id, TRACKS[0][1], TRACKS[0][0])
    model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
    steps_done, idx = 0, 0
    while steps_done < steps:
        seg = min(switch, steps - steps_done)
        log(f' Segment: {TRACKS[idx][0]} | {steps_done:,}/{steps:,}')
        # reset_num_timesteps=False keeps one continuous step counter
        # across segments/tracks.
        model.learn(total_timesteps=seg, reset_num_timesteps=False)
        steps_done += seg
        # Best-effort checkpoint: keep training even if the save fails,
        # but log the failure instead of swallowing it with a bare except
        # (which would also eat KeyboardInterrupt/SystemExit).
        try:
            model.save(save_path)
        except Exception as e:
            log(f' checkpoint save failed: {e}')
        if steps_done < steps:
            nxt = (idx + 1) % 2
            env = close_and_switch(env, TRACKS[nxt][1])
            model.set_env(env)
            idx = nxt
    model.save(save_path)
    log(f' Saved: {save_path}.zip')
    env.close()
    time.sleep(3)
    return TRACKS[idx][1]
def eval_all(current_id, model_path, label):
    """Evaluate a saved PPO model on three test tracks, 3 episodes each.

    Hops the simulator from scene to scene via exit_connect, logs per-episode
    reward and step counts, and returns the env id the sim is left on so the
    caller can chain subsequent train/eval calls.
    """
    log(f'\n{"="*60}')
    log(f'EVAL: {label}')
    log(f'{"="*60}')
    tests = [
        ('generated_track','donkey-generated-track-v0'),
        ('mini_monaco', 'donkey-minimonaco-track-v0'),
        ('generated_road', 'donkey-generated-roads-v0'),
    ]
    cur = current_id
    for tname, tid in tests:
        ev = exit_connect(cur, tid, tname)
        m = PPO.load(model_path, env=ev, device='cpu')
        log(f' --- {tname} (3 episodes) ---')
        for ep in range(1,4):
            obs = ev.reset(); total,steps,done = 0.0,0,False
            # Cap episodes at 2000 steps; reaching the cap counts as success.
            while not done and steps < 2000:
                action,_ = m.predict(obs, deterministic=True)
                result = ev.step(action)
                # Handle both the 5-tuple (terminated/truncated) and legacy
                # 4-tuple step APIs; index [0] because the VecEnv batches
                # a single env.
                if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
                else: obs,r,d,info=result; done=bool(d[0])
                total+=float(r[0]); steps+=1
            status='✅ FULL 2000' if steps>=2000 else f'❌ crash@{steps}'
            log(f' ep{ep}: {total:.0f} reward/{steps} steps — {status}')
            time.sleep(1)
        ev.close(); time.sleep(3)
        cur = tid  # sim is now on this track; next hop exits from here
    return cur
# ── START ── sim is on mini_monaco
# Scene-chaining: each train/eval call returns the env id the sim was left
# on, which feeds the next call so exit_connect knows which scene to leave.
current = 'donkey-minimonaco-track-v0'
log('\n'+'#'*60)
log('EXPERIMENT 1: mountain_track ONLY, 90k steps')
log('#'*60)
SAVE1 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp1-mountain-only/model'
current = train(current, 'donkey-mountain-track-v0', 'mountain_track', 90000, SAVE1)
current = eval_all(current, SAVE1, 'Exp 1: mountain_track only model')
log('\n'+'#'*60)
log('EXPERIMENT 2: Trial 9 REPEAT — generated_track+mountain_track, 90k, switch=6851')
log('#'*60)
SAVE2 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp2-trial9-repeat/model'
current = train_two_tracks(current, steps=90000, switch=6851, save_path=SAVE2)
current = eval_all(current, SAVE2, 'Exp 2: Trial 9 repeat model')
log('\n'+'='*60)
log('ALL OVERNIGHT EXPERIMENTS COMPLETE')
log('='*60)