112 lines
4.7 KiB
Python
112 lines
4.7 KiB
Python
import sys, os, time
|
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
|
from multitrack_runner import wrap_env, log, _send_exit_scene
|
|
from stable_baselines3 import PPO
|
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
|
from stable_baselines3.common.callbacks import BaseCallback
|
|
import gymnasium as gym, numpy as np
|
|
|
|
# PPO learning rate (0.000725), shared by train() and train_two_tracks().
LR = 7.25e-4
|
|
|
|
def exit_connect(current_id, next_id, name):
    """Leave the scene the simulator is on and connect to another one.

    A throwaway environment is created for ``current_id`` only so that the
    exit-scene request can be sent through it; it is closed again before the
    environment for ``next_id`` is created.

    Args:
        current_id: Gym environment id used to build the temporary env that
            sends the exit-scene request.
        next_id: Gym environment id to connect to afterwards.
        name: Track name, used only in the log messages.

    Returns:
        The ``next_id`` environment passed through ``wrap_env`` and wrapped in
        ``DummyVecEnv`` and ``VecTransposeImage``.
    """
    log(f' → Switching to {name}...')
    tmp = gym.make(current_id)
    try:
        # NOTE(review): fixed pauses kept from the original; presumably they
        # give the simulator time to settle / unload the scene — confirm.
        time.sleep(2)
        _send_exit_scene(tmp, verbose=False)
    finally:
        # Close the temporary env even when the exit request fails, so the
        # handle is not leaked.
        tmp.close()
    time.sleep(5)

    raw = gym.make(next_id)
    # ``e=raw`` binds the env as a default argument so the factory returns
    # exactly this instance when DummyVecEnv calls it.
    env = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)]))
    log(f' Connected to {name}')
    return env
|
|
|
|
class ProgressCB(BaseCallback):
    """Training callback that logs the step count at a fixed interval."""

    def __init__(self, total, interval=10000):
        """Set up the progress logger.

        Args:
            total: Step count shown as the denominator in progress messages.
            interval: Minimum number of timesteps between two progress
                messages. Defaults to 10000, the previously hard-coded value.
        """
        super().__init__(verbose=0)
        self._last = 0  # num_timesteps at the most recent progress message
        self._total = total
        self._interval = interval

    def _on_step(self):
        """Log progress once ``interval`` steps have passed since the last log.

        Returns:
            Always ``True``, so this callback never asks training to stop.
        """
        if self.num_timesteps - self._last >= self._interval:
            log(f' step {self.num_timesteps:,}/{self._total:,}')
            self._last = self.num_timesteps
        return True
|
|
|
|
def train(current_id, track_id, track_name, steps, save_path):
    """Train a fresh PPO model on a single track and save it.

    Args:
        current_id: Environment id handed to ``exit_connect`` as the scene to
            leave.
        track_id: Environment id of the track to train on.
        track_name: Track name used in log messages.
        steps: Value passed to ``model.learn`` as ``total_timesteps``; also
            the total shown by the progress callback.
        save_path: Path passed to ``model.save``. Its parent directory is
            created if it does not exist.

    Returns:
        ``track_id``, so the caller can record which scene was used last.
    """
    log(f'\nTRAINING on {track_name} — {steps:,} steps, lr={LR}')

    save_dir = os.path.dirname(save_path)
    if save_dir:
        # A bare file name has an empty dirname, and os.makedirs('') raises.
        os.makedirs(save_dir, exist_ok=True)

    env = exit_connect(current_id, track_id, track_name)
    try:
        model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
        model.learn(total_timesteps=steps, callback=ProgressCB(steps), reset_num_timesteps=True)
        model.save(save_path)
        log(f' Saved: {save_path}.zip')
    finally:
        # Release the environment even if training or saving fails.
        env.close()
    time.sleep(3)
    return track_id
|
|
|
|
def train_two_tracks(current_id, steps, switch, save_path):
    """Round-robin training: generated_track + mountain_track.

    One PPO model is trained in segments of at most ``switch`` steps. After
    each segment a checkpoint is written to ``save_path`` and, if steps
    remain, the environment is switched to the next track in the rotation.

    Args:
        current_id: Environment id handed to ``exit_connect`` as the scene to
            leave before connecting to the first track.
        steps: Total number of steps to request across all segments.
        switch: Maximum number of steps requested per segment.
        save_path: Path passed to ``model.save`` for checkpoints and the
            final model. Its parent directory is created if needed.

    Returns:
        The environment id of the track used for the last segment.

    Raises:
        ValueError: If ``switch`` is not positive while ``steps`` is, since
            the segment loop could then never make progress.
    """
    from multitrack_runner import close_and_switch

    if steps > 0 and switch <= 0:
        raise ValueError(f'switch must be a positive step count, got {switch!r}')

    log(f'\nTRAINING generated_track+mountain_track — {steps:,} steps, switch={switch}, lr={LR}')

    save_dir = os.path.dirname(save_path)
    if save_dir:
        # A bare file name has an empty dirname, and os.makedirs('') raises.
        os.makedirs(save_dir, exist_ok=True)

    TRACKS = [
        ('generated_track', 'donkey-generated-track-v0'),
        ('mountain_track', 'donkey-mountain-track-v0'),
    ]
    env = exit_connect(current_id, TRACKS[0][1], TRACKS[0][0])
    model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')

    steps_done, idx = 0, 0
    while steps_done < steps:
        seg = min(switch, steps - steps_done)
        log(f' Segment: {TRACKS[idx][0]} | {steps_done:,}/{steps:,}')
        # reset_num_timesteps=False keeps the model's step counter running
        # across segments.
        # NOTE(review): steps_done counts requested steps; the number PPO
        # actually runs per learn() call may differ — confirm against the
        # Stable-Baselines3 docs if exact budgets matter.
        model.learn(total_timesteps=seg, reset_num_timesteps=False)
        steps_done += seg

        # Best-effort checkpoint: a failed save must not abort training, but
        # it is reported instead of being silently discarded.
        try:
            model.save(save_path)
        except Exception as exc:
            log(f' WARNING: checkpoint save failed: {exc!r}')

        if steps_done < steps:
            nxt = (idx + 1) % len(TRACKS)
            # close_and_switch comes from multitrack_runner (not visible
            # here); presumably it closes `env` and returns one for the
            # given id — confirm.
            env = close_and_switch(env, TRACKS[nxt][1])
            model.set_env(env)
            idx = nxt

    model.save(save_path)
    log(f' Saved: {save_path}.zip')
    env.close()
    time.sleep(3)
    return TRACKS[idx][1]
|
|
|
|
def _run_episode(model, env, max_steps):
    """Play one deterministic episode and return ``(total_reward, steps)``."""
    obs = env.reset()
    total, n_steps, done = 0.0, 0, False
    while not done and n_steps < max_steps:
        action, _ = model.predict(obs, deterministic=True)
        result = env.step(action)
        if len(result) == 5:
            # 5-tuple form: two separate end-of-episode flags.
            obs, reward, terminated, truncated, _info = result
            done = bool(terminated[0] or truncated[0])
        else:
            # 4-tuple form: a single done flag.
            obs, reward, dones, _info = result
            done = bool(dones[0])
        # Index 0: only the first (and only) env of the vectorized wrapper.
        total += float(reward[0])
        n_steps += 1
    return total, n_steps


def eval_all(current_id, model_path, label, episodes=3, max_steps=2000):
    """Evaluate a saved PPO model on three tracks and log per-episode results.

    For each of generated_track, mini_monaco and generated_road the scene is
    switched, the model is loaded against that environment, and ``episodes``
    deterministic episodes of at most ``max_steps`` steps are played.

    Args:
        current_id: Environment id handed to ``exit_connect`` as the scene to
            leave before the first evaluation track.
        model_path: Path passed to ``PPO.load``.
        label: Text shown in the evaluation banner.
        episodes: Episodes to run per track (default 3, the previously
            hard-coded value).
        max_steps: Step cap per episode (default 2000, the previously
            hard-coded value). An episode that reaches the cap is reported
            as a full run; one that ends earlier is reported as a crash.

    Returns:
        The environment id of the last track evaluated.
    """
    log(f'\n{"="*60}')
    log(f'EVAL: {label}')
    log(f'{"="*60}')
    tests = [
        ('generated_track', 'donkey-generated-track-v0'),
        ('mini_monaco', 'donkey-minimonaco-track-v0'),
        ('generated_road', 'donkey-generated-roads-v0'),
    ]
    cur = current_id
    for tname, tid in tests:
        ev = exit_connect(cur, tid, tname)
        try:
            m = PPO.load(model_path, env=ev, device='cpu')
            log(f' --- {tname} ({episodes} episodes) ---')
            for ep in range(1, episodes + 1):
                total, n_steps = _run_episode(m, ev, max_steps)
                if n_steps >= max_steps:
                    status = f'✅ FULL {max_steps}'
                else:
                    status = f'❌ crash@{n_steps}'
                log(f' ep{ep}: {total:.0f} reward/{n_steps} steps — {status}')
                time.sleep(1)
        finally:
            # Release the environment even if loading or a rollout fails.
            ev.close()
        time.sleep(3)
        cur = tid
    return cur
|
|
|
|
# ── START ── the simulator is expected to be on mini_monaco when this runs.
# `current` always holds the env id of the scene used last; each stage
# returns the id it finished on so the next stage knows which scene to exit.
current = 'donkey-minimonaco-track-v0'

# Experiment 1: train on mountain_track only, then evaluate.
log(f'\n{"#" * 60}')
log('EXPERIMENT 1: mountain_track ONLY, 90k steps')
log('#' * 60)
SAVE1 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp1-mountain-only/model'
current = train(
    current_id=current,
    track_id='donkey-mountain-track-v0',
    track_name='mountain_track',
    steps=90_000,
    save_path=SAVE1,
)
current = eval_all(current, SAVE1, 'Exp 1: mountain_track only model')

# Experiment 2: alternate generated_track / mountain_track, then evaluate.
log(f'\n{"#" * 60}')
log('EXPERIMENT 2: Trial 9 REPEAT — generated_track+mountain_track, 90k, switch=6851')
log('#' * 60)
SAVE2 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp2-trial9-repeat/model'
current = train_two_tracks(current, 90_000, 6851, SAVE2)
current = eval_all(current, SAVE2, 'Exp 2: Trial 9 repeat model')

log(f'\n{"=" * 60}')
log('ALL OVERNIGHT EXPERIMENTS COMPLETE')
log('=' * 60)
|