# File viewer metadata (paste artefact): 75 lines, 2.9 KiB, Python
import sys, os, time
|
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
|
from multitrack_runner import wrap_env, log, _send_exit_scene
|
|
from stable_baselines3 import PPO
|
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
|
from stable_baselines3.common.callbacks import BaseCallback
|
|
import gymnasium as gym
|
|
|
|
# --- Run configuration ------------------------------------------------------
# Gym id of the track this run trains on.
TRAIN_TRACK = 'donkey-generated-track-v0'
# Learning rate handed to the PPO constructor.
LR = 0.000725
# Timestep budget passed to model.learn().
TOTAL_STEPS = 90_000
# Path passed to model.save() and PPO.load().
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave5-gentrack-only/model'

# Create the directory that will hold the saved model if it is not there yet.
os.makedirs(os.path.split(SAVE_PATH)[0], exist_ok=True)
|
|
|
|
def exit_and_connect(current_track_id: str, next_track_id: str, next_name: str):
    """Exit the current scene cleanly, then connect to the next track.

    A temporary env is created for ``current_track_id`` only so that
    ``_send_exit_scene`` can be called on it. That env is then closed and a
    fresh env for ``next_track_id`` is created, wrapped and returned.

    Args:
        current_track_id: Gym id used to build the temporary env on which the
            exit-scene request is sent.
        next_track_id: Gym id of the env to create and return.
        next_name: Label for the next track; used only in log messages.

    Returns:
        The new env wrapped as
        ``VecTransposeImage(DummyVecEnv([wrap_env(raw_env)]))``.

    Raises:
        Whatever ``gym.make``, ``_send_exit_scene`` or ``wrap_env`` raise.
        Envs created here are closed before the exception propagates.
    """
    log(f'Exiting current scene, connecting to {next_name}...')

    tmp = gym.make(current_track_id)
    try:
        # NOTE(review): presumably gives the simulator time to finish loading
        # the scene before the exit request is sent; confirm.
        time.sleep(2)
        _send_exit_scene(tmp, verbose=False)
    finally:
        # Always release the temporary env, even when the exit request fails,
        # so a failed switch does not leave a dangling env behind.
        tmp.close()

    # NOTE(review): presumably lets the simulator return to its menu before
    # the next scene is requested; confirm.
    time.sleep(5)

    raw = gym.make(next_track_id)
    try:
        # `e=raw` binds the env at lambda-definition time.
        env = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)]))
    except Exception:
        # Wrapping failed: close the raw env rather than leaking it.
        raw.close()
        raise

    log(f'Connected to {next_name}')
    return env
|
|
|
|
# Start-of-run banner: a title line framed by two 60-character rules.
_rule = '=' * 60
for _banner_line in (
    _rule,
    'Wave 5: generated_track ONLY, from scratch, lr=0.000725',
    _rule,
):
    log(_banner_line)

# The sim is currently on generated_road, so exit that scene and switch to
# generated_track before training.
env = exit_and_connect(
    'donkey-generated-roads-v0',
    TRAIN_TRACK,
    'generated_track',
)

# Fresh (untrained) PPO agent with a CNN policy, running on the CPU.
model = PPO(
    'CnnPolicy',
    env,
    learning_rate=LR,
    verbose=1,
    device='cpu',
)
log(f'Training from scratch on generated_track, {TOTAL_STEPS:,} steps...')
|
|
|
|
class ProgressCB(BaseCallback):
    """Training callback that logs progress at a fixed timestep interval.

    Args:
        interval: Minimum number of timesteps between two progress lines.
            Defaults to 10,000, the value that used to be hard-coded.

    Raises:
        ValueError: If ``interval`` is not a positive number.
    """

    def __init__(self, interval=10_000):
        if interval <= 0:
            raise ValueError(f'interval must be positive, got {interval!r}')
        super().__init__(verbose=0)
        self._interval = interval
        # Value of ``num_timesteps`` when the last progress line was written.
        self._last = 0

    def _on_step(self):
        """Log one progress line once ``interval`` timesteps have elapsed.

        Returns:
            Always ``True``. In Stable-Baselines3, returning ``False`` from
            ``_on_step`` aborts training, which this callback never does.
        """
        if self.num_timesteps - self._last >= self._interval:
            log(f' step {self.num_timesteps:,}/{TOTAL_STEPS:,}')
            self._last = self.num_timesteps
        return True
|
|
|
|
# Run the full training budget, reporting progress through the callback.
_progress_cb = ProgressCB()
model.learn(
    total_timesteps=TOTAL_STEPS,
    callback=_progress_cb,
    reset_num_timesteps=True,
)

# Persist the trained model, then close the training env.
model.save(SAVE_PATH)
log('Model saved.')
env.close()

# NOTE(review): presumably gives the simulator time to settle before the
# evaluation phase reconnects; confirm.
time.sleep(3)
|
|
|
|
def eval_track(current_id, eval_id, track_name, n_eps=3, max_steps=2000):
    """Evaluate the saved model zero-shot on one track and log each episode.

    Switches the simulator from ``current_id`` to ``eval_id``, loads the model
    stored at ``SAVE_PATH`` and runs ``n_eps`` deterministic episodes. The
    eval env is always closed, including when an episode raises.

    Args:
        current_id: Gym id of the scene to exit before evaluating.
        eval_id: Gym id of the track to evaluate on.
        track_name: Label for the track; used only in log messages.
        n_eps: Number of evaluation episodes to run.
        max_steps: Per-episode step cap. An episode that reaches the cap is
            logged as a full run, otherwise as a crash at the step it ended.
            Defaults to 2000, the value that used to be hard-coded.

    Returns:
        ``eval_id``, so the caller can pass it as the next ``current_id``.
    """
    log(f'\n--- EVAL: {track_name} (zero-shot) ---')
    ev = exit_and_connect(current_id, eval_id, track_name)
    try:
        m = PPO.load(SAVE_PATH, env=ev, device='cpu')
        for ep in range(1, n_eps + 1):
            obs = ev.reset()
            total, steps, done = 0.0, 0, False
            while not done and steps < max_steps:
                action, _ = m.predict(obs, deterministic=True)
                result = ev.step(action)
                # Accept both step() shapes: the 5-tuple with separate
                # terminated/truncated flags and the 4-tuple with one done
                # flag. Index 0 selects the single env in the vectorized batch.
                if len(result) == 5:
                    obs, rewards, terminated, truncated, _ = result
                    done = bool(terminated[0] or truncated[0])
                else:
                    obs, rewards, dones, _ = result
                    done = bool(dones[0])
                total += float(rewards[0])
                steps += 1
            status = '✅ FULL' if steps >= max_steps else f'❌ crash@{steps}'
            log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}')
    finally:
        # Close the eval env even if loading or stepping fails, so the next
        # scene switch does not start with a leaked env.
        ev.close()

    # NOTE(review): presumably gives the simulator time to settle before the
    # next scene switch; confirm.
    time.sleep(3)
    return eval_id
|
|
|
|
# Zero-shot evaluation on the tracks that were not used for training, in
# order. eval_track() returns the id of the track it evaluated, which is fed
# back in as the scene to exit on the next call.
current = TRAIN_TRACK
for _eval_id, _label in (
    ('donkey-minimonaco-track-v0', 'mini_monaco'),
    ('donkey-generated-roads-v0', 'generated_road'),
):
    current = eval_track(current, _eval_id, _label)

log('\n=== Wave 5 COMPLETE ===')
|