# Source: donkeycar-rl-autoresearch/agent/experiments/mountain_continue.py
#
# File-viewer header: 87 lines, 3.8 KiB, Python

import sys, os, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import log, _send_exit_scene
from donkeycar_sb3_runner import ThrottleClampWrapper
from reward_wrapper import SpeedRewardWrapper
from multitrack_runner import StuckTerminationWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym
# Passed as `throttle_min` to ThrottleClampWrapper in make_env().
THROTTLE_MIN = 0.5

# Exp3 already trained for 90k steps; 110k more brings the total to 200k.
EXTRA_STEPS = 110_000

# Checkpoint produced by exp3 that this run resumes from.
BASE_MODEL = (
    '/home/paulh/projects/donkeycar-rl-autoresearch/agent'
    '/models/exp3-mountain-throttle05/model.zip'
)

# Destination for the continued model (logged later with a ".zip" suffix).
SAVE_PATH = (
    '/home/paulh/projects/donkeycar-rl-autoresearch/agent'
    '/models/exp4-mountain-200k/model'
)

# Make sure the output directory exists before training starts.
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
def make_env(env_id):
    """Build the wrapped training/evaluation environment for ``env_id``.

    The raw gym environment is wrapped, innermost first, by
    ThrottleClampWrapper (``throttle_min=THROTTLE_MIN``),
    StuckTerminationWrapper (``stuck_steps=80``, ``min_displacement=0.5``)
    and SpeedRewardWrapper (``speed_scale=0.1``).

    Args:
        env_id: Gym environment id to instantiate.

    Returns:
        The outermost wrapper (a SpeedRewardWrapper instance).
    """
    clamped = ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN)
    guarded = StuckTerminationWrapper(
        clamped, stuck_steps=80, min_displacement=0.5
    )
    return SpeedRewardWrapper(guarded, speed_scale=0.1)
def switch_to(current_id, next_id, name):
    """Leave the current simulator scene and connect to ``next_id``.

    A throwaway environment for ``current_id`` is opened only so that
    ``_send_exit_scene`` can be issued through it; it is closed again before
    the real, wrapped environment for ``next_id`` is created.

    Args:
        current_id: Gym environment id of the scene currently loaded.
        next_id: Gym environment id of the scene to switch to.
        name: Human-readable track name used in log messages.

    Returns:
        A ``VecTransposeImage(DummyVecEnv([...]))`` around ``make_env(next_id)``.
    """
    log(f' → Switching to {name}...')

    # Temporary connection used solely to send the exit-scene request.
    tmp = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(tmp, verbose=False)
    tmp.close()
    time.sleep(5)  # pause before reconnecting, as the original flow does

    # NOTE: the previous version also called gym.make(next_id) here and never
    # used or closed the result, leaving an extra environment open alongside
    # the one created by make_env(). Only the wrapped env is created now.
    env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)]))
    log(f' Connected to {name}')
    return env
class ProgressCB(BaseCallback):
    """Callback that logs training progress every 10,000 timesteps.

    Args:
        extra: Number of additional timesteps being trained; only used in the
            log message.
    """

    def __init__(self, extra):
        super().__init__(verbose=0)
        self._last = 0       # timestep count at the most recent log line
        self._extra = extra

    def _on_step(self):
        """Log once at least 10,000 steps have passed since the last log.

        Always returns True, so this callback never asks for training to stop.
        """
        since_last_log = self.num_timesteps - self._last
        if since_last_log >= 10000:
            log(f' +{self.num_timesteps:,} steps (of {self._extra:,} extra)')
            self._last = self.num_timesteps
        return True
# --- Exp 4 banner -----------------------------------------------------------
_RULE = '=' * 60
log(_RULE)
log(f'Exp 4: CONTINUE mountain_track from exp3 — adding {EXTRA_STEPS:,} steps')
log(' Total training will be 200,000 steps on mountain_track')
log(f' throttle_min={THROTTLE_MIN}, loading: {os.path.basename(BASE_MODEL)}')
log(_RULE)

# --- Switch the simulator to mountain_track ---------------------------------
# A throwaway env is opened only to issue the exit-scene request, then closed
# before the wrapped training env is created.
log('Switching to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0')
time.sleep(2)
_send_exit_scene(tmp, verbose=False)
tmp.close()
time.sleep(5)
env = VecTransposeImage(
    DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')])
)

# --- Resume training from the exp3 checkpoint -------------------------------
log('Loading exp3 model and continuing training...')
model = PPO.load(BASE_MODEL, env=env, device='cpu')
# reset_num_timesteps=True makes the step counter start from zero for this
# run, so ProgressCB's "+N steps" lines count only the extra steps.
model.learn(
    total_timesteps=EXTRA_STEPS,
    callback=ProgressCB(EXTRA_STEPS),
    reset_num_timesteps=True,
)
model.save(SAVE_PATH)
log(f'Saved: {SAVE_PATH}.zip')

env.close()
time.sleep(3)
def eval_track(current_id, track_id, name, n=3, max_steps=2000):
    """Run deterministic evaluation episodes of the saved model on one track.

    Switches the simulator from ``current_id`` to ``track_id``, loads the model
    stored at ``SAVE_PATH`` and plays ``n`` episodes, logging total reward,
    step count and whether each episode reached the step cap.

    Args:
        current_id: Gym environment id of the scene currently loaded.
        track_id: Gym environment id of the track to evaluate on.
        name: Human-readable track name used in log messages.
        n: Number of evaluation episodes.
        max_steps: Step cap per episode; an episode that reaches the cap
            without terminating is reported as a full run.

    Returns:
        ``track_id``, so the caller can pass it as the next ``current_id``.
    """
    log(f'\n--- EVAL: {name} ---')
    ev = switch_to(current_id, track_id, name)
    try:
        m = PPO.load(SAVE_PATH, env=ev, device='cpu')
        for ep in range(1, n + 1):
            obs = ev.reset()
            total, steps, done = 0.0, 0, False
            while not done and steps < max_steps:
                action, _ = m.predict(obs, deterministic=True)
                result = ev.step(action)
                # Accept both step-result shapes: 5 items
                # (terminated/truncated) or 4 items (single done flag).
                if len(result) == 5:
                    obs, r, t, tr, _info = result
                    done = bool(t[0] or tr[0])
                else:
                    obs, r, d, _info = result
                    done = bool(d[0])
                total += float(r[0])
                steps += 1
            # Use the done flag rather than the step count so an episode that
            # terminates exactly on the last allowed step is not shown as FULL.
            status = '✅ FULL' if not done else f'❌ crash@{steps}'
            log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}')
            time.sleep(1)
    finally:
        # Always release the environment, even if loading or stepping failed.
        ev.close()
    time.sleep(3)
    return track_id
# --- Evaluate the continued policy on every track ---------------------------
log('\nEvaluating deterministic policy on all tracks...')

# The simulator is still on the training track after the learn() phase; each
# eval_track() call returns the track it switched to, which becomes the
# "current" scene for the next switch.
current = 'donkey-mountain-track-v0'
for track_id, label in (
    ('donkey-mountain-track-v0', 'mountain_track (training track)'),
    ('donkey-generated-track-v0', 'generated_track (zero-shot)'),
    ('donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)'),
    ('donkey-generated-roads-v0', 'generated_road (zero-shot)'),
):
    current = eval_track(current, track_id, label)

log('\n=== Exp 4 COMPLETE ===')