107 lines
4.8 KiB
Python
107 lines
4.8 KiB
Python
import sys, os, time
|
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
|
from multitrack_runner import wrap_env, log, _send_exit_scene
|
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
|
from reward_wrapper import SpeedRewardWrapper
|
|
from multitrack_runner import StuckTerminationWrapper
|
|
from stable_baselines3 import PPO
|
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
|
from stable_baselines3.common.callbacks import BaseCallback
|
|
import gymnasium as gym, numpy as np
|
|
|
|
# --- Experiment 3 configuration ---------------------------------------------
# Learning rate handed to the PPO constructor.
LR = 7.25e-4
# Number of timesteps requested from model.learn().
TOTAL_STEPS = 90_000
# Lower throttle bound given to ThrottleClampWrapper (raised from 0.2).
THROTTLE_MIN = 0.5
# Target passed to model.save(); the log line reports it with a '.zip' suffix.
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp3-mountain-throttle05/model'

# Make sure the directory that will hold the saved model exists.
os.makedirs(os.path.split(SAVE_PATH)[0], exist_ok=True)
|
|
|
|
def make_env_high_throttle(env_id):
    """Create the gym environment ``env_id`` wrapped for the high-throttle run.

    Wrapper order, innermost first: ``ThrottleClampWrapper`` (with the
    module-level ``THROTTLE_MIN``), ``StuckTerminationWrapper``
    (``stuck_steps=80``, ``min_displacement=0.5``) and
    ``SpeedRewardWrapper`` (``speed_scale=0.1``).

    Args:
        env_id: Environment id passed to ``gym.make``.

    Returns:
        The outermost ``SpeedRewardWrapper`` instance.
    """
    return SpeedRewardWrapper(
        StuckTerminationWrapper(
            ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN),
            stuck_steps=80,
            min_displacement=0.5,
        ),
        speed_scale=0.1,
    )
|
|
|
|
def switch_to(current_id, next_id, name, throttle_min=THROTTLE_MIN):
    """Leave the current scene and return a vectorised env for ``next_id``.

    A temporary environment for ``current_id`` is created only so that
    ``_send_exit_scene`` can be called on it; it is closed again before the
    new environment is created.

    Args:
        current_id: Environment id of the scene to exit.
        next_id: Environment id of the scene to connect to.
        name: Human-readable track name used in the log lines.
        throttle_min: Value passed to ``ThrottleClampWrapper``.

    Returns:
        ``VecTransposeImage(DummyVecEnv([...]))`` around the wrapped env.
    """
    log(f' → Switching to {name} (throttle_min={throttle_min})...')

    # Throw-away connection used only to send the exit-scene request.
    tmp = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(tmp, verbose=False)
    tmp.close()
    time.sleep(5)

    raw = gym.make(next_id)

    def _build():
        # Build every wrapper exactly once.  The previous tuple-and-[-1]
        # expression constructed ThrottleClampWrapper three times and
        # StuckTerminationWrapper twice around the same raw env, discarding
        # all but the last chain.
        clamped = ThrottleClampWrapper(raw, throttle_min=throttle_min)
        # Positional arguments kept as in the original call; presumably
        # stuck_steps=80 and min_displacement=0.5 -- confirm against the
        # StuckTerminationWrapper signature before switching to keywords.
        guarded = StuckTerminationWrapper(clamped, 80, 0.5)
        # Likewise presumably speed_scale=0.1.
        return SpeedRewardWrapper(guarded, 0.1)

    env = VecTransposeImage(DummyVecEnv([_build]))
    log(f' Connected to {name}')
    return env
|
|
|
|
def simple_switch(current_id, next_id, name):
    """Leave the current scene and return a vectorised env for ``next_id``.

    Uses ``make_env_high_throttle`` for the wrapper stack, so the throttle
    floor is always the module-level ``THROTTLE_MIN``.

    Args:
        current_id: Environment id of the scene to exit.
        next_id: Environment id of the scene to connect to.
        name: Human-readable track name used in the log lines.

    Returns:
        ``VecTransposeImage(DummyVecEnv([...]))`` around the wrapped env.
    """
    log(f' → Switching to {name}...')

    # Throw-away connection used only to send the exit-scene request.
    tmp = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(tmp, verbose=False)
    tmp.close()
    time.sleep(5)

    # make_env_high_throttle() calls gym.make(next_id) itself.  The earlier
    # version also created a second environment here that was never used or
    # closed, so it has been dropped.
    env = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle(next_id)]))
    log(f' Connected to {name}')
    return env
|
|
|
|
class ProgressCB(BaseCallback):
    """Training callback that logs progress about every 10,000 timesteps."""

    def __init__(self, total):
        """Remember ``total``, the step budget shown in each progress line."""
        super().__init__(verbose=0)
        self._last = 0
        self._total = total

    def _on_step(self):
        """Log once at least 10,000 timesteps have passed since the last log.

        Always returns ``True``.
        """
        now = self.num_timesteps
        if now - self._last < 10000:
            return True
        log(f' step {now:,}/{self._total:,}')
        self._last = now
        return True
|
|
|
|
log('='*60)
log(f'Exp 3: mountain_track ONLY, throttle_min={THROTTLE_MIN}, lr={LR}')
log('Hypothesis: higher min throttle gets car over the hill,')
log(' allowing it to learn the full track.')
log('='*60)

# Connect - sim is on mountain_track from last test.
# A throw-away environment is created only to send the exit-scene request.
log('Connecting to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0')
time.sleep(2)
_send_exit_scene(tmp, verbose=False)
tmp.close()
time.sleep(5)

# make_env_high_throttle() calls gym.make() itself.  The earlier version also
# created a second 'donkey-mountain-track-v0' environment here that was never
# used or closed, so it has been dropped.
train_env = VecTransposeImage(
    DummyVecEnv([lambda: make_env_high_throttle('donkey-mountain-track-v0')])
)
log('Connected. Training...')

model = PPO('CnnPolicy', train_env, learning_rate=LR, verbose=1, device='cpu')
model.learn(
    total_timesteps=TOTAL_STEPS,
    callback=ProgressCB(TOTAL_STEPS),
    reset_num_timesteps=True,
)
model.save(SAVE_PATH)
log(f'Saved: {SAVE_PATH}.zip')

# Close the training env before the evaluation phase creates new ones.
train_env.close()
time.sleep(3)
|
|
|
|
# Eval on all 4 tracks
|
|
def eval_track(current_id, track_id, track_name, model_path, n=3, max_steps=2000):
    """Run ``n`` deterministic evaluation episodes of a saved model on a track.

    The function first exits the scene identified by ``current_id``, then
    builds a fresh vectorised environment for ``track_id`` through
    ``make_env_high_throttle`` and loads the model from ``model_path``.
    One summary line is logged per episode.

    Args:
        current_id: Environment id of the scene to exit first.
        track_id: Environment id of the track to evaluate on.
        track_name: Human-readable track name used in the log header.
        model_path: Path given to ``PPO.load``.
        n: Number of episodes to run.
        max_steps: Step cap per episode; reaching it is reported as FULL.

    Returns:
        ``track_id``, so the caller can pass it as the next ``current_id``.
    """
    log(f'\n--- EVAL: {track_name} ({n} episodes) ---')

    # Throw-away connection used only to send the exit-scene request.
    tmp = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(tmp, verbose=False)
    tmp.close()
    time.sleep(5)

    # make_env_high_throttle() calls gym.make(track_id) itself.  The earlier
    # version also created a second environment here that was never used or
    # closed, so it has been dropped.
    ev = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle(track_id)]))
    try:
        m = PPO.load(model_path, env=ev, device='cpu')
        for ep in range(1, n + 1):
            obs = ev.reset()
            total, steps, done = 0.0, 0, False
            while not done and steps < max_steps:
                action, _ = m.predict(obs, deterministic=True)
                result = ev.step(action)
                # Accept both a 5-tuple (terminated/truncated) and a 4-tuple
                # (single done flag) step result.
                if len(result) == 5:
                    obs, r, t, tr, info = result
                    done = bool(t[0] or tr[0])
                else:
                    obs, r, d, info = result
                    done = bool(d[0])
                total += float(r[0])
                steps += 1
            status = '✅ FULL' if steps >= max_steps else f'❌ crash@{steps}'
            log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}')
            time.sleep(1)
    finally:
        # Close the env even if loading or a rollout fails.
        ev.close()
    time.sleep(3)
    return track_id
|
|
|
|
# Evaluate the saved model on each track in turn.  eval_track() returns the id
# of the track it just ran, which becomes the scene to exit on the next call.
current = 'donkey-mountain-track-v0'
for _track_id, _label in (
    ('donkey-mountain-track-v0', 'mountain_track (training track)'),
    ('donkey-generated-track-v0', 'generated_track (zero-shot)'),
    ('donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)'),
    ('donkey-generated-roads-v0', 'generated_road (zero-shot)'),
):
    current = eval_track(current, _track_id, _label, SAVE_PATH)

log('\n=== Exp 3 COMPLETE ===')
|