129 lines
5.4 KiB
Python
129 lines
5.4 KiB
Python
"""
Exp 9: mountain_track, v5 reward, throttle_min=0.2
ONE VARIABLE CHANGED from Exp8: throttle_min 0.5 → 0.2

Hypothesis: v5 reward (speed × CTE) has non-zero gradient on hill.
Model can learn to output high throttle when needed even with 0.2 floor.
Full throttle range [0.2, 1.0] allows model to also slow for corners.

If this works: can drive mountain_track AND potentially mini_monaco corners.
If this fails: car stalls on hill, confirming 0.5 minimum is physically required.
"""
|
||
import sys, os, time
|
||
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||
|
||
from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper
|
||
from donkeycar_sb3_runner import ThrottleClampWrapper
|
||
from reward_wrapper import SpeedRewardWrapper
|
||
from stable_baselines3 import PPO
|
||
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||
import gymnasium as gym, numpy as np
|
||
|
||
# --- Experiment configuration ------------------------------------------------
# THROTTLE_MIN is the single variable under test (Exp8 used 0.5); every other
# value is carried over unchanged so the two runs stay comparable.
THROTTLE_MIN = 0.2      # ← ONLY CHANGE from Exp8
LR = 7.25e-4            # same
TOTAL_STEPS = 90_000    # same
STEPS_PER_SEG = 6_000   # same — 15 checkpoints (90,000 / 6,000)
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp9-mountain-v5-throttle02'

# Make sure the checkpoint directory exists; exist_ok=True tolerates re-runs.
os.makedirs(SAVE_DIR, exist_ok=True)
|
||
|
||
def make_env():
    """Build the wrapped mountain-track environment used for training.

    Wrapper order, innermost first: the raw ``donkey-mountain-track-v0`` env,
    ``ThrottleClampWrapper`` (configured with ``THROTTLE_MIN``),
    ``StuckTerminationWrapper`` (80 steps / 0.5 displacement), and finally
    ``SpeedRewardWrapper``.

    Returns:
        The outermost wrapper (a ``SpeedRewardWrapper`` instance).
    """
    # NOTE(review): the wrapper classes come from project modules not visible
    # here; their exact semantics are presumed from their names — confirm.
    base = gym.make('donkey-mountain-track-v0')
    clamped = ThrottleClampWrapper(base, throttle_min=THROTTLE_MIN)
    guarded = StuckTerminationWrapper(clamped, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(guarded)
|
||
|
||
# Startup banner: record the experiment identity and hyper-parameters at the
# top of the run log, framed by two 60-character rules.
_rule = '=' * 60
for _msg in (
    _rule,
    'Exp 9: mountain_track, v5 reward, throttle_min=0.2',
    'ONE CHANGE from Exp8: throttle_min 0.5 → 0.2',
    f' lr={LR}, total_steps={TOTAL_STEPS:,}, steps_per_seg={STEPS_PER_SEG:,}',
    ' Hypothesis: v5 gradient non-zero on hill → model learns high throttle',
    f' Save: {SAVE_DIR}',
    _rule,
):
    log(_msg)
|
||
|
||
# Clear previous sim state: open a throwaway connection on the training track,
# call _send_exit_scene on it, then close it again.
# NOTE(review): _send_exit_scene lives in multitrack_runner (not visible here);
# presumably it tells the simulator to leave its current scene, and the sleeps
# give the simulator time to settle — confirm.
log('Clearing sim state...')
tmp = gym.make('donkey-mountain-track-v0')
time.sleep(2)
_send_exit_scene(tmp, verbose=False)
tmp.close()
time.sleep(5)

# Single connection for entire run: one wrapped env inside a DummyVecEnv,
# passed through VecTransposeImage before being handed to the CNN policy.
env = VecTransposeImage(DummyVecEnv([make_env]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
log('Connected. Training begins on mountain_track with throttle_min=0.2')
log('Watch: does model get over the hill?')
|
||
|
||
# Segmented training. Each pass through the loop:
#   1. trains for up to STEPS_PER_SEG timesteps,
#   2. saves a numbered checkpoint,
#   3. runs one deterministic rollout (capped at 2000 steps) and, if it beats
#      the best reward seen so far, saves the weights as 'best_model'.
best_reward = float('-inf')
steps_done, seg_num = 0, 0

while steps_done < TOTAL_STEPS:
    seg_num += 1
    # The final segment is shortened so the total never exceeds TOTAL_STEPS.
    seg_steps = min(STEPS_PER_SEG, TOTAL_STEPS - steps_done)
    seg_end = steps_done + seg_steps
    log(f'\n[Seg {seg_num}] steps {steps_done:,} → {seg_end:,}')

    # reset_num_timesteps=False: continue the model's existing timestep count
    # rather than restarting it on every segment (see SB3 `learn` docs).
    model.learn(total_timesteps=seg_steps, reset_num_timesteps=False)
    steps_done = seg_end

    ckpt = os.path.join(SAVE_DIR, f'checkpoint_{steps_done:07d}')
    model.save(ckpt)
    log(f'[Seg {seg_num}] Checkpoint: {ckpt}.zip')

    # Best-effort evaluation: a failure here is logged and training continues
    # with the next segment.
    # NOTE(review): this rollout steps the same env the trainer uses; confirm
    # the next learn() call resumes cleanly from the post-eval env state.
    # NOTE(review): if every evaluation fails, 'best_model' is never written.
    try:
        obs = env.reset()
        ep_reward, ep_steps = 0.0, 0
        for _ in range(2000):
            action, _state = model.predict(obs, deterministic=True)
            step_out = env.step(action)
            # Accept both the 5-tuple (terminated/truncated) and the 4-tuple
            # (single done flag) step signatures; index 0 = the only sub-env.
            if len(step_out) == 5:
                obs, rewards, terminated, truncated, _info = step_out
                done = bool(terminated[0] or truncated[0])
            else:
                obs, rewards, dones, _info = step_out
                done = bool(dones[0])
            ep_reward += float(rewards[0])
            ep_steps += 1
            if done:
                break
        log(f'[Seg {seg_num}] Eval: {ep_reward:.1f} reward / {ep_steps} steps (deterministic)')
        if ep_reward > best_reward:
            best_reward = ep_reward
            model.save(os.path.join(SAVE_DIR, 'best_model'))
            log(f'[Seg {seg_num}] ⭐ NEW BEST: {best_reward:.1f}')
    except Exception as e:
        log(f'[Seg {seg_num}] Eval error: {e}')
|
||
|
||
# Training is finished: close the training env before the evaluation phase
# opens its own connections.
env.close()
time.sleep(2)
log(f'\nTraining complete. Best reward: {best_reward:.1f}')

# Eval best_model on all tracks
# Path of the best-scoring weights; the '.zip' suffix matches what the
# checkpoint log lines report for saved models.
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
|
||
|
||
def eval_track(current_id, track_id, name, n=3):
    """Evaluate the saved best model on one track and log per-episode results.

    A throwaway connection is opened on ``current_id`` and
    ``_send_exit_scene`` is called on it, then a freshly wrapped environment
    is created for ``track_id``. The model stored at the module-level
    ``best_path`` is rolled out deterministically for ``n`` episodes, each
    capped at 2000 steps. Both environments are closed even if loading the
    model or stepping the simulator raises, so a failure does not leave a
    simulator connection open.

    Args:
        current_id: Gym id used for the throwaway connection that precedes
            the evaluation (the caller passes the previously used track).
        track_id: Gym id of the track to evaluate on.
        name: Human-readable label used in the log header.
        n: Number of evaluation episodes.

    Returns:
        ``track_id``, so the caller can pass it as ``current_id`` next time.
    """
    log(f'\n--- EVAL: {name} ---')

    # NOTE(review): _send_exit_scene is a project helper not visible here;
    # presumably it makes the simulator leave its current scene — confirm.
    tmp2 = gym.make(current_id)
    try:
        time.sleep(2)
        _send_exit_scene(tmp2, verbose=False)
    finally:
        tmp2.close()
    time.sleep(5)

    def _make_eval_env():
        # Same wrapper stack and values as make_env, on the requested track.
        # 80 and 0.5 are passed positionally, as in the original call.
        return SpeedRewardWrapper(StuckTerminationWrapper(
            ThrottleClampWrapper(gym.make(track_id), throttle_min=THROTTLE_MIN),
            80, 0.5))

    ev = VecTransposeImage(DummyVecEnv([_make_eval_env]))
    try:
        m = PPO.load(best_path, env=ev, device='cpu')
        results = []
        for ep in range(1, n + 1):
            obs = ev.reset()
            total, steps, done = 0.0, 0, False
            while not done and steps < 2000:
                action, _ = m.predict(obs, deterministic=True)
                result = ev.step(action)
                # Accept both 5-tuple and 4-tuple step signatures.
                if len(result) == 5:
                    obs, r, t, tr, info = result
                    done = bool(t[0] or tr[0])
                else:
                    obs, r, d, info = result
                    done = bool(d[0])
                total += float(r[0])
                steps += 1
            # Reaching the 2000-step cap counts as a full run; any earlier
            # episode end is reported as a crash at that step.
            status = '✅ FULL' if steps >= 2000 else f'❌ crash@{steps}'
            log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
            results.append(steps)
            time.sleep(1)
        # np.mean([]) would warn and yield nan; skip the summary when n <= 0.
        if results:
            log(f' Mean steps: {np.mean(results):.0f}')
    finally:
        ev.close()
    time.sleep(3)
    return track_id
|
||
|
||
# Evaluate best_model on the training track first, then on three other tracks.
# eval_track returns the track it just used; that value is fed back in as the
# `current_id` of the next call.
current = 'donkey-mountain-track-v0'
for _track_id, _label in (
    ('donkey-mountain-track-v0', 'mountain_track (training)'),
    ('donkey-generated-track-v0', 'generated_track (zero-shot)'),
    ('donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)'),
    ('donkey-generated-roads-v0', 'generated_road (zero-shot)'),
):
    current = eval_track(current, _track_id, _label)

# Closing summary with the Exp8 reference numbers for manual comparison.
for _summary in (
    '\n=== Exp 9 COMPLETE ===',
    'Compare with Exp8 best_model results:',
    ' mountain_track: 382/529/182 (mean=364)',
    ' mini_monaco: 154/155/104 (mean=138) ← crashed at one corner',
):
    log(_summary)
|