save: all experiment scripts moved from /tmp to agent/experiments/
Scripts in /tmp are lost on reboot and not reproducible. All experiment scripts now committed to git with README. Exp5 script was already gone (lost before this fix). All others (Exp6-Exp10, overnight, wave5, etc.) now preserved. Rule going forward: scripts saved to agent/experiments/ and committed BEFORE running, not after. Agent: pi Tests: 102 passed Tests-Added: 0 TypeScript: N/A
This commit is contained in:
parent
de7b9bc302
commit
6e9546cd22
|
|
@ -0,0 +1,30 @@
|
||||||
|
# Experiment Scripts

These scripts were used to run individual training experiments.
Each corresponds to an entry in docs/TEST_HISTORY.md.

| Script | Experiment | Key change |
|---|---|---|
| mountain_v5.py | Exp 5 | v5 reward + throttle_min=0.5, direct model.learn() |
| mountain_continue.py | Exp 4 | Continued Exp3 training |
| mountain_high_throttle.py | Exp 3 | throttle_min=0.5, old v4 reward |
| exp6_mountain_v5_proper.py | Exp 6 | v5 + termination, wrong steps_per_switch (=total) |
| exp7_mountain_proper.py | Exp 7 | v5 + termination, correct steps_per_switch=6000, had phantom car issue |
| exp8_mountain_clean.py | Exp 8 | v5 + throttle_min=0.5, single connection, correct checkpointing |
| exp9_mountain_v5_throttle02.py | Exp 9 | v5 + throttle_min=0.2, OUR BEST MODEL |
| exp10_two_tracks.py | Exp 10 | Two tracks via custom script (abandoned — used multitrack_runner.py instead) |
| overnight.py | Overnight runs | mountain-only and Trial9-repeat experiments |
| wave5_train.py | Wave 5 | generated_track only with throttle_min=0.2 |

## Rule going forward

ALL experiment scripts must be saved here and committed to git
BEFORE running. Scripts in /tmp are lost on reboot.

## Running experiments

Use multitrack_runner.py directly for two-track training:

    python3 multitrack_runner.py --total-timesteps 90000 --steps-per-switch 6000 ...

For single-track experiments, use the pattern from exp8/exp9:

- VecTransposeImage(DummyVecEnv([make_env])) for env creation
- Direct model.learn() loop with manual checkpointing
- No close_and_switch() for single track
@ -0,0 +1,68 @@
|
||||||
|
"""
|
||||||
|
Exp 10 (fixed): generated_track + mountain_track, v5 reward, throttle_min=0.2
|
||||||
|
|
||||||
|
Fix: pass plain wrap_env() to train_multitrack(), NOT VecTransposeImage.
|
||||||
|
SB3 applies VecTransposeImage internally. close_and_switch() then works
|
||||||
|
on all segments because env.unwrapped.viewer is accessible via gym.Wrapper.
|
||||||
|
"""
|
||||||
|
import sys, os, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
|
||||||
|
from multitrack_runner import (log, _send_exit_scene, train_multitrack,
|
||||||
|
wrap_env, TRAINING_TRACKS, THROTTLE_MIN)
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
import gymnasium as gym, numpy as np
|
||||||
|
|
||||||
|
LR = 0.000725
|
||||||
|
TOTAL_STEPS = 90000
|
||||||
|
STEPS_PER_SEG = 6000
|
||||||
|
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp10-two-tracks'
|
||||||
|
os.makedirs(SAVE_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
log('='*60)
|
||||||
|
log('Exp 10 (fixed): generated_track + mountain_track, v5, throttle_min=0.2')
|
||||||
|
log(f' Tracks: {[t[0] for t in TRAINING_TRACKS]}')
|
||||||
|
log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total={TOTAL_STEPS:,}')
|
||||||
|
log(f' FIX: passing gym.Wrapper to train_multitrack (not VecTransposeImage)')
|
||||||
|
log(f' SB3 applies VecEnv wrapping internally — exit_scene works on all segments')
|
||||||
|
log('='*60)
|
||||||
|
|
||||||
|
# Clear sim state
|
||||||
|
log('Clearing sim...')
|
||||||
|
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
|
||||||
|
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
|
||||||
|
|
||||||
|
# Create first env as plain gym.Wrapper — SB3 wraps it internally
|
||||||
|
first_track_id = TRAINING_TRACKS[0][1] # generated_track
|
||||||
|
log(f'Connecting to {TRAINING_TRACKS[0][0]}...')
|
||||||
|
raw = gym.make(first_track_id)
|
||||||
|
first_env = wrap_env(raw) # gym.Wrapper chain — close_and_switch works on this
|
||||||
|
|
||||||
|
# PPO.__init__ calls _wrap_env() → DummyVecEnv + VecTransposeImage automatically
|
||||||
|
model = PPO('CnnPolicy', first_env, learning_rate=LR, verbose=1, device='cpu')
|
||||||
|
log(f'Model env type: {type(model.env).__name__}') # Should show VecTransposeImage
|
||||||
|
log('Connected. Starting two-track training.')
|
||||||
|
|
||||||
|
env, segment_rewards = train_multitrack(
|
||||||
|
model, first_env, # <-- gym.Wrapper, not VecTransposeImage
|
||||||
|
total_timesteps=TOTAL_STEPS,
|
||||||
|
steps_per_switch=STEPS_PER_SEG,
|
||||||
|
save_dir=SAVE_DIR,
|
||||||
|
)
|
||||||
|
env.close(); time.sleep(3)
|
||||||
|
|
||||||
|
log(f'\nTraining complete. Checkpoints in {SAVE_DIR}:')
|
||||||
|
for f in sorted(os.listdir(SAVE_DIR)):
|
||||||
|
size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024)
|
||||||
|
log(f' {f} ({size}MB)')
|
||||||
|
|
||||||
|
log('\nRunning standard 3-set eval on all tracks...')
|
||||||
|
import subprocess
|
||||||
|
subprocess.run([
|
||||||
|
'python3',
|
||||||
|
'/home/paulh/projects/donkeycar-rl-autoresearch/agent/run_eval.py',
|
||||||
|
'--model', os.path.join(SAVE_DIR, 'best_model.zip'),
|
||||||
|
'--sets', '3', '--steps', '2000'
|
||||||
|
], cwd='/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
|
||||||
|
log('\n=== Exp 10 COMPLETE ===')
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
"""
|
||||||
|
Exp 6: mountain_track, v5 reward, throttle_min=0.5, lr=0.000725, 90k steps
|
||||||
|
PROPERLY CHECKPOINTED - numbered saves every segment, best_model.zip kept.
|
||||||
|
|
||||||
|
This re-runs Exp5 with the checkpointing fix. The model doing 20-second
|
||||||
|
laps at step ~30k will be captured in checkpoint_0027404.zip (or similar).
|
||||||
|
"""
|
||||||
|
import sys, os, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
|
||||||
|
from multitrack_runner import (wrap_env, log, _send_exit_scene,
|
||||||
|
StuckTerminationWrapper, close_and_switch,
|
||||||
|
TRAINING_TRACKS, train_multitrack,
|
||||||
|
evaluate_test_tracks)
|
||||||
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
||||||
|
from reward_wrapper import SpeedRewardWrapper
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||||
|
import gymnasium as gym
|
||||||
|
|
||||||
|
THROTTLE_MIN = 0.5
|
||||||
|
LR = 0.000725
|
||||||
|
TOTAL_STEPS = 90000
|
||||||
|
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp6-mountain-v5-checkpointed'
|
||||||
|
os.makedirs(SAVE_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
def make_env(env_id):
    """Build the wrapped training env for *env_id*.

    Wrapper chain (innermost first): ThrottleClampWrapper enforces the
    THROTTLE_MIN floor, StuckTerminationWrapper ends episodes after 80
    low-displacement steps, SpeedRewardWrapper applies the v5 reward.
    """
    clamped = ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN)
    guarded = StuckTerminationWrapper(clamped, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(guarded)
|
||||||
|
|
||||||
|
def switch_to(current_id, next_id, name):
    """Move the simulator from *current_id*'s scene to *next_id*'s.

    Briefly reconnects to the current scene to send exit-scene, waits for
    the sim to settle, then returns a fresh VecTransposeImage(DummyVecEnv)
    wrapping make_env(next_id). *name* is only used for log output.
    """
    log(f' Switching to {name}...')
    # Reconnect just to send the exit-scene command, then drop the link.
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)]))
    log(f' Connected to {name}'); return env
|
||||||
|
|
||||||
|
log('='*60)
log('Exp 6: mountain_track, v5 reward, throttle_min=0.5')
log(f' lr={LR}, steps={TOTAL_STEPS:,}')
log(f' Save dir: {SAVE_DIR}')
log(' Checkpointing: numbered saves every segment + best_model.zip')
log('='*60)

# Switch sim to mountain_track
log('Connecting to mountain_track...')
tmp = gym.make('donkey-generated-roads-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)

env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')

# ── TRAINING with full checkpointing ──
# We temporarily override TRAINING_TRACKS to mountain_track only
import multitrack_runner as mr
_orig_tracks = mr.TRAINING_TRACKS
mr.TRAINING_TRACKS = [('mountain_track', 'donkey-mountain-track-v0')]

# NOTE(review): steps_per_switch=TOTAL_STEPS yields a single segment, so
# only one checkpoint is produced — the "wrong steps_per_switch" issue
# this experiment is known for.
env, segment_rewards = train_multitrack(
    model, env,
    total_timesteps=TOTAL_STEPS,
    steps_per_switch=TOTAL_STEPS,  # one long segment — no switching
    save_dir=SAVE_DIR,
)

mr.TRAINING_TRACKS = _orig_tracks  # restore

# List produced files with sizes in MB.
log(f'\nTraining complete. Files saved in {SAVE_DIR}:')
for f in sorted(os.listdir(SAVE_DIR)):
    size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024)
    log(f' {f} ({size}MB)')

env.close(); time.sleep(3)

# ── EVAL on all tracks using best_model ──
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
log(f'\nEvaluating best_model.zip on all tracks...')
|
||||||
|
|
||||||
|
def eval_track(current_id, track_id, name, n=3):
    """Run *n* deterministic episodes of best_model on *track_id*.

    Switches the sim from *current_id* to *track_id*, loads the best
    checkpoint, and logs reward/steps per episode (capped at 2000 steps).
    Returns *track_id* so callers can chain: current = eval_track(current, ...).
    """
    log(f'\n--- EVAL: {name} ---')
    ev = switch_to(current_id, track_id, name)
    m = PPO.load(best_path, env=ev, device='cpu')
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both step APIs: 5-tuple (terminated/truncated) and 4-tuple (done).
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        # Reaching the 2000-step cap counts as a full run; anything less is a crash.
        status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close(); time.sleep(3)
    return track_id
|
||||||
|
|
||||||
|
# Evaluate on the training track plus three zero-shot tracks, chaining
# the sim's current scene through each call.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')

log('\n' + '='*60)
log('Exp 6 COMPLETE. Check results above.')
log(f'Best model: {best_path}')
log('All checkpoints saved in: ' + SAVE_DIR)
log('='*60)
|
||||||
|
|
@ -0,0 +1,96 @@
|
||||||
|
"""
|
||||||
|
Exp 7: mountain_track, v5 reward, throttle_min=0.5, lr=0.000725, 90k steps
|
||||||
|
- steps_per_switch=6000 → 15 checkpoints saved across the run
|
||||||
|
- best_model.zip saved whenever a new best segment score is achieved
|
||||||
|
- Single track: TRAINING_TRACKS overridden to mountain_track only
|
||||||
|
"""
|
||||||
|
import sys, os, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
|
||||||
|
import multitrack_runner as mr
|
||||||
|
from multitrack_runner import (log, _send_exit_scene, StuckTerminationWrapper,
|
||||||
|
train_multitrack)
|
||||||
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
||||||
|
from reward_wrapper import SpeedRewardWrapper
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||||
|
import gymnasium as gym
|
||||||
|
|
||||||
|
THROTTLE_MIN = 0.5
|
||||||
|
LR = 0.000725
|
||||||
|
TOTAL_STEPS = 90000
|
||||||
|
STEPS_PER_SEG = 6000 # 15 checkpoints across the run, NOT 1
|
||||||
|
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp7-mountain-checkpointed'
|
||||||
|
os.makedirs(SAVE_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
def make_env(env_id):
    """Build the wrapped training env for *env_id*: throttle floor,
    stuck-episode termination, then the v5 speed reward."""
    raw = gym.make(env_id)
    env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
    env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
    env = SpeedRewardWrapper(env)
    return env
|
||||||
|
|
||||||
|
log('='*60)
log('Exp 7: mountain_track ONLY, v5 reward, throttle_min=0.5')
log(f' lr={LR}, total_steps={TOTAL_STEPS:,}, steps_per_seg={STEPS_PER_SEG:,}')
log(f' Checkpoints: {TOTAL_STEPS // STEPS_PER_SEG} saves across the run')
log(f' Save dir: {SAVE_DIR}')
log('='*60)

# Switch sim to mountain_track
log('Switching to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')

# Override to single track — segments still run every 6k steps for checkpointing
_orig = mr.TRAINING_TRACKS
mr.TRAINING_TRACKS = [('mountain_track', 'donkey-mountain-track-v0')]

env, segment_rewards = train_multitrack(
    model, env,
    total_timesteps=TOTAL_STEPS,
    steps_per_switch=STEPS_PER_SEG,
    save_dir=SAVE_DIR,
)

mr.TRAINING_TRACKS = _orig  # restore the module-level track list

# List produced checkpoints with sizes in MB.
log(f'\nCheckpoints saved in {SAVE_DIR}:')
for f in sorted(os.listdir(SAVE_DIR)):
    size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024)
    log(f' {f} ({size}MB)')

env.close(); time.sleep(3)

# Eval best_model on all tracks
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
|
||||||
|
|
||||||
|
def eval_track(current_id, track_id, name, n=3):
    """Run *n* deterministic episodes of best_model on *track_id*.

    Sends exit-scene via a throwaway connection to *current_id*, builds a
    fresh wrapped env on *track_id*, and logs per-episode reward/steps
    (capped at 2000 steps). Returns *track_id* for chaining.
    """
    log(f'\n--- EVAL: {name} ---')
    # Throwaway connection just to leave the current scene.
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    ev = VecTransposeImage(DummyVecEnv([lambda: make_env(track_id)]))
    m = PPO.load(best_path, env=ev, device='cpu')
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both step APIs: 5-tuple (terminated/truncated) and 4-tuple (done).
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close(); time.sleep(3)
    return track_id
|
||||||
|
|
||||||
|
# Evaluate on the training track plus three zero-shot tracks, chaining
# the sim's current scene through each call.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')

log('\n=== Exp 7 COMPLETE ===')
|
||||||
|
|
@ -0,0 +1,133 @@
|
||||||
|
"""
|
||||||
|
Exp 8: mountain_track, v5 reward (speed x CTE), throttle_min=0.5
|
||||||
|
- Single TCP connection for the entire run (no disconnect/reconnect)
|
||||||
|
- Saves numbered checkpoint every 6000 steps
|
||||||
|
- Saves best_model.zip whenever a new best is found
|
||||||
|
- Circle exploit: episode terminates immediately on short lap
|
||||||
|
"""
|
||||||
|
import sys, os, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
|
||||||
|
from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper
|
||||||
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
||||||
|
from reward_wrapper import SpeedRewardWrapper
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||||
|
from stable_baselines3.common.utils import get_schedule_fn
|
||||||
|
import gymnasium as gym
|
||||||
|
|
||||||
|
THROTTLE_MIN = 0.5
|
||||||
|
LR = 0.000725
|
||||||
|
TOTAL_STEPS = 90000
|
||||||
|
STEPS_PER_SEG = 6000 # checkpoint frequency — NOT track switching
|
||||||
|
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp8-mountain-clean'
|
||||||
|
os.makedirs(SAVE_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
def make_env(env_id='donkey-mountain-track-v0'):
    """Build the wrapped training env.

    Generalized: *env_id* defaults to mountain_track so existing callers
    (``DummyVecEnv([make_env])``) are unchanged, but other tracks can now
    reuse the same wrapper chain.

    Wrapper chain (innermost first): ThrottleClampWrapper enforces the
    THROTTLE_MIN floor, StuckTerminationWrapper ends episodes after 80
    low-displacement steps, SpeedRewardWrapper applies the v5 reward.
    """
    raw = gym.make(env_id)
    env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
    env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
    env = SpeedRewardWrapper(env)
    return env
|
||||||
|
|
||||||
|
log('='*60)
log('Exp 8: mountain_track ONLY — single connection throughout')
log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total_steps={TOTAL_STEPS:,}')
log(f' Checkpoint every {STEPS_PER_SEG:,} steps ({TOTAL_STEPS//STEPS_PER_SEG} checkpoints)')
log(f' Reward: v5 (speed x CTE-quality)')
log(f' Circle fix: short lap terminates episode immediately')
log(f' NO disconnect/reconnect between chunks')
log('='*60)

# Connect ONCE — stay connected for the entire run
log('Connecting to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)

env = VecTransposeImage(DummyVecEnv([make_env]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
log('Connected. Training begins — sim will NOT go to main menu between segments.')
log('You will see: car runs → crashes/stuck → resets to start → runs again.')

# Loop state: best eval reward seen so far, steps trained, segment counter.
best_reward = float('-inf')
steps_done = 0
seg_num = 0

while steps_done < TOTAL_STEPS:
    # Last segment may be shorter than STEPS_PER_SEG.
    seg_steps = min(STEPS_PER_SEG, TOTAL_STEPS - steps_done)
    seg_num += 1
    log(f'\n[Seg {seg_num}] steps {steps_done:,} → {steps_done+seg_steps:,}')

    # reset_num_timesteps=False keeps the step counter cumulative across segments.
    model.learn(total_timesteps=seg_steps, reset_num_timesteps=False)
    steps_done += seg_steps

    # Numbered checkpoint — never overwritten
    ckpt = os.path.join(SAVE_DIR, f'checkpoint_{steps_done:07d}')
    model.save(ckpt)
    log(f'[Seg {seg_num}] Checkpoint saved: {ckpt}.zip')

    # Quick 1-episode deterministic eval to measure quality
    try:
        obs = env.reset()
        ep_reward, ep_steps, done = 0.0, 0, False
        while not done and ep_steps < 2000:
            action, _ = model.predict(obs, deterministic=True)
            result = env.step(action)
            # Handle both step APIs: 5-tuple (terminated/truncated) and 4-tuple (done).
            if len(result)==5: obs,r,t,tr,_ = result; done=bool(t[0] or tr[0])
            else: obs,r,d,_ = result; done=bool(d[0])
            ep_reward += float(r[0]); ep_steps += 1
        log(f'[Seg {seg_num}] Eval: {ep_reward:.1f} reward / {ep_steps} steps (deterministic)')

        if ep_reward > best_reward:
            best_reward = ep_reward
            best_path = os.path.join(SAVE_DIR, 'best_model')
            model.save(best_path)
            log(f'[Seg {seg_num}] ⭐ NEW BEST: {best_reward:.1f} → best_model.zip')
    except Exception as e:
        # Eval is best-effort; a failed eval must not kill the training run.
        log(f'[Seg {seg_num}] Eval failed: {e}')

env.close()
time.sleep(2)
log(f'\nTraining complete. Best reward: {best_reward:.1f}')
log(f'Checkpoints: {SAVE_DIR}/')
for f in sorted(os.listdir(SAVE_DIR)):
    log(f' {f}')

# Eval best model on all 4 tracks
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
log(f'\nEvaluating best_model.zip on all tracks...')
|
||||||
|
|
||||||
|
def eval_track(current_id, track_id, name, n=3):
    """Run *n* deterministic episodes of best_model on *track_id*.

    Sends exit-scene via a throwaway connection to *current_id*, rebuilds
    the full wrapper chain inline on *track_id*, and logs per-episode
    reward/steps (capped at 2000 steps). Returns *track_id* for chaining.
    """
    log(f'\n--- EVAL: {name} ---')
    # Throwaway connection just to leave the current scene.
    tmp2 = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp2, verbose=False); tmp2.close(); time.sleep(5)
    # Same wrapper chain as make_env, but parameterized by track_id.
    ev = VecTransposeImage(DummyVecEnv([lambda: (
        SpeedRewardWrapper(
            StuckTerminationWrapper(
                ThrottleClampWrapper(gym.make(track_id), throttle_min=THROTTLE_MIN),
                stuck_steps=80, min_displacement=0.5
            )
        )
    )]))
    m = PPO.load(best_path, env=ev, device='cpu')
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both step APIs: 5-tuple (terminated/truncated) and 4-tuple (done).
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close(); time.sleep(3)
    return track_id
|
||||||
|
|
||||||
|
# Evaluate on the training track plus three zero-shot tracks, chaining
# the sim's current scene through each call.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')

log('\n=== Exp 8 COMPLETE ===')
|
||||||
|
|
@ -0,0 +1,128 @@
|
||||||
|
"""
|
||||||
|
Exp 9: mountain_track, v5 reward, throttle_min=0.2
|
||||||
|
ONE VARIABLE CHANGED from Exp8: throttle_min 0.5 → 0.2
|
||||||
|
|
||||||
|
Hypothesis: v5 reward (speed × CTE) has non-zero gradient on hill.
|
||||||
|
Model can learn to output high throttle when needed even with 0.2 floor.
|
||||||
|
Full throttle range [0.2, 1.0] allows model to also slow for corners.
|
||||||
|
|
||||||
|
If this works: can drive mountain_track AND potentially mini_monaco corners.
|
||||||
|
If this fails: car stalls on hill, confirming 0.5 minimum is physically required.
|
||||||
|
"""
|
||||||
|
import sys, os, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
|
||||||
|
from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper
|
||||||
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
||||||
|
from reward_wrapper import SpeedRewardWrapper
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||||
|
import gymnasium as gym, numpy as np
|
||||||
|
|
||||||
|
THROTTLE_MIN = 0.2 # ← ONLY CHANGE from Exp8
|
||||||
|
LR = 0.000725 # same
|
||||||
|
TOTAL_STEPS = 90000 # same
|
||||||
|
STEPS_PER_SEG = 6000 # same — 15 checkpoints
|
||||||
|
SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp9-mountain-v5-throttle02'
|
||||||
|
os.makedirs(SAVE_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
def make_env():
    """Build the wrapped mountain_track training env.

    Wrapper chain (innermost first): ThrottleClampWrapper enforces the
    THROTTLE_MIN floor, StuckTerminationWrapper ends episodes after 80
    low-displacement steps, SpeedRewardWrapper applies the v5 reward.
    """
    clamped = ThrottleClampWrapper(gym.make('donkey-mountain-track-v0'),
                                   throttle_min=THROTTLE_MIN)
    guarded = StuckTerminationWrapper(clamped, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(guarded)
|
||||||
|
|
||||||
|
log('='*60)
log('Exp 9: mountain_track, v5 reward, throttle_min=0.2')
log('ONE CHANGE from Exp8: throttle_min 0.5 → 0.2')
log(f' lr={LR}, total_steps={TOTAL_STEPS:,}, steps_per_seg={STEPS_PER_SEG:,}')
log(f' Hypothesis: v5 gradient non-zero on hill → model learns high throttle')
log(f' Save: {SAVE_DIR}')
log('='*60)

# Clear previous sim state
log('Clearing sim state...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)

# Single connection for entire run
env = VecTransposeImage(DummyVecEnv([make_env]))
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
log('Connected. Training begins on mountain_track with throttle_min=0.2')
log('Watch: does model get over the hill?')

# Loop state: best eval reward seen so far, steps trained, segment counter.
best_reward = float('-inf')
steps_done, seg_num = 0, 0

while steps_done < TOTAL_STEPS:
    # Last segment may be shorter than STEPS_PER_SEG.
    seg_steps = min(STEPS_PER_SEG, TOTAL_STEPS - steps_done)
    seg_num += 1
    log(f'\n[Seg {seg_num}] steps {steps_done:,} → {steps_done+seg_steps:,}')

    # reset_num_timesteps=False keeps the step counter cumulative across segments.
    model.learn(total_timesteps=seg_steps, reset_num_timesteps=False)
    steps_done += seg_steps

    # Numbered checkpoint — never overwritten.
    ckpt = os.path.join(SAVE_DIR, f'checkpoint_{steps_done:07d}')
    model.save(ckpt)
    log(f'[Seg {seg_num}] Checkpoint: {ckpt}.zip')

    # Quick 1-episode deterministic eval; best-effort, must not kill training.
    try:
        obs = env.reset()
        ep_reward, ep_steps, done = 0.0, 0, False
        while not done and ep_steps < 2000:
            action, _ = model.predict(obs, deterministic=True)
            result = env.step(action)
            # Handle both step APIs: 5-tuple (terminated/truncated) and 4-tuple (done).
            if len(result)==5: obs,r,t,tr,_ = result; done=bool(t[0] or tr[0])
            else: obs,r,d,_ = result; done=bool(d[0])
            ep_reward += float(r[0]); ep_steps += 1
        log(f'[Seg {seg_num}] Eval: {ep_reward:.1f} reward / {ep_steps} steps (deterministic)')
        if ep_reward > best_reward:
            best_reward = ep_reward
            model.save(os.path.join(SAVE_DIR, 'best_model'))
            log(f'[Seg {seg_num}] ⭐ NEW BEST: {best_reward:.1f}')
    except Exception as e:
        log(f'[Seg {seg_num}] Eval error: {e}')

env.close(); time.sleep(2)
log(f'\nTraining complete. Best reward: {best_reward:.1f}')

# Eval best_model on all tracks
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
|
||||||
|
|
||||||
|
def eval_track(current_id, track_id, name, n=3):
    """Run *n* deterministic episodes of best_model on *track_id*.

    Sends exit-scene via a throwaway connection to *current_id*, rebuilds
    the wrapper chain inline on *track_id*, logs per-episode reward/steps
    (capped at 2000 steps) and the mean episode length. Returns *track_id*
    for chaining.
    """
    log(f'\n--- EVAL: {name} ---')
    # Throwaway connection just to leave the current scene.
    tmp2 = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp2, verbose=False); tmp2.close(); time.sleep(5)
    # Same wrapper chain as make_env; 80/0.5 are stuck_steps/min_displacement.
    ev = VecTransposeImage(DummyVecEnv([lambda: (
        SpeedRewardWrapper(StuckTerminationWrapper(
            ThrottleClampWrapper(gym.make(track_id), throttle_min=THROTTLE_MIN),
            80, 0.5)))]))
    m = PPO.load(best_path, env=ev, device='cpu')
    results = []  # episode lengths, for the mean-steps summary
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < 2000:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both step APIs: 5-tuple (terminated/truncated) and 4-tuple (done).
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        results.append(steps)
        time.sleep(1)
    log(f' Mean steps: {np.mean(results):.0f}')
    ev.close(); time.sleep(3)
    return track_id
|
||||||
|
|
||||||
|
# Evaluate on the training track plus three zero-shot tracks, chaining
# the sim's current scene through each call.
current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')

log('\n=== Exp 9 COMPLETE ===')
# Hard-coded Exp8 baseline numbers for side-by-side comparison in the log.
log(f'Compare with Exp8 best_model results:')
log(f' mountain_track: 382/529/182 (mean=364)')
log(f' mini_monaco: 154/155/104 (mean=138) ← crashed at one corner')
|
||||||
|
|
@ -0,0 +1,86 @@
|
||||||
|
# Exp 4: continue Exp3's mountain_track model for extra timesteps.
import sys, os, time
# Make the project's agent/ package importable when run from anywhere.
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
from multitrack_runner import log, _send_exit_scene
from donkeycar_sb3_runner import ThrottleClampWrapper
from reward_wrapper import SpeedRewardWrapper
from multitrack_runner import StuckTerminationWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
import gymnasium as gym

# Experiment parameters.
THROTTLE_MIN = 0.5
EXTRA_STEPS = 110000  # already did 90k, adding 110k = 200k total
BASE_MODEL = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp3-mountain-throttle05/model.zip'
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp4-mountain-200k/model'
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
|
||||||
|
|
||||||
|
def make_env(env_id):
    """Build the wrapped training env for *env_id*.

    Wrapper chain (innermost first): ThrottleClampWrapper enforces the
    THROTTLE_MIN floor, StuckTerminationWrapper ends episodes after 80
    low-displacement steps, SpeedRewardWrapper applies the reward with
    speed_scale=0.1.
    """
    clamped = ThrottleClampWrapper(gym.make(env_id), throttle_min=THROTTLE_MIN)
    guarded = StuckTerminationWrapper(clamped, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(guarded, speed_scale=0.1)
|
||||||
|
|
||||||
|
def switch_to(current_id, next_id, name):
    """Move the simulator from *current_id*'s scene to *next_id*'s.

    Briefly reconnects to the current scene to send exit-scene, waits for
    the sim to settle, then returns a fresh VecTransposeImage(DummyVecEnv)
    wrapping make_env(next_id). *name* is only used for log output.

    Fix: removed the stray ``raw = gym.make(next_id)`` that opened a second
    sim connection which was never used or closed — make_env() opens its
    own connection inside the DummyVecEnv, so the extra one leaked (the
    "phantom car" symptom).
    """
    log(f' → Switching to {name}...')
    # Reconnect just to send the exit-scene command, then drop the link.
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)]))
    log(f' Connected to {name}'); return env
|
||||||
|
|
||||||
|
class ProgressCB(BaseCallback):
    """SB3 callback that logs cumulative progress every 10k timesteps."""

    def __init__(self, extra):
        super().__init__(verbose=0)
        self._last = 0      # timestep count at the last log line
        self._extra = extra # total extra steps planned, for the log message

    def _on_step(self):
        since_last = self.num_timesteps - self._last
        if since_last >= 10000:
            log(f' +{self.num_timesteps:,} steps (of {self._extra:,} extra)')
            self._last = self.num_timesteps
        return True  # never abort training
|
||||||
|
|
||||||
|
log('='*60)
log(f'Exp 4: CONTINUE mountain_track from exp3 — adding {EXTRA_STEPS:,} steps')
log(f' Total training will be 200,000 steps on mountain_track')
log(f' throttle_min={THROTTLE_MIN}, loading: {os.path.basename(BASE_MODEL)}')
log('='*60)

# Switch to mountain_track
log('Switching to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')]))

log(f'Loading exp3 model and continuing training...')
model = PPO.load(BASE_MODEL, env=env, device='cpu')
# reset_num_timesteps=True restarts the counter at 0, so ProgressCB logs
# count only the EXTRA steps added in this run.
model.learn(total_timesteps=EXTRA_STEPS, callback=ProgressCB(EXTRA_STEPS),
            reset_num_timesteps=True)
model.save(SAVE_PATH)
log(f'Saved: {SAVE_PATH}.zip')
env.close(); time.sleep(3)
|
||||||
|
|
||||||
|
def eval_track(current_id, track_id, name, n=3):
    """Run `n` deterministic eval episodes on `track_id`; return `track_id`."""
    log(f'\n--- EVAL: {name} ---')
    ev = switch_to(current_id, track_id, name)
    policy = PPO.load(SAVE_PATH, env=ev, device='cpu')
    for ep in range(1, n + 1):
        obs = ev.reset()
        total = 0.0
        steps = 0
        done = False
        while not done and steps < 2000:
            action, _ = policy.predict(obs, deterministic=True)
            result = ev.step(action)
            if len(result) == 5:
                # Gymnasium 5-tuple: (obs, reward, terminated, truncated, info)
                obs, r, term, trunc, info = result
                done = bool(term[0] or trunc[0])
            else:
                # Legacy gym 4-tuple: (obs, reward, done, info)
                obs, r, d, info = result
                done = bool(d[0])
            total += float(r[0])
            steps += 1
        status = '✅ FULL' if steps >= 2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close()
    time.sleep(3)
    return track_id
|
# Evaluate the continued model on the training track plus three unseen tracks.
log('\nEvaluating deterministic policy on all tracks...')
current = 'donkey-mountain-track-v0'
for tid, label in [
    ('donkey-mountain-track-v0', 'mountain_track (training track)'),
    ('donkey-generated-track-v0', 'generated_track (zero-shot)'),
    ('donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)'),
    ('donkey-generated-roads-v0', 'generated_road (zero-shot)'),
]:
    current = eval_track(current, tid, label)

log('\n=== Exp 4 COMPLETE ===')
@ -0,0 +1,106 @@
|
||||||
|
import sys, os, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
from multitrack_runner import wrap_env, log, _send_exit_scene
|
||||||
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
||||||
|
from reward_wrapper import SpeedRewardWrapper
|
||||||
|
from multitrack_runner import StuckTerminationWrapper
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
import gymnasium as gym, numpy as np
|
||||||
|
|
||||||
|
LR = 0.000725
|
||||||
|
TOTAL_STEPS = 90000
|
||||||
|
THROTTLE_MIN = 0.5 # raised from 0.2
|
||||||
|
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp3-mountain-throttle05/model'
|
||||||
|
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
|
||||||
|
|
||||||
|
def make_env_high_throttle(env_id):
    """Build a single wrapped env: throttle clamp → stuck termination → reward."""
    base = gym.make(env_id)
    clamped = ThrottleClampWrapper(base, throttle_min=THROTTLE_MIN)
    terminating = StuckTerminationWrapper(clamped, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(terminating, speed_scale=0.1)
|
def switch_to(current_id, next_id, name, throttle_min=THROTTLE_MIN):
    """Exit the current scene, reconnect on `next_id`, wrap for eval/training.

    Builds the vec env from a SINGLE raw sim connection.
    """
    log(f' → Switching to {name} (throttle_min={throttle_min})...')
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    raw = gym.make(next_id)

    def _wrap(e=raw, t=throttle_min):
        # One wrapper chain: clamp throttle → terminate when stuck → v5 reward.
        # The previous tuple-lambda constructed this chain three times over and
        # kept only the last element — same final object graph, but it
        # allocated throwaway wrapper instances and was unreadable.
        env = ThrottleClampWrapper(e, throttle_min=t)
        env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
        return SpeedRewardWrapper(env, speed_scale=0.1)

    env = VecTransposeImage(DummyVecEnv([_wrap]))
    log(f' Connected to {name}'); return env
|
def simple_switch(current_id, next_id, name):
    """Exit the current scene and reconnect on `next_id` with default wrappers."""
    log(f' → Switching to {name}...')
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    # make_env_high_throttle() opens its own connection, so do NOT gym.make()
    # here as well — the old unused `raw = gym.make(next_id)` opened a second
    # connection and produced a phantom car in the sim.
    env = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle(next_id)]))
    log(f' Connected to {name}'); return env
|
class ProgressCB(BaseCallback):
    """SB3 callback: log `step X/TOTAL` every 10k training steps."""

    def __init__(self, total):
        super().__init__(verbose=0)
        self._last = 0
        self._total = total  # planned total steps, for the progress line

    def _on_step(self):
        if self.num_timesteps >= self._last + 10000:
            log(f' step {self.num_timesteps:,}/{self._total:,}')
            self._last = self.num_timesteps
        return True
|
# ── Banner ──────────────────────────────────────────────────────────────
log('='*60)
log(f'Exp 3: mountain_track ONLY, throttle_min={THROTTLE_MIN}, lr={LR}')
log('Hypothesis: higher min throttle gets car over the hill,')
log(' allowing it to learn the full track.')
log('='*60)

# Connect - sim is on mountain_track from last test
log('Connecting to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)

# Single connection only: make_env_high_throttle() calls gym.make() itself.
# The old unused `raw = gym.make(...)` here opened a duplicate connection
# (a second phantom car in the sim).
train_env = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle('donkey-mountain-track-v0')]))
log('Connected. Training...')

model = PPO('CnnPolicy', train_env, learning_rate=LR, verbose=1, device='cpu')
model.learn(total_timesteps=TOTAL_STEPS, callback=ProgressCB(TOTAL_STEPS),
            reset_num_timesteps=True)
model.save(SAVE_PATH)
log(f'Saved: {SAVE_PATH}.zip')
train_env.close(); time.sleep(3)
||||||
|
# Eval on all 4 tracks
def eval_track(current_id, track_id, track_name, model_path, n=3, max_steps=2000):
    """Evaluate `model_path` for `n` deterministic episodes on `track_id`.

    Returns `track_id` so the caller can thread the "current scene" state.
    """
    log(f'\n--- EVAL: {track_name} ({n} episodes) ---')
    tmp = gym.make(current_id); time.sleep(2)
    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
    # Single connection: make_env_high_throttle() calls gym.make() itself.
    # The old unused `raw = gym.make(track_id)` opened a second phantom car.
    ev = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle(track_id)]))
    m = PPO.load(model_path, env=ev, device='cpu')
    for ep in range(1, n+1):
        obs = ev.reset(); total, steps, done = 0.0, 0, False
        while not done and steps < max_steps:
            action, _ = m.predict(obs, deterministic=True)
            result = ev.step(action)
            # Handle both gymnasium 5-tuple and legacy gym 4-tuple step APIs.
            if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0])
            else: obs,r,d,info=result; done=bool(d[0])
            total+=float(r[0]); steps+=1
        status='✅ FULL' if steps>=max_steps else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close(); time.sleep(3)
    return track_id
||||||
|
# Evaluate on the training track plus the three held-out tracks.
current = 'donkey-mountain-track-v0'
for _tid, _label in [
    ('donkey-mountain-track-v0', 'mountain_track (training track)'),
    ('donkey-generated-track-v0', 'generated_track (zero-shot)'),
    ('donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)'),
    ('donkey-generated-roads-v0', 'generated_road (zero-shot)'),
]:
    current = eval_track(current, _tid, _label, SAVE_PATH)

log('\n=== Exp 3 COMPLETE ===')
|
@ -0,0 +1,79 @@
|
||||||
|
import sys, os, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper
|
||||||
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
||||||
|
from reward_wrapper import SpeedRewardWrapper
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
import gymnasium as gym
|
||||||
|
|
||||||
|
THROTTLE_MIN = 0.5
|
||||||
|
LR = 0.000725
|
||||||
|
TOTAL_STEPS = 90000
|
||||||
|
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp5-mountain-v5reward/model'
|
||||||
|
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
|
||||||
|
|
||||||
|
def make_env(env_id):
    """Connect to the sim and wrap: throttle clamp → stuck termination → v5 reward."""
    base = gym.make(env_id)
    clamped = ThrottleClampWrapper(base, throttle_min=THROTTLE_MIN)
    terminating = StuckTerminationWrapper(clamped, stuck_steps=80, min_displacement=0.5)
    return SpeedRewardWrapper(terminating)  # v5 reward
||||||
|
def switch_to(current_id, next_id, name):
    """Exit the current scene and return a wrapped vec env on `next_id`."""
    log(f' → {name}...')
    # Reconnect briefly to the old scene just to deliver the exit command.
    probe = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(probe, verbose=False)
    probe.close()
    time.sleep(5)
    # make_env() opens the (single) new connection itself.
    env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)]))
    log(f' Connected to {name}')
    return env
||||||
|
class ProgressCB(BaseCallback):
    """SB3 callback: log `step X/TOTAL` every 10k training steps."""

    def __init__(self, total):
        super().__init__(verbose=0)
        self._last = 0
        self._total = total

    def _on_step(self):
        if self.num_timesteps >= self._last + 10000:
            log(f' step {self.num_timesteps:,}/{self._total:,}')
            self._last = self.num_timesteps
        return True
||||||
|
# ── Banner ──────────────────────────────────────────────────────────────
log('='*60)
log('Exp 5: mountain_track, v5 reward (speed×CTE), throttle_min=0.5')
log('v5 reward gives direct gradient signal for hill: slow=low reward')
log('='*60)

# Switch sim to mountain_track
log('Switching to mountain_track...')
tmp = gym.make('donkey-mountain-track-v0')
time.sleep(2)
_send_exit_scene(tmp, verbose=False)
tmp.close()
time.sleep(5)
env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')]))

# Train PPO from scratch on the v5-reward mountain env.
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
model.learn(
    total_timesteps=TOTAL_STEPS,
    callback=ProgressCB(TOTAL_STEPS),
    reset_num_timesteps=True,
)
model.save(SAVE_PATH)
log(f'Saved.')
env.close()
time.sleep(3)
||||||
|
def eval_track(current_id, track_id, name, n=3):
    """Run `n` deterministic eval episodes on `track_id`; return `track_id`."""
    log(f'\n--- EVAL: {name} ---')
    ev = switch_to(current_id, track_id, name)
    policy = PPO.load(SAVE_PATH, env=ev, device='cpu')
    for ep in range(1, n + 1):
        obs = ev.reset()
        total = 0.0
        steps = 0
        done = False
        while not done and steps < 2000:
            action, _ = policy.predict(obs, deterministic=True)
            result = ev.step(action)
            if len(result) == 5:
                # Gymnasium 5-tuple: (obs, reward, terminated, truncated, info)
                obs, r, term, trunc, info = result
                done = bool(term[0] or trunc[0])
            else:
                # Legacy gym 4-tuple: (obs, reward, done, info)
                obs, r, d, info = result
                done = bool(d[0])
            total += float(r[0])
            steps += 1
        status = '✅ FULL' if steps >= 2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
        time.sleep(1)
    ev.close()
    time.sleep(3)
    return track_id
||||||
|
# Evaluate the v5-reward model on all four tracks.
current = 'donkey-mountain-track-v0'
for _tid, _label in [
    ('donkey-mountain-track-v0', 'mountain_track (training)'),
    ('donkey-generated-track-v0', 'generated_track (zero-shot)'),
    ('donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)'),
    ('donkey-generated-roads-v0', 'generated_road (zero-shot)'),
]:
    current = eval_track(current, _tid, _label)
log('\n=== Exp 5 COMPLETE ===')
|
@ -0,0 +1,111 @@
|
||||||
|
import sys, os, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
from multitrack_runner import wrap_env, log, _send_exit_scene
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
import gymnasium as gym, numpy as np
|
||||||
|
|
||||||
|
LR = 0.000725
|
||||||
|
|
||||||
|
def exit_connect(current_id, next_id, name):
    """Cleanly exit the current scene, then return a wrapped vec env on `next_id`."""
    log(f' → Switching to {name}...')
    # Reconnect briefly to the old scene just to deliver the exit command.
    probe = gym.make(current_id)
    time.sleep(2)
    _send_exit_scene(probe, verbose=False)
    probe.close()
    time.sleep(5)
    # Single raw connection, wrapped once via wrap_env.
    raw = gym.make(next_id)
    vec = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)]))
    log(f' Connected to {name}')
    return vec
||||||
|
class ProgressCB(BaseCallback):
    """SB3 callback: log `step X/TOTAL` every 10k training steps."""

    def __init__(self, total):
        super().__init__(verbose=0)
        self._last = 0
        self._total = total

    def _on_step(self):
        if self.num_timesteps >= self._last + 10000:
            log(f' step {self.num_timesteps:,}/{self._total:,}')
            self._last = self.num_timesteps
        return True
||||||
|
def train(current_id, track_id, track_name, steps, save_path):
    """Train a fresh PPO model on one track, save it, and return `track_id`."""
    log(f'\nTRAINING on {track_name} — {steps:,} steps, lr={LR}')
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    env = exit_connect(current_id, track_id, track_name)
    # Fresh model each time — no warm start across experiments.
    agent = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
    agent.learn(total_timesteps=steps, callback=ProgressCB(steps), reset_num_timesteps=True)
    agent.save(save_path)
    log(f' Saved: {save_path}.zip')
    env.close()
    time.sleep(3)
    return track_id
||||||
|
def train_two_tracks(current_id, steps, switch, save_path):
    """Round-robin training: generated_track + mountain_track.

    Trains one PPO model for `steps` total timesteps, switching tracks every
    `switch` steps, checkpointing after each segment. Returns the env id of
    the track the sim ends on.
    """
    from multitrack_runner import close_and_switch
    log(f'\nTRAINING generated_track+mountain_track — {steps:,} steps, switch={switch}, lr={LR}')
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    TRACKS = [('generated_track','donkey-generated-track-v0'),
              ('mountain_track', 'donkey-mountain-track-v0')]
    env = exit_connect(current_id, TRACKS[0][1], TRACKS[0][0])
    model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
    steps_done, idx = 0, 0
    while steps_done < steps:
        seg = min(switch, steps - steps_done)
        log(f' Segment: {TRACKS[idx][0]} | {steps_done:,}/{steps:,}')
        # reset_num_timesteps=False keeps one continuous step counter across segments.
        model.learn(total_timesteps=seg, reset_num_timesteps=False)
        steps_done += seg
        # Best-effort checkpoint: keep training even if the save fails, but
        # don't swallow errors silently (was a bare `except: pass`, which also
        # ate KeyboardInterrupt/SystemExit).
        try:
            model.save(save_path)  # checkpoint
        except Exception as e:
            log(f'checkpoint save failed: {e}')
        if steps_done < steps:
            nxt = (idx+1) % 2
            env = close_and_switch(env, TRACKS[nxt][1])
            model.set_env(env)
            idx = nxt
    model.save(save_path); log(f' Saved: {save_path}.zip')
    env.close(); time.sleep(3)
    return TRACKS[idx][1]
||||||
|
def eval_all(current_id, model_path, label):
    """Evaluate `model_path` on the three benchmark tracks; return the last env id."""
    log(f'\n{"="*60}')
    log(f'EVAL: {label}')
    log(f'{"="*60}')
    tests = [
        ('generated_track','donkey-generated-track-v0'),
        ('mini_monaco', 'donkey-minimonaco-track-v0'),
        ('generated_road', 'donkey-generated-roads-v0'),
    ]
    cur = current_id
    for tname, tid in tests:
        ev = exit_connect(cur, tid, tname)
        policy = PPO.load(model_path, env=ev, device='cpu')
        log(f' --- {tname} (3 episodes) ---')
        for ep in range(1, 4):
            obs = ev.reset()
            total = 0.0
            steps = 0
            done = False
            while not done and steps < 2000:
                action, _ = policy.predict(obs, deterministic=True)
                result = ev.step(action)
                if len(result) == 5:
                    # Gymnasium 5-tuple: (obs, reward, terminated, truncated, info)
                    obs, r, term, trunc, info = result
                    done = bool(term[0] or trunc[0])
                else:
                    # Legacy gym 4-tuple: (obs, reward, done, info)
                    obs, r, d, info = result
                    done = bool(d[0])
                total += float(r[0])
                steps += 1
            status = '✅ FULL 2000' if steps >= 2000 else f'❌ crash@{steps}'
            log(f' ep{ep}: {total:.0f} reward/{steps} steps — {status}')
            time.sleep(1)
        ev.close()
        time.sleep(3)
        cur = tid
    return cur
|
||||||
|
# ── START ── sim is on mini_monaco
current = 'donkey-minimonaco-track-v0'

# Experiment 1: single-track baseline on mountain_track.
log('\n'+'#'*60)
log('EXPERIMENT 1: mountain_track ONLY, 90k steps')
log('#'*60)
SAVE1 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp1-mountain-only/model'
current = train(current, 'donkey-mountain-track-v0', 'mountain_track', 90000, SAVE1)
current = eval_all(current, SAVE1, 'Exp 1: mountain_track only model')

# Experiment 2: reproduce Trial 9's two-track round-robin schedule.
log('\n'+'#'*60)
log('EXPERIMENT 2: Trial 9 REPEAT — generated_track+mountain_track, 90k, switch=6851')
log('#'*60)
SAVE2 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp2-trial9-repeat/model'
current = train_two_tracks(current, steps=90000, switch=6851, save_path=SAVE2)
current = eval_all(current, SAVE2, 'Exp 2: Trial 9 repeat model')

log('\n'+'='*60)
log('ALL OVERNIGHT EXPERIMENTS COMPLETE')
log('='*60)
|
|
@ -0,0 +1,74 @@
|
||||||
|
import sys, os, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
from multitrack_runner import wrap_env, log, _send_exit_scene
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
import gymnasium as gym
|
||||||
|
|
||||||
|
TRAIN_TRACK = 'donkey-generated-track-v0'
|
||||||
|
LR = 0.000725
|
||||||
|
TOTAL_STEPS = 90000
|
||||||
|
SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave5-gentrack-only/model'
|
||||||
|
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
|
||||||
|
|
||||||
|
def exit_and_connect(current_track_id, next_track_id, next_name):
    """Exit current scene cleanly then connect to next track."""
    log(f'Exiting current scene, connecting to {next_name}...')
    # Reconnect to the old scene only to deliver the exit command.
    probe = gym.make(current_track_id)
    time.sleep(2)
    _send_exit_scene(probe, verbose=False)
    probe.close()
    time.sleep(5)
    # Single raw connection, wrapped once via wrap_env.
    raw = gym.make(next_track_id)
    vec = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)]))
    log(f'Connected to {next_name}')
    return vec
||||||
|
# ── Banner ──────────────────────────────────────────────────────────────
log('='*60)
log('Wave 5: generated_track ONLY, from scratch, lr=0.000725')
log('='*60)

# The sim is currently on generated_road — exit and switch to generated_track
env = exit_and_connect('donkey-generated-roads-v0', TRAIN_TRACK, 'generated_track')

# Fresh PPO model, no warm start.
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')
log(f'Training from scratch on generated_track, {TOTAL_STEPS:,} steps...')
||||||
|
class ProgressCB(BaseCallback):
    """SB3 callback: log `step X/TOTAL_STEPS` every 10k training steps."""

    def __init__(self):
        super().__init__(verbose=0)
        self._last = 0

    def _on_step(self):
        # Reads the module-level TOTAL_STEPS for the denominator.
        if self.num_timesteps >= self._last + 10000:
            log(f' step {self.num_timesteps:,}/{TOTAL_STEPS:,}')
            self._last = self.num_timesteps
        return True
||||||
|
# Train, save, and release the training connection.
model.learn(
    total_timesteps=TOTAL_STEPS,
    callback=ProgressCB(),
    reset_num_timesteps=True,
)
model.save(SAVE_PATH)
log(f'Model saved.')
env.close()
time.sleep(3)
|
def eval_track(current_id, eval_id, track_name, n_eps=3):
    """Run `n_eps` deterministic zero-shot episodes on `eval_id`; return it."""
    log(f'\n--- EVAL: {track_name} (zero-shot) ---')
    ev = exit_and_connect(current_id, eval_id, track_name)
    policy = PPO.load(SAVE_PATH, env=ev, device='cpu')
    for ep in range(1, n_eps + 1):
        obs = ev.reset()
        total = 0.0
        steps = 0
        done = False
        while not done and steps < 2000:
            action, _ = policy.predict(obs, deterministic=True)
            result = ev.step(action)
            if len(result) == 5:
                # Gymnasium 5-tuple: (obs, reward, terminated, truncated, info)
                obs, r, term, trunc, info = result
                done = bool(term[0] or trunc[0])
            else:
                # Legacy gym 4-tuple: (obs, reward, done, info)
                obs, r, d, info = result
                done = bool(d[0])
            total += float(r[0])
            steps += 1
        status = '✅ FULL' if steps >= 2000 else f'❌ crash@{steps}'
        log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}')
    ev.close()
    time.sleep(3)
    return eval_id
||||||
|
# Zero-shot evaluation on the two held-out tracks.
current = eval_track(TRAIN_TRACK, 'donkey-minimonaco-track-v0', 'mini_monaco')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road')
log('\n=== Wave 5 COMPLETE ===')
||||||
Loading…
Reference in New Issue