diff --git a/agent/experiments/README.md b/agent/experiments/README.md new file mode 100644 index 0000000..cc9d8e0 --- /dev/null +++ b/agent/experiments/README.md @@ -0,0 +1,30 @@ +# Experiment Scripts + +These scripts were used to run individual training experiments. +Each corresponds to an entry in docs/TEST_HISTORY.md. + +| Script | Experiment | Key change | +|---|---|---| +| mountain_v5.py | Exp 5 | v5 reward + throttle_min=0.5, direct model.learn() | +| mountain_continue.py | Exp 4 | Continued Exp3 training | +| mountain_high_throttle.py | Exp 3 | throttle_min=0.5, old v4 reward | +| exp6_mountain_v5_proper.py | Exp 6 | v5 + termination, wrong steps_per_switch (=total) | +| exp7_mountain_proper.py | Exp 7 | v5 + termination, correct steps_per_switch=6000, had phantom car issue | +| exp8_mountain_clean.py | Exp 8 | v5 + throttle_min=0.5, single connection, correct checkpointing | +| exp9_mountain_v5_throttle02.py | Exp 9 | v5 + throttle_min=0.2, OUR BEST MODEL | +| exp10_two_tracks.py | Exp 10 | Two tracks via custom script (abandoned — used multitrack_runner.py instead) | +| overnight.py | Overnight runs | mountain-only and Trial9-repeat experiments | +| wave5_train.py | Wave 5 | generated_track only with throttle_min=0.2 | + +## Rule going forward +ALL experiment scripts must be saved here and committed to git +BEFORE running. Scripts in /tmp are lost on reboot. + +## Running experiments +Use multitrack_runner.py directly for two-track training: + python3 multitrack_runner.py --total-timesteps 90000 --steps-per-switch 6000 ... + +For single-track experiments, use the pattern from exp8/exp9: + - VecTransposeImage(DummyVecEnv([make_env])) for env creation + - Direct model.learn() loop with manual checkpointing + - No close_and_switch() for single track diff --git a/agent/experiments/exp10_two_tracks.py b/agent/experiments/exp10_two_tracks.py new file mode 100644 index 0000000..676d157 --- /dev/null +++ b/agent/experiments/exp10_two_tracks.py @@ -0,0 +1,68 @@ +""" +Exp 10 (fixed): generated_track + mountain_track, v5 reward, throttle_min=0.2 + +Fix: pass plain wrap_env() to train_multitrack(), NOT VecTransposeImage. +SB3 applies VecTransposeImage internally. close_and_switch() then works +on all segments because env.unwrapped.viewer is accessible via gym.Wrapper. 
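+
+A minimal sketch of the difference (names LR, TOTAL_STEPS, STEPS_PER_SEG, SAVE_DIR
+and the track id are the ones defined in the script below; the broken form is shown
+only for contrast):
+
+    # broken (original Exp 10): train_multitrack() receives a VecEnv, so
+    # close_and_switch() can no longer reach the underlying sim handle
+    env = VecTransposeImage(DummyVecEnv([lambda: wrap_env(gym.make(track_id))]))
+
+    # fixed: pass the plain gym.Wrapper chain; PPO wraps it into a VecEnv itself
+    env = wrap_env(gym.make(track_id))
+    model = PPO('CnnPolicy', env, learning_rate=LR, device='cpu')
+    env, segment_rewards = train_multitrack(model, env,
+        total_timesteps=TOTAL_STEPS, steps_per_switch=STEPS_PER_SEG, save_dir=SAVE_DIR)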
+""" +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') + +from multitrack_runner import (log, _send_exit_scene, train_multitrack, + wrap_env, TRAINING_TRACKS, THROTTLE_MIN) +from stable_baselines3 import PPO +import gymnasium as gym, numpy as np + +LR = 0.000725 +TOTAL_STEPS = 90000 +STEPS_PER_SEG = 6000 +SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp10-two-tracks' +os.makedirs(SAVE_DIR, exist_ok=True) + +log('='*60) +log('Exp 10 (fixed): generated_track + mountain_track, v5, throttle_min=0.2') +log(f' Tracks: {[t[0] for t in TRAINING_TRACKS]}') +log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total={TOTAL_STEPS:,}') +log(f' FIX: passing gym.Wrapper to train_multitrack (not VecTransposeImage)') +log(f' SB3 applies VecEnv wrapping internally — exit_scene works on all segments') +log('='*60) + +# Clear sim state +log('Clearing sim...') +tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2) +_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + +# Create first env as plain gym.Wrapper — SB3 wraps it internally +first_track_id = TRAINING_TRACKS[0][1] # generated_track +log(f'Connecting to {TRAINING_TRACKS[0][0]}...') +raw = gym.make(first_track_id) +first_env = wrap_env(raw) # gym.Wrapper chain — close_and_switch works on this + +# PPO.__init__ calls _wrap_env() → DummyVecEnv + VecTransposeImage automatically +model = PPO('CnnPolicy', first_env, learning_rate=LR, verbose=1, device='cpu') +log(f'Model env type: {type(model.env).__name__}') # Should show VecTransposeImage +log('Connected. Starting two-track training.') + +env, segment_rewards = train_multitrack( + model, first_env, # <-- gym.Wrapper, not VecTransposeImage + total_timesteps=TOTAL_STEPS, + steps_per_switch=STEPS_PER_SEG, + save_dir=SAVE_DIR, +) +env.close(); time.sleep(3) + +log(f'\nTraining complete. Checkpoints in {SAVE_DIR}:') +for f in sorted(os.listdir(SAVE_DIR)): + size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024) + log(f' {f} ({size}MB)') + +log('\nRunning standard 3-set eval on all tracks...') +import subprocess +subprocess.run([ + 'python3', + '/home/paulh/projects/donkeycar-rl-autoresearch/agent/run_eval.py', + '--model', os.path.join(SAVE_DIR, 'best_model.zip'), + '--sets', '3', '--steps', '2000' +], cwd='/home/paulh/projects/donkeycar-rl-autoresearch/agent') + +log('\n=== Exp 10 COMPLETE ===') diff --git a/agent/experiments/exp6_mountain_v5_proper.py b/agent/experiments/exp6_mountain_v5_proper.py new file mode 100644 index 0000000..da0d0f3 --- /dev/null +++ b/agent/experiments/exp6_mountain_v5_proper.py @@ -0,0 +1,110 @@ +""" +Exp 6: mountain_track, v5 reward, throttle_min=0.5, lr=0.000725, 90k steps +PROPERLY CHECKPOINTED - numbered saves every segment, best_model.zip kept. + +This re-runs Exp5 with the checkpointing fix. The model doing 20-second +laps at step ~30k will be captured in checkpoint_0027404.zip (or similar). 
+""" +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') + +from multitrack_runner import (wrap_env, log, _send_exit_scene, + StuckTerminationWrapper, close_and_switch, + TRAINING_TRACKS, train_multitrack, + evaluate_test_tracks) +from donkeycar_sb3_runner import ThrottleClampWrapper +from reward_wrapper import SpeedRewardWrapper +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage +import gymnasium as gym + +THROTTLE_MIN = 0.5 +LR = 0.000725 +TOTAL_STEPS = 90000 +SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp6-mountain-v5-checkpointed' +os.makedirs(SAVE_DIR, exist_ok=True) + +def make_env(env_id): + raw = gym.make(env_id) + env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) + env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5) + env = SpeedRewardWrapper(env) + return env + +def switch_to(current_id, next_id, name): + log(f' Switching to {name}...') + tmp = gym.make(current_id); time.sleep(2) + _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)])) + log(f' Connected to {name}'); return env + +log('='*60) +log('Exp 6: mountain_track, v5 reward, throttle_min=0.5') +log(f' lr={LR}, steps={TOTAL_STEPS:,}') +log(f' Save dir: {SAVE_DIR}') +log(' Checkpointing: numbered saves every segment + best_model.zip') +log('='*60) + +# Switch sim to mountain_track +log('Connecting to mountain_track...') +tmp = gym.make('donkey-generated-roads-v0'); time.sleep(2) +_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + +env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')])) +model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu') + +# ── TRAINING with full checkpointing ── +# We temporarily override TRAINING_TRACKS to mountain_track only +import multitrack_runner as mr +_orig_tracks = mr.TRAINING_TRACKS +mr.TRAINING_TRACKS = [('mountain_track', 'donkey-mountain-track-v0')] + +env, segment_rewards = train_multitrack( + model, env, + total_timesteps=TOTAL_STEPS, + steps_per_switch=TOTAL_STEPS, # one long segment — no switching + save_dir=SAVE_DIR, +) + +mr.TRAINING_TRACKS = _orig_tracks # restore + +log(f'\nTraining complete. 
Files saved in {SAVE_DIR}:') +for f in sorted(os.listdir(SAVE_DIR)): + size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024) + log(f' {f} ({size}MB)') + +env.close(); time.sleep(3) + +# ── EVAL on all tracks using best_model ── +best_path = os.path.join(SAVE_DIR, 'best_model.zip') +log(f'\nEvaluating best_model.zip on all tracks...') + +def eval_track(current_id, track_id, name, n=3): + log(f'\n--- EVAL: {name} ---') + ev = switch_to(current_id, track_id, name) + m = PPO.load(best_path, env=ev, device='cpu') + for ep in range(1, n+1): + obs = ev.reset(); total, steps, done = 0.0, 0, False + while not done and steps < 2000: + action, _ = m.predict(obs, deterministic=True) + result = ev.step(action) + if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0]) + else: obs,r,d,info=result; done=bool(d[0]) + total+=float(r[0]); steps+=1 + status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}' + log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}') + time.sleep(1) + ev.close(); time.sleep(3) + return track_id + +current = 'donkey-mountain-track-v0' +current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)') +current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)') +current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)') +current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)') + +log('\n' + '='*60) +log('Exp 6 COMPLETE. Check results above.') +log(f'Best model: {best_path}') +log('All checkpoints saved in: ' + SAVE_DIR) +log('='*60) diff --git a/agent/experiments/exp7_mountain_proper.py b/agent/experiments/exp7_mountain_proper.py new file mode 100644 index 0000000..17e7ccf --- /dev/null +++ b/agent/experiments/exp7_mountain_proper.py @@ -0,0 +1,96 @@ +""" +Exp 7: mountain_track, v5 reward, throttle_min=0.5, lr=0.000725, 90k steps +- steps_per_switch=6000 → 15 checkpoints saved across the run +- best_model.zip saved whenever a new best segment score is achieved +- Single track: TRAINING_TRACKS overridden to mountain_track only +""" +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') + +import multitrack_runner as mr +from multitrack_runner import (log, _send_exit_scene, StuckTerminationWrapper, + train_multitrack) +from donkeycar_sb3_runner import ThrottleClampWrapper +from reward_wrapper import SpeedRewardWrapper +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage +import gymnasium as gym + +THROTTLE_MIN = 0.5 +LR = 0.000725 +TOTAL_STEPS = 90000 +STEPS_PER_SEG = 6000 # 15 checkpoints across the run, NOT 1 +SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp7-mountain-checkpointed' +os.makedirs(SAVE_DIR, exist_ok=True) + +def make_env(env_id): + raw = gym.make(env_id) + env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) + env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5) + env = SpeedRewardWrapper(env) + return env + +log('='*60) +log('Exp 7: mountain_track ONLY, v5 reward, throttle_min=0.5') +log(f' lr={LR}, total_steps={TOTAL_STEPS:,}, steps_per_seg={STEPS_PER_SEG:,}') +log(f' Checkpoints: {TOTAL_STEPS // STEPS_PER_SEG} saves across the run') +log(f' Save dir: {SAVE_DIR}') +log('='*60) + +# Switch sim to mountain_track +log('Switching to mountain_track...') +tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2) +_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) +env = 
VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')])) +model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu') + +# Override to single track — segments still run every 6k steps for checkpointing +_orig = mr.TRAINING_TRACKS +mr.TRAINING_TRACKS = [('mountain_track', 'donkey-mountain-track-v0')] + +env, segment_rewards = train_multitrack( + model, env, + total_timesteps=TOTAL_STEPS, + steps_per_switch=STEPS_PER_SEG, + save_dir=SAVE_DIR, +) + +mr.TRAINING_TRACKS = _orig + +log(f'\nCheckpoints saved in {SAVE_DIR}:') +for f in sorted(os.listdir(SAVE_DIR)): + size = os.path.getsize(os.path.join(SAVE_DIR, f)) // (1024*1024) + log(f' {f} ({size}MB)') + +env.close(); time.sleep(3) + +# Eval best_model on all tracks +best_path = os.path.join(SAVE_DIR, 'best_model.zip') + +def eval_track(current_id, track_id, name, n=3): + log(f'\n--- EVAL: {name} ---') + tmp = gym.make(current_id); time.sleep(2) + _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + ev = VecTransposeImage(DummyVecEnv([lambda: make_env(track_id)])) + m = PPO.load(best_path, env=ev, device='cpu') + for ep in range(1, n+1): + obs = ev.reset(); total, steps, done = 0.0, 0, False + while not done and steps < 2000: + action, _ = m.predict(obs, deterministic=True) + result = ev.step(action) + if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0]) + else: obs,r,d,info=result; done=bool(d[0]) + total+=float(r[0]); steps+=1 + status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}' + log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}') + time.sleep(1) + ev.close(); time.sleep(3) + return track_id + +current = 'donkey-mountain-track-v0' +current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)') +current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)') +current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)') +current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)') + +log('\n=== Exp 7 COMPLETE ===') diff --git a/agent/experiments/exp8_mountain_clean.py b/agent/experiments/exp8_mountain_clean.py new file mode 100644 index 0000000..cb90823 --- /dev/null +++ b/agent/experiments/exp8_mountain_clean.py @@ -0,0 +1,133 @@ +""" +Exp 8: mountain_track, v5 reward (speed x CTE), throttle_min=0.5 +- Single TCP connection for the entire run (no disconnect/reconnect) +- Saves numbered checkpoint every 6000 steps +- Saves best_model.zip whenever a new best is found +- Circle exploit: episode terminates immediately on short lap +""" +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') + +from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper +from donkeycar_sb3_runner import ThrottleClampWrapper +from reward_wrapper import SpeedRewardWrapper +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage +from stable_baselines3.common.utils import get_schedule_fn +import gymnasium as gym + +THROTTLE_MIN = 0.5 +LR = 0.000725 +TOTAL_STEPS = 90000 +STEPS_PER_SEG = 6000 # checkpoint frequency — NOT track switching +SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp8-mountain-clean' +os.makedirs(SAVE_DIR, exist_ok=True) + +def make_env(): + raw = gym.make('donkey-mountain-track-v0') + env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) + env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5) + env = 
SpeedRewardWrapper(env) + return env + +log('='*60) +log('Exp 8: mountain_track ONLY — single connection throughout') +log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total_steps={TOTAL_STEPS:,}') +log(f' Checkpoint every {STEPS_PER_SEG:,} steps ({TOTAL_STEPS//STEPS_PER_SEG} checkpoints)') +log(f' Reward: v5 (speed x CTE-quality)') +log(f' Circle fix: short lap terminates episode immediately') +log(f' NO disconnect/reconnect between chunks') +log('='*60) + +# Connect ONCE — stay connected for the entire run +log('Connecting to mountain_track...') +tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2) +_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + +env = VecTransposeImage(DummyVecEnv([make_env])) +model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu') +log('Connected. Training begins — sim will NOT go to main menu between segments.') +log('You will see: car runs → crashes/stuck → resets to start → runs again.') + +best_reward = float('-inf') +steps_done = 0 +seg_num = 0 + +while steps_done < TOTAL_STEPS: + seg_steps = min(STEPS_PER_SEG, TOTAL_STEPS - steps_done) + seg_num += 1 + log(f'\n[Seg {seg_num}] steps {steps_done:,} → {steps_done+seg_steps:,}') + + model.learn(total_timesteps=seg_steps, reset_num_timesteps=False) + steps_done += seg_steps + + # Numbered checkpoint — never overwritten + ckpt = os.path.join(SAVE_DIR, f'checkpoint_{steps_done:07d}') + model.save(ckpt) + log(f'[Seg {seg_num}] Checkpoint saved: {ckpt}.zip') + + # Quick 1-episode deterministic eval to measure quality + try: + obs = env.reset() + ep_reward, ep_steps, done = 0.0, 0, False + while not done and ep_steps < 2000: + action, _ = model.predict(obs, deterministic=True) + result = env.step(action) + if len(result)==5: obs,r,t,tr,_ = result; done=bool(t[0] or tr[0]) + else: obs,r,d,_ = result; done=bool(d[0]) + ep_reward += float(r[0]); ep_steps += 1 + log(f'[Seg {seg_num}] Eval: {ep_reward:.1f} reward / {ep_steps} steps (deterministic)') + + if ep_reward > best_reward: + best_reward = ep_reward + best_path = os.path.join(SAVE_DIR, 'best_model') + model.save(best_path) + log(f'[Seg {seg_num}] ⭐ NEW BEST: {best_reward:.1f} → best_model.zip') + except Exception as e: + log(f'[Seg {seg_num}] Eval failed: {e}') + +env.close() +time.sleep(2) +log(f'\nTraining complete. 
Best reward: {best_reward:.1f}') +log(f'Checkpoints: {SAVE_DIR}/') +for f in sorted(os.listdir(SAVE_DIR)): + log(f' {f}') + +# Eval best model on all 4 tracks +best_path = os.path.join(SAVE_DIR, 'best_model.zip') +log(f'\nEvaluating best_model.zip on all tracks...') + +def eval_track(current_id, track_id, name, n=3): + log(f'\n--- EVAL: {name} ---') + tmp2 = gym.make(current_id); time.sleep(2) + _send_exit_scene(tmp2, verbose=False); tmp2.close(); time.sleep(5) + ev = VecTransposeImage(DummyVecEnv([lambda: ( + SpeedRewardWrapper( + StuckTerminationWrapper( + ThrottleClampWrapper(gym.make(track_id), throttle_min=THROTTLE_MIN), + stuck_steps=80, min_displacement=0.5 + ) + ) + )])) + m = PPO.load(best_path, env=ev, device='cpu') + for ep in range(1, n+1): + obs = ev.reset(); total, steps, done = 0.0, 0, False + while not done and steps < 2000: + action, _ = m.predict(obs, deterministic=True) + result = ev.step(action) + if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0]) + else: obs,r,d,info=result; done=bool(d[0]) + total+=float(r[0]); steps+=1 + status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}' + log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}') + time.sleep(1) + ev.close(); time.sleep(3) + return track_id + +current = 'donkey-mountain-track-v0' +current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)') +current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)') +current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)') +current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)') + +log('\n=== Exp 8 COMPLETE ===') diff --git a/agent/experiments/exp9_mountain_v5_throttle02.py b/agent/experiments/exp9_mountain_v5_throttle02.py new file mode 100644 index 0000000..70a9070 --- /dev/null +++ b/agent/experiments/exp9_mountain_v5_throttle02.py @@ -0,0 +1,128 @@ +""" +Exp 9: mountain_track, v5 reward, throttle_min=0.2 +ONE VARIABLE CHANGED from Exp8: throttle_min 0.5 → 0.2 + +Hypothesis: v5 reward (speed × CTE) has non-zero gradient on hill. +Model can learn to output high throttle when needed even with 0.2 floor. +Full throttle range [0.2, 1.0] allows model to also slow for corners. + +If this works: can drive mountain_track AND potentially mini_monaco corners. +If this fails: car stalls on hill, confirming 0.5 minimum is physically required. 
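+
+For reference, the throttle floor is applied per step by ThrottleClampWrapper,
+which (as used here) is assumed to simply clip the policy's throttle output:
+
+    steer, throttle = action
+    throttle = min(max(throttle, THROTTLE_MIN), 1.0)   # [0.2, 1.0] in this run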
+""" +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') + +from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper +from donkeycar_sb3_runner import ThrottleClampWrapper +from reward_wrapper import SpeedRewardWrapper +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage +import gymnasium as gym, numpy as np + +THROTTLE_MIN = 0.2 # ← ONLY CHANGE from Exp8 +LR = 0.000725 # same +TOTAL_STEPS = 90000 # same +STEPS_PER_SEG = 6000 # same — 15 checkpoints +SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp9-mountain-v5-throttle02' +os.makedirs(SAVE_DIR, exist_ok=True) + +def make_env(): + raw = gym.make('donkey-mountain-track-v0') + env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) + env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5) + env = SpeedRewardWrapper(env) + return env + +log('='*60) +log('Exp 9: mountain_track, v5 reward, throttle_min=0.2') +log('ONE CHANGE from Exp8: throttle_min 0.5 → 0.2') +log(f' lr={LR}, total_steps={TOTAL_STEPS:,}, steps_per_seg={STEPS_PER_SEG:,}') +log(f' Hypothesis: v5 gradient non-zero on hill → model learns high throttle') +log(f' Save: {SAVE_DIR}') +log('='*60) + +# Clear previous sim state +log('Clearing sim state...') +tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2) +_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + +# Single connection for entire run +env = VecTransposeImage(DummyVecEnv([make_env])) +model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu') +log('Connected. Training begins on mountain_track with throttle_min=0.2') +log('Watch: does model get over the hill?') + +best_reward = float('-inf') +steps_done, seg_num = 0, 0 + +while steps_done < TOTAL_STEPS: + seg_steps = min(STEPS_PER_SEG, TOTAL_STEPS - steps_done) + seg_num += 1 + log(f'\n[Seg {seg_num}] steps {steps_done:,} → {steps_done+seg_steps:,}') + + model.learn(total_timesteps=seg_steps, reset_num_timesteps=False) + steps_done += seg_steps + + ckpt = os.path.join(SAVE_DIR, f'checkpoint_{steps_done:07d}') + model.save(ckpt) + log(f'[Seg {seg_num}] Checkpoint: {ckpt}.zip') + + try: + obs = env.reset() + ep_reward, ep_steps, done = 0.0, 0, False + while not done and ep_steps < 2000: + action, _ = model.predict(obs, deterministic=True) + result = env.step(action) + if len(result)==5: obs,r,t,tr,_ = result; done=bool(t[0] or tr[0]) + else: obs,r,d,_ = result; done=bool(d[0]) + ep_reward += float(r[0]); ep_steps += 1 + log(f'[Seg {seg_num}] Eval: {ep_reward:.1f} reward / {ep_steps} steps (deterministic)') + if ep_reward > best_reward: + best_reward = ep_reward + model.save(os.path.join(SAVE_DIR, 'best_model')) + log(f'[Seg {seg_num}] ⭐ NEW BEST: {best_reward:.1f}') + except Exception as e: + log(f'[Seg {seg_num}] Eval error: {e}') + +env.close(); time.sleep(2) +log(f'\nTraining complete. 
Best reward: {best_reward:.1f}') + +# Eval best_model on all tracks +best_path = os.path.join(SAVE_DIR, 'best_model.zip') + +def eval_track(current_id, track_id, name, n=3): + log(f'\n--- EVAL: {name} ---') + tmp2 = gym.make(current_id); time.sleep(2) + _send_exit_scene(tmp2, verbose=False); tmp2.close(); time.sleep(5) + ev = VecTransposeImage(DummyVecEnv([lambda: ( + SpeedRewardWrapper(StuckTerminationWrapper( + ThrottleClampWrapper(gym.make(track_id), throttle_min=THROTTLE_MIN), + 80, 0.5)))])) + m = PPO.load(best_path, env=ev, device='cpu') + results = [] + for ep in range(1, n+1): + obs = ev.reset(); total, steps, done = 0.0, 0, False + while not done and steps < 2000: + action, _ = m.predict(obs, deterministic=True) + result = ev.step(action) + if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0]) + else: obs,r,d,info=result; done=bool(d[0]) + total+=float(r[0]); steps+=1 + status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}' + log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}') + results.append(steps) + time.sleep(1) + log(f' Mean steps: {np.mean(results):.0f}') + ev.close(); time.sleep(3) + return track_id + +current = 'donkey-mountain-track-v0' +current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)') +current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)') +current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)') +current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)') + +log('\n=== Exp 9 COMPLETE ===') +log(f'Compare with Exp8 best_model results:') +log(f' mountain_track: 382/529/182 (mean=364)') +log(f' mini_monaco: 154/155/104 (mean=138) ← crashed at one corner') diff --git a/agent/experiments/mountain_continue.py b/agent/experiments/mountain_continue.py new file mode 100644 index 0000000..af6bfd5 --- /dev/null +++ b/agent/experiments/mountain_continue.py @@ -0,0 +1,86 @@ +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') +from multitrack_runner import log, _send_exit_scene +from donkeycar_sb3_runner import ThrottleClampWrapper +from reward_wrapper import SpeedRewardWrapper +from multitrack_runner import StuckTerminationWrapper +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage +from stable_baselines3.common.callbacks import BaseCallback +import gymnasium as gym + +THROTTLE_MIN = 0.5 +EXTRA_STEPS = 110000 # already did 90k, adding 110k = 200k total +BASE_MODEL = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp3-mountain-throttle05/model.zip' +SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp4-mountain-200k/model' +os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True) + +def make_env(env_id): + raw = gym.make(env_id) + env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) + env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5) + env = SpeedRewardWrapper(env, speed_scale=0.1) + return env + +def switch_to(current_id, next_id, name): + log(f' → Switching to {name}...') + tmp = gym.make(current_id); time.sleep(2) + _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + raw = gym.make(next_id) + env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)])) + log(f' Connected to {name}'); return env + +class ProgressCB(BaseCallback): + def __init__(self, extra): super().__init__(verbose=0); self._last=0; self._extra=extra + def _on_step(self): 
+ if self.num_timesteps - self._last >= 10000: + log(f' +{self.num_timesteps:,} steps (of {self._extra:,} extra)') + self._last = self.num_timesteps + return True + +log('='*60) +log(f'Exp 4: CONTINUE mountain_track from exp3 — adding {EXTRA_STEPS:,} steps') +log(f' Total training will be 200,000 steps on mountain_track') +log(f' throttle_min={THROTTLE_MIN}, loading: {os.path.basename(BASE_MODEL)}') +log('='*60) + +# Switch to mountain_track +log('Switching to mountain_track...') +tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2) +_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) +env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')])) + +log(f'Loading exp3 model and continuing training...') +model = PPO.load(BASE_MODEL, env=env, device='cpu') +model.learn(total_timesteps=EXTRA_STEPS, callback=ProgressCB(EXTRA_STEPS), + reset_num_timesteps=True) +model.save(SAVE_PATH) +log(f'Saved: {SAVE_PATH}.zip') +env.close(); time.sleep(3) + +def eval_track(current_id, track_id, name, n=3): + log(f'\n--- EVAL: {name} ---') + ev = switch_to(current_id, track_id, name) + m = PPO.load(SAVE_PATH, env=ev, device='cpu') + for ep in range(1, n+1): + obs = ev.reset(); total, steps, done = 0.0, 0, False + while not done and steps < 2000: + action, _ = m.predict(obs, deterministic=True) + result = ev.step(action) + if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0]) + else: obs,r,d,info=result; done=bool(d[0]) + total+=float(r[0]); steps+=1 + status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}' + log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}') + time.sleep(1) + ev.close(); time.sleep(3) + return track_id + +log('\nEvaluating deterministic policy on all tracks...') +current = 'donkey-mountain-track-v0' +current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training track)') +current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)') +current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)') +current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)') + +log('\n=== Exp 4 COMPLETE ===') diff --git a/agent/experiments/mountain_high_throttle.py b/agent/experiments/mountain_high_throttle.py new file mode 100644 index 0000000..be218dc --- /dev/null +++ b/agent/experiments/mountain_high_throttle.py @@ -0,0 +1,106 @@ +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') +from multitrack_runner import wrap_env, log, _send_exit_scene +from donkeycar_sb3_runner import ThrottleClampWrapper +from reward_wrapper import SpeedRewardWrapper +from multitrack_runner import StuckTerminationWrapper +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage +from stable_baselines3.common.callbacks import BaseCallback +import gymnasium as gym, numpy as np + +LR = 0.000725 +TOTAL_STEPS = 90000 +THROTTLE_MIN = 0.5 # raised from 0.2 +SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp3-mountain-throttle05/model' +os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True) + +def make_env_high_throttle(env_id): + raw = gym.make(env_id) + env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) + env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5) + env = SpeedRewardWrapper(env, speed_scale=0.1) + return env + +def switch_to(current_id, next_id, name, throttle_min=THROTTLE_MIN): + log(f' → Switching to {name} 
(throttle_min={throttle_min})...')
+    tmp = gym.make(current_id); time.sleep(2)
+    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
+    # Build the wrapper chain once, inside the DummyVecEnv factory, so this switch
+    # opens exactly one sim connection. The chain is the same one
+    # make_env_high_throttle() builds, just parameterized by throttle_min.
+    def _factory(env_id=next_id, t=throttle_min):
+        e = ThrottleClampWrapper(gym.make(env_id), throttle_min=t)
+        e = StuckTerminationWrapper(e, stuck_steps=80, min_displacement=0.5)
+        return SpeedRewardWrapper(e, speed_scale=0.1)
+    env = VecTransposeImage(DummyVecEnv([_factory]))
+    log(f' Connected to {name}'); return env
+
+def simple_switch(current_id, next_id, name):
+    log(f' → Switching to {name}...')
+    tmp = gym.make(current_id); time.sleep(2)
+    _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
+    # Single connection: make_env_high_throttle() performs the only gym.make() here.
+    env = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle(next_id)]))
+    log(f' Connected to {name}'); return env
+
+class ProgressCB(BaseCallback):
+    def __init__(self, total): super().__init__(verbose=0); self._last=0; self._total=total
+    def _on_step(self):
+        if self.num_timesteps - self._last >= 10000:
+            log(f' step {self.num_timesteps:,}/{self._total:,}')
+            self._last = self.num_timesteps
+        return True
+
+log('='*60)
+log(f'Exp 3: mountain_track ONLY, throttle_min={THROTTLE_MIN}, lr={LR}')
+log('Hypothesis: higher min throttle gets car over the hill,')
+log(' allowing it to learn the full track.')
+log('='*60)
+
+# Connect - sim is on mountain_track from last test
+log('Connecting to mountain_track...')
+tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2)
+_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5)
+
+train_env = VecTransposeImage(DummyVecEnv([lambda: make_env_high_throttle('donkey-mountain-track-v0')]))
+log('Connected. 
Training...') + +model = PPO('CnnPolicy', train_env, learning_rate=LR, verbose=1, device='cpu') +model.learn(total_timesteps=TOTAL_STEPS, callback=ProgressCB(TOTAL_STEPS), + reset_num_timesteps=True) +model.save(SAVE_PATH) +log(f'Saved: {SAVE_PATH}.zip') +train_env.close(); time.sleep(3) + +# Eval on all 4 tracks +def eval_track(current_id, track_id, track_name, model_path, n=3, max_steps=2000): + log(f'\n--- EVAL: {track_name} ({n} episodes) ---') + tmp = gym.make(current_id); time.sleep(2) + _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + raw = gym.make(track_id) + ev = VecTransposeImage(DummyVecEnv([lambda e=raw: make_env_high_throttle(track_id)])) + m = PPO.load(model_path, env=ev, device='cpu') + for ep in range(1, n+1): + obs = ev.reset(); total, steps, done = 0.0, 0, False + while not done and steps < max_steps: + action, _ = m.predict(obs, deterministic=True) + result = ev.step(action) + if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0]) + else: obs,r,d,info=result; done=bool(d[0]) + total+=float(r[0]); steps+=1 + status='✅ FULL' if steps>=max_steps else f'❌ crash@{steps}' + log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}') + time.sleep(1) + ev.close(); time.sleep(3) + return track_id + +current = 'donkey-mountain-track-v0' +current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training track)', SAVE_PATH) +current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)', SAVE_PATH) +current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)', SAVE_PATH) +current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)', SAVE_PATH) + +log('\n=== Exp 3 COMPLETE ===') diff --git a/agent/experiments/mountain_v5.py b/agent/experiments/mountain_v5.py new file mode 100644 index 0000000..eabd489 --- /dev/null +++ b/agent/experiments/mountain_v5.py @@ -0,0 +1,79 @@ +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') +from multitrack_runner import log, _send_exit_scene, StuckTerminationWrapper +from donkeycar_sb3_runner import ThrottleClampWrapper +from reward_wrapper import SpeedRewardWrapper +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage +from stable_baselines3.common.callbacks import BaseCallback +import gymnasium as gym + +THROTTLE_MIN = 0.5 +LR = 0.000725 +TOTAL_STEPS = 90000 +SAVE_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp5-mountain-v5reward/model' +os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True) + +def make_env(env_id): + raw = gym.make(env_id) + env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) + env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5) + env = SpeedRewardWrapper(env) # v5 reward + return env + +def switch_to(current_id, next_id, name): + log(f' → {name}...') + tmp = gym.make(current_id); time.sleep(2) + _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)])) + log(f' Connected to {name}'); return env + +class ProgressCB(BaseCallback): + def __init__(self, total): + super().__init__(verbose=0); self._last=0; self._total=total + def _on_step(self): + if self.num_timesteps - self._last >= 10000: + log(f' step {self.num_timesteps:,}/{self._total:,}') + self._last = self.num_timesteps + return True + +log('='*60) +log('Exp 5: mountain_track, v5 reward (speed×CTE), throttle_min=0.5') 
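+# The v5 reward comes from SpeedRewardWrapper (see reward_wrapper.py). As a rough
+# sketch only, assuming the speed x CTE-quality shape described in this repo:
+#     reward ≈ speed * max(0.0, 1.0 - abs(cte) / max_cte)
+# so crawling up the hill earns near-zero reward no matter how well-centered the car is.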
+log('v5 reward gives direct gradient signal for hill: slow=low reward') +log('='*60) + +# Switch sim to mountain_track +log('Switching to mountain_track...') +tmp = gym.make('donkey-mountain-track-v0'); time.sleep(2) +_send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) +env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')])) +model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu') +model.learn(total_timesteps=TOTAL_STEPS, callback=ProgressCB(TOTAL_STEPS), + reset_num_timesteps=True) +model.save(SAVE_PATH); log(f'Saved.') +env.close(); time.sleep(3) + +def eval_track(current_id, track_id, name, n=3): + log(f'\n--- EVAL: {name} ---') + ev = switch_to(current_id, track_id, name) + m = PPO.load(SAVE_PATH, env=ev, device='cpu') + for ep in range(1, n+1): + obs = ev.reset(); total, steps, done = 0.0, 0, False + while not done and steps < 2000: + action, _ = m.predict(obs, deterministic=True) + result = ev.step(action) + if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0]) + else: obs,r,d,info=result; done=bool(d[0]) + total+=float(r[0]); steps+=1 + status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}' + log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}') + time.sleep(1) + ev.close(); time.sleep(3) + return track_id + +current = 'donkey-mountain-track-v0' +current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)') +current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)') +current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)') +current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)') +log('\n=== Exp 5 COMPLETE ===') diff --git a/agent/experiments/overnight.py b/agent/experiments/overnight.py new file mode 100644 index 0000000..81f4d28 --- /dev/null +++ b/agent/experiments/overnight.py @@ -0,0 +1,111 @@ +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') +from multitrack_runner import wrap_env, log, _send_exit_scene +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage +from stable_baselines3.common.callbacks import BaseCallback +import gymnasium as gym, numpy as np + +LR = 0.000725 + +def exit_connect(current_id, next_id, name): + log(f' → Switching to {name}...') + tmp = gym.make(current_id); time.sleep(2) + _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) + raw = gym.make(next_id) + env = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)])) + log(f' Connected to {name}'); return env + +class ProgressCB(BaseCallback): + def __init__(self,total): super().__init__(verbose=0); self._last=0; self._total=total + def _on_step(self): + if self.num_timesteps - self._last >= 10000: + log(f' step {self.num_timesteps:,}/{self._total:,}') + self._last = self.num_timesteps + return True + +def train(current_id, track_id, track_name, steps, save_path): + log(f'\nTRAINING on {track_name} — {steps:,} steps, lr={LR}') + os.makedirs(os.path.dirname(save_path), exist_ok=True) + env = exit_connect(current_id, track_id, track_name) + model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu') + model.learn(total_timesteps=steps, callback=ProgressCB(steps), reset_num_timesteps=True) + model.save(save_path); log(f' Saved: {save_path}.zip') + env.close(); time.sleep(3) + return track_id + +def train_two_tracks(current_id, steps, switch, save_path): + """Round-robin training: 
generated_track + mountain_track.""" + from multitrack_runner import close_and_switch + log(f'\nTRAINING generated_track+mountain_track — {steps:,} steps, switch={switch}, lr={LR}') + os.makedirs(os.path.dirname(save_path), exist_ok=True) + TRACKS = [('generated_track','donkey-generated-track-v0'), + ('mountain_track', 'donkey-mountain-track-v0')] + env = exit_connect(current_id, TRACKS[0][1], TRACKS[0][0]) + model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu') + steps_done, idx = 0, 0 + while steps_done < steps: + seg = min(switch, steps - steps_done) + log(f' Segment: {TRACKS[idx][0]} | {steps_done:,}/{steps:,}') + model.learn(total_timesteps=seg, reset_num_timesteps=False) + steps_done += seg + try: model.save(save_path) # checkpoint + except: pass + if steps_done < steps: + nxt = (idx+1) % 2 + env = close_and_switch(env, TRACKS[nxt][1]) + model.set_env(env) + idx = nxt + model.save(save_path); log(f' Saved: {save_path}.zip') + env.close(); time.sleep(3) + return TRACKS[idx][1] + +def eval_all(current_id, model_path, label): + log(f'\n{"="*60}') + log(f'EVAL: {label}') + log(f'{"="*60}') + tests = [ + ('generated_track','donkey-generated-track-v0'), + ('mini_monaco', 'donkey-minimonaco-track-v0'), + ('generated_road', 'donkey-generated-roads-v0'), + ] + cur = current_id + for tname, tid in tests: + ev = exit_connect(cur, tid, tname) + m = PPO.load(model_path, env=ev, device='cpu') + log(f' --- {tname} (3 episodes) ---') + for ep in range(1,4): + obs = ev.reset(); total,steps,done = 0.0,0,False + while not done and steps < 2000: + action,_ = m.predict(obs, deterministic=True) + result = ev.step(action) + if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0]) + else: obs,r,d,info=result; done=bool(d[0]) + total+=float(r[0]); steps+=1 + status='✅ FULL 2000' if steps>=2000 else f'❌ crash@{steps}' + log(f' ep{ep}: {total:.0f} reward/{steps} steps — {status}') + time.sleep(1) + ev.close(); time.sleep(3) + cur = tid + return cur + +# ── START ── sim is on mini_monaco +current = 'donkey-minimonaco-track-v0' + +log('\n'+'#'*60) +log('EXPERIMENT 1: mountain_track ONLY, 90k steps') +log('#'*60) +SAVE1 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp1-mountain-only/model' +current = train(current, 'donkey-mountain-track-v0', 'mountain_track', 90000, SAVE1) +current = eval_all(current, SAVE1, 'Exp 1: mountain_track only model') + +log('\n'+'#'*60) +log('EXPERIMENT 2: Trial 9 REPEAT — generated_track+mountain_track, 90k, switch=6851') +log('#'*60) +SAVE2 = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp2-trial9-repeat/model' +current = train_two_tracks(current, steps=90000, switch=6851, save_path=SAVE2) +current = eval_all(current, SAVE2, 'Exp 2: Trial 9 repeat model') + +log('\n'+'='*60) +log('ALL OVERNIGHT EXPERIMENTS COMPLETE') +log('='*60) diff --git a/agent/experiments/wave5_train.py b/agent/experiments/wave5_train.py new file mode 100644 index 0000000..b713bf0 --- /dev/null +++ b/agent/experiments/wave5_train.py @@ -0,0 +1,74 @@ +import sys, os, time +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') +from multitrack_runner import wrap_env, log, _send_exit_scene +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage +from stable_baselines3.common.callbacks import BaseCallback +import gymnasium as gym + +TRAIN_TRACK = 'donkey-generated-track-v0' +LR = 0.000725 +TOTAL_STEPS = 90000 +SAVE_PATH = 
'/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave5-gentrack-only/model' +os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True) + +def exit_and_connect(current_track_id, next_track_id, next_name): + """Exit current scene cleanly then connect to next track.""" + log(f'Exiting current scene, connecting to {next_name}...') + tmp = gym.make(current_track_id) + time.sleep(2) + _send_exit_scene(tmp, verbose=False) + tmp.close() + time.sleep(5) + raw = gym.make(next_track_id) + env = VecTransposeImage(DummyVecEnv([lambda e=raw: wrap_env(e)])) + log(f'Connected to {next_name}') + return env + +log('='*60) +log('Wave 5: generated_track ONLY, from scratch, lr=0.000725') +log('='*60) + +# The sim is currently on generated_road — exit and switch to generated_track +env = exit_and_connect('donkey-generated-roads-v0', TRAIN_TRACK, 'generated_track') + +model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu') +log(f'Training from scratch on generated_track, {TOTAL_STEPS:,} steps...') + +class ProgressCB(BaseCallback): + def __init__(self): super().__init__(verbose=0); self._last=0 + def _on_step(self): + if self.num_timesteps - self._last >= 10000: + log(f' step {self.num_timesteps:,}/{TOTAL_STEPS:,}') + self._last = self.num_timesteps + return True + +model.learn(total_timesteps=TOTAL_STEPS, callback=ProgressCB(), reset_num_timesteps=True) +model.save(SAVE_PATH) +log(f'Model saved.') +env.close() +time.sleep(3) + +def eval_track(current_id, eval_id, track_name, n_eps=3): + log(f'\n--- EVAL: {track_name} (zero-shot) ---') + ev = exit_and_connect(current_id, eval_id, track_name) + m = PPO.load(SAVE_PATH, env=ev, device='cpu') + for ep in range(1, n_eps+1): + obs = ev.reset() + total, steps, done = 0.0, 0, False + while not done and steps < 2000: + action, _ = m.predict(obs, deterministic=True) + result = ev.step(action) + if len(result)==5: obs,r,t,tr,info=result; done=bool(t[0] or tr[0]) + else: obs,r,d,info=result; done=bool(d[0]) + total+=float(r[0]); steps+=1 + status='✅ FULL' if steps>=2000 else f'❌ crash@{steps}' + log(f' ep{ep}: {total:.0f} reward / {steps} steps — {status}') + ev.close() + time.sleep(3) + return eval_id + +current = eval_track(TRAIN_TRACK, 'donkey-minimonaco-track-v0', 'mini_monaco') +current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road') + +log('\n=== Wave 5 COMPLETE ===')