""" Exp 6: mountain_track, v5 reward, throttle_min=0.5, lr=0.000725, 90k steps PROPERLY CHECKPOINTED - numbered saves every segment, best_model.zip kept. This re-runs Exp5 with the checkpointing fix. The model doing 20-second laps at step ~30k will be captured in checkpoint_0027404.zip (or similar). """ import sys, os, time sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') from multitrack_runner import (wrap_env, log, _send_exit_scene, StuckTerminationWrapper, close_and_switch, TRAINING_TRACKS, train_multitrack, evaluate_test_tracks) from donkeycar_sb3_runner import ThrottleClampWrapper from reward_wrapper import SpeedRewardWrapper from stable_baselines3 import PPO from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage import gymnasium as gym THROTTLE_MIN = 0.5 LR = 0.000725 TOTAL_STEPS = 90000 SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp6-mountain-v5-checkpointed' os.makedirs(SAVE_DIR, exist_ok=True) def make_env(env_id): raw = gym.make(env_id) env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5) env = SpeedRewardWrapper(env) return env def switch_to(current_id, next_id, name): log(f' Switching to {name}...') tmp = gym.make(current_id); time.sleep(2) _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) env = VecTransposeImage(DummyVecEnv([lambda: make_env(next_id)])) log(f' Connected to {name}'); return env log('='*60) log('Exp 6: mountain_track, v5 reward, throttle_min=0.5') log(f' lr={LR}, steps={TOTAL_STEPS:,}') log(f' Save dir: {SAVE_DIR}') log(' Checkpointing: numbered saves every segment + best_model.zip') log('='*60) # Switch sim to mountain_track log('Connecting to mountain_track...') tmp = gym.make('donkey-generated-roads-v0'); time.sleep(2) _send_exit_scene(tmp, verbose=False); tmp.close(); time.sleep(5) env = VecTransposeImage(DummyVecEnv([lambda: make_env('donkey-mountain-track-v0')])) 
model = PPO('CnnPolicy', env, learning_rate=LR, verbose=1, device='cpu')

# ── TRAINING with full checkpointing ──
# We temporarily override TRAINING_TRACKS to mountain_track only
import multitrack_runner as mr
_orig_tracks = mr.TRAINING_TRACKS
mr.TRAINING_TRACKS = [('mountain_track', 'donkey-mountain-track-v0')]
try:
    env, segment_rewards = train_multitrack(
        model, env,
        total_timesteps=TOTAL_STEPS,
        steps_per_switch=TOTAL_STEPS,  # one long segment — no switching
        save_dir=SAVE_DIR,
    )
finally:
    # Restore the module-level track list even when training fails, so the
    # override never outlives this experiment.
    mr.TRAINING_TRACKS = _orig_tracks

log(f'\nTraining complete. Files saved in {SAVE_DIR}:')
for fname in sorted(os.listdir(SAVE_DIR)):
    # Size is floored to whole MiB, so small files are reported as 0MB.
    size = os.path.getsize(os.path.join(SAVE_DIR, fname)) // (1024*1024)
    log(f' {fname} ({size}MB)')

env.close()
time.sleep(3)

# ── EVAL on all tracks using best_model ──
best_path = os.path.join(SAVE_DIR, 'best_model.zip')
log('\nEvaluating best_model.zip on all tracks...')


def eval_track(current_id, track_id, name, n=3, max_steps=2000):
    """Run ``n`` deterministic episodes of the best model on one track.

    Switches to ``track_id`` via ``switch_to``, loads ``best_path`` against
    the new env, and logs the total reward and step count of each episode.
    An episode that reaches ``max_steps`` is logged as FULL; one that ends
    earlier is logged as a crash at that step.

    Args:
        current_id: Gym id passed to ``switch_to`` as the track being left.
        track_id: Gym id of the track to evaluate on.
        name: Human-readable label used in the log output.
        n: Number of evaluation episodes.
        max_steps: Per-episode step cap (defaults to the original 2000).

    Returns:
        ``track_id``, so the caller can pass it as the next ``current_id``.
    """
    log(f'\n--- EVAL: {name} ---')
    ev = switch_to(current_id, track_id, name)
    try:
        m = PPO.load(best_path, env=ev, device='cpu')
        for ep in range(1, n + 1):
            obs = ev.reset()
            total, steps, done = 0.0, 0, False
            while not done and steps < max_steps:
                action, _ = m.predict(obs, deterministic=True)
                result = ev.step(action)
                # Accept both the 5-tuple (terminated/truncated) and the
                # 4-tuple (done) step formats; index 0 is the single env.
                if len(result) == 5:
                    obs, r, t, tr, info = result
                    done = bool(t[0] or tr[0])
                else:
                    obs, r, d, info = result
                    done = bool(d[0])
                total += float(r[0])
                steps += 1
            status = '✅ FULL' if steps >= max_steps else f'❌ crash@{steps}'
            log(f' ep{ep}: {total:.1f} reward / {steps} steps — {status}')
            time.sleep(1)
    finally:
        # Always close the eval env so a failed load/step does not leave
        # an env open.
        ev.close()
    time.sleep(3)
    return track_id


current = 'donkey-mountain-track-v0'
current = eval_track(current, 'donkey-mountain-track-v0', 'mountain_track (training)')
current = eval_track(current, 'donkey-generated-track-v0', 'generated_track (zero-shot)')
current = eval_track(current, 'donkey-minimonaco-track-v0', 'mini_monaco (zero-shot)')
current = eval_track(current, 'donkey-generated-roads-v0', 'generated_road (zero-shot)')

log('\n' + '='*60)
log('Exp 6 COMPLETE. Check results above.')
log(f'Best model: {best_path}')
log('All checkpoints saved in: ' + SAVE_DIR)
log('='*60)