# Patch header carried by this span (kept verbatim so no content is dropped):
# diff --git a/agent/experiments/exp26_generated_road_warmstart.py b/agent/experiments/exp26_generated_road_warmstart.py
# new file mode 100644
# index 0000000..afd84c8
# --- /dev/null
# +++ b/agent/experiments/exp26_generated_road_warmstart.py
# @@ -0,0 +1,267 @@
"""
Exp 26: Warm-start from exp25 best_model — extended training on generated_road.

What changed from exp25:
  - Warm start: loads exp25 best_model (381r @ 80k) instead of fresh weights.
    This skips the early exploration phase and pushes the policy further.
  - 300k total steps (vs 200k) — more time to improve beyond the exp25 plateau.
  - Python-side hit check now active: multitrack_runner.py checks info['hit'] != 'none'
    as the FIRST termination condition (added late in exp25 session, not loaded then).
  - Everything else identical to exp25: discrete(7) steering, wheel OverlapSphere Unity fix,
    road regen every 10k steps, LR=0.0003.

Process guard: a PID file in the save directory prevents two copies of this
experiment from running at once. It is claimed before the simulator connection
is opened and removed again when the interpreter exits.
"""
import atexit
import logging
import os
import sys
import time
from datetime import datetime

sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')

_SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart'
_PIDFILE = os.path.join(_SAVE_DIR, 'current.pid')
_WARM_MODEL = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp25-wheel-fix/best_model.zip'
os.makedirs(_SAVE_DIR, exist_ok=True)


def _read_pidfile():
    """Return the PID stored in ``_PIDFILE``, or None if it is missing or invalid."""
    try:
        with open(_PIDFILE) as f:
            return int(f.read().strip())
    except (OSError, ValueError):
        return None


def _pid_alive(pid):
    """Return True if a process with ``pid`` currently exists.

    Signal 0 performs the existence/permission check without delivering a
    signal. Non-positive values are rejected because ``os.kill`` would treat
    them as process-group targets rather than a single process.
    """
    if pid <= 0:
        return False
    try:
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but belongs to another user.
        return True
    except OSError:
        return False
    return True


def _remove_pidfile():
    """Delete the PID file at exit, but only if it still names this process."""
    if _read_pidfile() != os.getpid():
        return
    try:
        os.remove(_PIDFILE)
    except OSError:
        # Best-effort cleanup: a leftover file is tolerated by the liveness
        # check performed on the next start.
        pass


_old = _read_pidfile()
if _old is not None and _old != os.getpid() and _pid_alive(_old):
    print(f'[exp26] Another instance already running (PID {_old}). Exiting.', flush=True)
    sys.exit(1)

# Claim the PID file before the slow start-up (imports, sim connection, model
# load) so a second launch during that window is detected.
with open(_PIDFILE, 'w') as f:
    f.write(str(os.getpid()))
atexit.register(_remove_pidfile)

import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage

from discretize_action import DiscretizedActionWrapper
from donkeycar_sb3_runner import ThrottleClampWrapper
from multitrack_runner import StuckTerminationWrapper
from reward_wrapper import SpeedRewardWrapper


HOST = 'localhost'
THROTTLE_MIN = 0.2
LR = 0.0003
TOTAL_STEPS = 300_000
CHECKPOINT_EVERY = 10_000
SCENE_RELOAD_WAIT = 5.0

N_STEER = 7
N_THROTTLE = 1

EFFICIENCY_WINDOW = 30
MIN_EFFICIENCY = 0.15
MAX_CTE = 8.0
MIN_LAP_TIME = 12.0
PROGRESS_PATIENCE = 100

MAX_STUCK_SECONDS = 5.0
MAX_EPISODE_SECONDS = 30.0
LOW_SPEED_THRESHOLD = 1.0
MAX_LOW_SPEED_SECONDS = 1.5
MAX_CTE_TERMINATION = 3.0
MAX_HIGH_CTE_SECONDS = 1.0

TRACK_ID = 'donkey-generated-roads-v0'
PORT = 9091

# Evaluation settings shared by the per-checkpoint check and the final report.
EVAL_SETS = 3
EVAL_MAX_STEPS = 2000


def log(msg):
    """Print ``msg`` to stdout with an HH:MM:SS prefix (not written to the run log file)."""
    print(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}', flush=True)


def make_env(track_id, port):
    """Return a zero-argument factory that builds the fully wrapped environment.

    Wrapper order, innermost first: throttle clamp, action discretisation,
    stuck/episode termination, speed-based reward.
    """
    def _init():
        raw = gym.make(track_id, conf={'host': HOST, 'port': port})
        env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
        env = DiscretizedActionWrapper(env, n_steer=N_STEER, n_throttle=N_THROTTLE)
        env = StuckTerminationWrapper(
            env,
            stuck_steps=40,
            min_displacement=0.5,
            max_stuck_seconds=MAX_STUCK_SECONDS,
            max_episode_seconds=MAX_EPISODE_SECONDS,
            low_speed_threshold=LOW_SPEED_THRESHOLD,
            max_low_speed_seconds=MAX_LOW_SPEED_SECONDS,
            max_cte=MAX_CTE_TERMINATION,
            max_high_cte_seconds=MAX_HIGH_CTE_SECONDS,
        )
        env = SpeedRewardWrapper(
            env,
            window_size=EFFICIENCY_WINDOW,
            min_efficiency=MIN_EFFICIENCY,
            max_cte=MAX_CTE,
            min_lap_time=MIN_LAP_TIME,
            progress_patience=PROGRESS_PATIENCE,
        )
        return env
    return _init


def connect_env():
    """Open a new single-environment vectorised connection to TRACK_ID on PORT."""
    new_env = DummyVecEnv([make_env(TRACK_ID, PORT)])
    new_env = VecTransposeImage(new_env)
    return new_env


def _safe_close(env):
    """Close ``env``, logging (rather than raising) any error from ``close()``."""
    try:
        env.close()
    except Exception as e:
        log(f' env.close() warning: {e}')


def reconnect_env(old_env):
    """Close ``old_env``, wait SCENE_RELOAD_WAIT seconds, and return a fresh connection."""
    _safe_close(old_env)
    time.sleep(SCENE_RELOAD_WAIT)
    return connect_env()


def evaluate_episode(policy_model, vec_env, max_steps=EVAL_MAX_STEPS):
    """Run one deterministic episode per sub-environment.

    Each sub-environment is stepped until its first ``done`` or until
    ``max_steps`` steps have been taken; steps and rewards after a
    sub-environment's first ``done`` are not counted.

    Returns:
        (ep_steps, ep_rewards): two arrays of length ``vec_env.num_envs``.
    """
    obs = vec_env.reset()
    ep_rewards = np.zeros(vec_env.num_envs)
    ep_steps = np.zeros(vec_env.num_envs)
    finished = np.zeros(vec_env.num_envs, dtype=bool)

    for _step in range(max_steps):
        action, _state = policy_model.predict(obs, deterministic=True)
        obs, rewards, dones, _infos = vec_env.step(action)
        active = ~finished
        ep_rewards[active] += np.asarray(rewards)[active]
        ep_steps[active] += 1
        finished |= np.asarray(dones, dtype=bool)
        if finished.all():
            break

    return ep_steps, ep_rewards


log('=' * 60)
log('Exp 26: generated_road — warm start from exp25 best_model')
log(f' Sim: {HOST}:{PORT} -> {TRACK_ID}')
log(f' Warm model: {_WARM_MODEL}')
log(f' Discrete steering: {N_STEER} bins, throttle fixed at {THROTTLE_MIN}')
log(f' LR={LR}, total={TOTAL_STEPS:,}, checkpoint every {CHECKPOINT_EVERY:,}')
log(' Reward: v7 (speed×CTE, efficiency gate, no-progress kill)')
log(f' Stuck: speed<{LOW_SPEED_THRESHOLD}/{MAX_LOW_SPEED_SECONDS}s OR hit!=none OR CTE>{MAX_CTE_TERMINATION}/{MAX_HIGH_CTE_SECONDS}s')
log(f' Episode cap: {MAX_EPISODE_SECONDS}s | Road regen: every {CHECKPOINT_EVERY:,} steps')
log('=' * 60)

log('Connecting to sim...')
env = connect_env()
log(f' obs={env.observation_space.shape}, action={env.action_space}')

log('Loading warm-start model from exp25...')
# The learning-rate schedule is built while the model is loaded, so assigning
# `model.learning_rate` afterwards would not reach the optimizer. Overriding
# the stored value through `custom_objects` makes LR take effect.
model = PPO.load(
    _WARM_MODEL,
    env=env,
    device='cpu',
    custom_objects={'learning_rate': LR},
)
log(f' Warm model loaded. LR={LR}')

best_total_steps = float('-inf')
best_total_reward = float('-inf')
steps_done = 0
run_tag = datetime.now().strftime('%Y-%m-%d_%H%M%S') + '_warmstart'
log_path = os.path.join(_SAVE_DIR, f'run_{run_tag}.log')
best_model_path = os.path.join(_SAVE_DIR, 'best_model.zip')

_file_handler = logging.FileHandler(log_path)
_file_handler.setFormatter(logging.Formatter('%(message)s'))
_stream_handler = logging.StreamHandler(sys.stdout)
_stream_handler.setFormatter(logging.Formatter('%(message)s'))
file_log = logging.getLogger('exp26')
file_log.setLevel(logging.INFO)
file_log.propagate = False
file_log.addHandler(_file_handler)
file_log.addHandler(_stream_handler)


def flog(msg):
    """Write ``msg`` with an HH:MM:SS prefix to both the run log file and stdout."""
    ts = datetime.now().strftime('%H:%M:%S')
    file_log.info(f'[{ts}] {msg}')


flog('=' * 60)
flog(f'Exp 26 started — PID {os.getpid()}')
flog(f'Log: {log_path}')
flog('Warm start: exp25 best_model (381r @ 80k)')
flog('=' * 60)

while steps_done < TOTAL_STEPS:
    seg_steps = min(CHECKPOINT_EVERY, TOTAL_STEPS - steps_done)
    model.learn(total_timesteps=seg_steps, reset_num_timesteps=False)
    steps_done += seg_steps

    ckpt = os.path.join(_SAVE_DIR, f'checkpoint_{steps_done:07d}')
    model.save(ckpt)
    model.save(os.path.join(_SAVE_DIR, 'model'))
    flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved: {ckpt}.zip')

    flog(' Reconnecting for fresh road...')
    env = reconnect_env(env)
    model.set_env(env)
    flog(' Connected (new road)')

    try:
        ep_steps, ep_rewards = evaluate_episode(model, env, EVAL_MAX_STEPS)

        total_steps_eval = int(ep_steps.sum())
        total_reward_eval = float(ep_rewards.sum())

        status = '✅' if ep_steps[0] >= EVAL_MAX_STEPS else f'❌@{int(ep_steps[0])}'
        flog(f' Eval: gen_road={total_reward_eval:.1f}r/{int(ep_steps[0])}s {status}')

        # Rank by survival (steps) first; reward only breaks ties.
        if (total_steps_eval > best_total_steps
                or (total_steps_eval == best_total_steps
                    and total_reward_eval > best_total_reward)):
            best_total_steps = total_steps_eval
            best_total_reward = total_reward_eval
            model.save(best_model_path)
            flog(f' NEW BEST: steps={best_total_steps} reward={best_total_reward:.1f}')

    except Exception as e:
        # An evaluation failure must not abort the remaining training segments.
        flog(f' Eval error: {e}')

env.close()

flog('=' * 60)
flog('FINAL EVALUATION: best_model on generated_road (3 fresh roads)')
flog('=' * 60)

steps_list = []
reward_list = []

for s in range(1, EVAL_SETS + 1):
    eval_env = None
    try:
        time.sleep(SCENE_RELOAD_WAIT)
        eval_env = connect_env()
        eval_model = PPO.load(best_model_path, env=eval_env, device='cpu')
        set_steps, set_rewards = evaluate_episode(eval_model, eval_env, EVAL_MAX_STEPS)
        total_s = int(set_steps[0])
        total_r = float(set_rewards[0])

        status = '✅' if total_s >= EVAL_MAX_STEPS else f'❌@{total_s}'
        flog(f' Set {s}: {total_r:.1f}r / {total_s}s {status}')
        steps_list.append(total_s)
        reward_list.append(total_r)

    except Exception as e:
        flog(f' Set {s} error: {e}')

    finally:
        # Always release the connection, including after a failed set, so the
        # next set does not open a second connection on the same port.
        if eval_env is not None:
            _safe_close(eval_env)

if steps_list:
    flog(f' Mean: {np.mean(steps_list):.0f} steps / {np.mean(reward_list):.1f} reward')

flog('Exp 26 complete.')

# Non-Python patch residue that shared the final source line with this script
# (start of the hunk for agent/models/exp26-warmstart/current.pid), kept verbatim:
# diff --git a/agent/models/exp26-warmstart/current.pid b/agent/models/exp26-warmstart/current.pid new file mode 100644 index 0000000..96ff464 --- /dev/null +++ b/agent/models/exp26-warmstart/current.pid @@ -0,0 +1 @@ +962156 \ No
newline at end of file diff --git a/agent/models/exp26-warmstart/run_2026-05-06_073652_warmstart.log b/agent/models/exp26-warmstart/run_2026-05-06_073652_warmstart.log new file mode 100644 index 0000000..e3d3665 --- /dev/null +++ b/agent/models/exp26-warmstart/run_2026-05-06_073652_warmstart.log @@ -0,0 +1,135 @@ +[07:36:52] ============================================================ +[07:36:52] Exp 26 started — PID 962156 +[07:36:52] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/run_2026-05-06_073652_warmstart.log +[07:36:52] Warm start: exp25 best_model (381r @ 80k) +[07:36:52] ============================================================ +[07:41:49] [10,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0010000.zip +[07:41:49] Reconnecting for fresh road... +[07:41:54] Connected (new road) +[07:42:20] Eval: gen_road=380.0r/2000s ✅ +[07:42:20] NEW BEST: steps=2000 reward=380.0 +[07:47:11] [20,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0020000.zip +[07:47:11] Reconnecting for fresh road... +[07:47:16] Connected (new road) +[07:47:42] Eval: gen_road=388.8r/2000s ✅ +[07:47:43] NEW BEST: steps=2000 reward=388.8 +[07:52:32] [30,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0030000.zip +[07:52:32] Reconnecting for fresh road... +[07:52:37] Connected (new road) +[07:53:03] Eval: gen_road=379.7r/2000s ✅ +[07:57:55] [40,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0040000.zip +[07:57:55] Reconnecting for fresh road... 
+[07:58:00] Connected (new road) +[07:58:27] Eval: gen_road=371.8r/2000s ✅ +[08:03:15] [50,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0050000.zip +[08:03:15] Reconnecting for fresh road... +[08:03:20] Connected (new road) +[08:03:47] Eval: gen_road=374.4r/2000s ✅ +[08:08:44] [60,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0060000.zip +[08:08:44] Reconnecting for fresh road... +[08:08:50] Connected (new road) +[08:09:16] Eval: gen_road=370.9r/2000s ✅ +[08:14:11] [70,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0070000.zip +[08:14:11] Reconnecting for fresh road... +[08:14:16] Connected (new road) +[08:14:42] Eval: gen_road=375.3r/2000s ✅ +[08:19:33] [80,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0080000.zip +[08:19:33] Reconnecting for fresh road... +[08:19:38] Connected (new road) +[08:20:04] Eval: gen_road=359.1r/2000s ✅ +[08:24:55] [90,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0090000.zip +[08:24:55] Reconnecting for fresh road... +[08:25:00] Connected (new road) +[08:25:27] Eval: gen_road=357.5r/2000s ✅ +[08:30:19] [100,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0100000.zip +[08:30:19] Reconnecting for fresh road... +[08:30:24] Connected (new road) +[08:30:51] Eval: gen_road=361.5r/2000s ✅ +[08:35:39] [110,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0110000.zip +[08:35:39] Reconnecting for fresh road... 
+[08:35:44] Connected (new road) +[08:36:10] Eval: gen_road=353.5r/2000s ✅ +[08:41:01] [120,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0120000.zip +[08:41:01] Reconnecting for fresh road... +[08:41:06] Connected (new road) +[08:41:32] Eval: gen_road=355.0r/2000s ✅ +[08:46:25] [130,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0130000.zip +[08:46:25] Reconnecting for fresh road... +[08:46:30] Connected (new road) +[08:46:56] Eval: gen_road=348.6r/2000s ✅ +[08:51:52] [140,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0140000.zip +[08:51:52] Reconnecting for fresh road... +[08:51:57] Connected (new road) +[08:52:23] Eval: gen_road=333.6r/2000s ✅ +[08:57:15] [150,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0150000.zip +[08:57:15] Reconnecting for fresh road... +[08:57:20] Connected (new road) +[08:57:46] Eval: gen_road=327.4r/2000s ✅ +[09:02:40] [160,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0160000.zip +[09:02:40] Reconnecting for fresh road... +[09:02:45] Connected (new road) +[09:03:11] Eval: gen_road=332.3r/2000s ✅ +[09:08:07] [170,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0170000.zip +[09:08:07] Reconnecting for fresh road... +[09:08:12] Connected (new road) +[09:08:22] Eval: gen_road=96.7r/626s ❌@626 +[09:13:17] [180,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0180000.zip +[09:13:17] Reconnecting for fresh road... 
+[09:13:22] Connected (new road) +[09:13:37] Eval: gen_road=157.8r/1009s ❌@1009 +[09:18:24] [190,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0190000.zip +[09:18:24] Reconnecting for fresh road... +[09:18:29] Connected (new road) +[09:18:55] Eval: gen_road=327.3r/2000s ✅ +[09:23:45] [200,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0200000.zip +[09:23:45] Reconnecting for fresh road... +[09:23:50] Connected (new road) +[09:24:01] Eval: gen_road=107.7r/728s ❌@728 +[09:28:57] [210,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0210000.zip +[09:28:57] Reconnecting for fresh road... +[09:29:02] Connected (new road) +[09:29:14] Eval: gen_road=104.2r/726s ❌@726 +[09:34:15] [220,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0220000.zip +[09:34:15] Reconnecting for fresh road... +[09:34:20] Connected (new road) +[09:34:40] Eval: gen_road=207.1r/1390s ❌@1390 +[09:40:21] [230,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0230000.zip +[09:40:21] Reconnecting for fresh road... +[09:40:27] Connected (new road) +[09:40:53] Eval: gen_road=290.6r/2000s ✅ +[09:46:20] [240,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0240000.zip +[09:46:20] Reconnecting for fresh road... +[09:46:25] Connected (new road) +[09:46:51] Eval: gen_road=298.6r/2000s ✅ +[09:52:16] [250,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0250000.zip +[09:52:16] Reconnecting for fresh road... 
+[09:52:21] Connected (new road) +[09:52:47] Eval: gen_road=309.2r/2000s ✅ +[09:57:58] [260,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0260000.zip +[09:57:58] Reconnecting for fresh road... +[09:58:03] Connected (new road) +[09:58:30] Eval: gen_road=305.2r/2000s ✅ +[10:03:51] [270,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0270000.zip +[10:03:51] Reconnecting for fresh road... +[10:03:56] Connected (new road) +[10:04:01] Eval: gen_road=22.3r/206s ❌@206 +[10:09:22] [280,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0280000.zip +[10:09:22] Reconnecting for fresh road... +[10:09:27] Connected (new road) +[10:09:53] Eval: gen_road=311.9r/2000s ✅ +[10:14:53] [290,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0290000.zip +[10:14:53] Reconnecting for fresh road... +[10:14:58] Connected (new road) +[10:15:24] Eval: gen_road=295.8r/2000s ✅ +[10:20:23] [300,000/300,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp26-warmstart/checkpoint_0300000.zip +[10:20:23] Reconnecting for fresh road... +[10:20:28] Connected (new road) +[10:20:42] Eval: gen_road=125.3r/879s ❌@879 +[10:20:42] ============================================================ +[10:20:42] FINAL EVALUATION: best_model on generated_road (3 fresh roads) +[10:20:42] ============================================================ +[10:21:15] Set 1: 389.5r / 2000s ✅ +[10:21:31] Set 2: 110.7r / 628s ❌@628 +[10:22:03] Set 3: 389.2r / 2000s ✅ +[10:22:03] Mean: 1543 steps / 296.5 reward +[10:22:03] Exp 26 complete.