diff --git a/agent/experiments/exp15_gentrack_from_mountain.py b/agent/experiments/exp15_gentrack_from_mountain.py
new file mode 100644
index 0000000..7f12868
--- /dev/null
+++ b/agent/experiments/exp15_gentrack_from_mountain.py
@@ -0,0 +1,235 @@
+"""
+Exp 15: Warm-start generated_track from the best mountain champion.
+
+Goal:
+- Test cross-track transfer cleanly using a single-track setup.
+- Warm-start from mountain robust winner:
+  agent/models/exp14-mountain-v5-finetune/best_robust_model_0036000.zip
+- Train on generated_track only using the known-good Exp 13 v4 setup.
+
+Why:
+- Earlier warm-start conclusions were contaminated by bad multi-track / scene
+  switching setups.
+- This isolates transfer: one source model, one target track, one stable env.
+"""
+import os
+import sys
+import time
+import traceback
+
+sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
+
+from donkeycar_sb3_runner import ThrottleClampWrapper
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
+import gymnasium as gym
+import numpy as np
+from collections import deque
+from datetime import datetime
+
+HOST = '10.0.0.55'
+PORT = 9091
+TRACK_ID = 'donkey-generated-track-v0'
+TRACK_NAME = 'generated_track'
+THROTTLE_MIN = 0.2
+SPEED_SCALE = 0.1
+LR = 0.0004
+MAX_STEPS = 300000
+EVAL_EVERY = 5000
+EVAL_MAX_STEPS = 2000  # eval episodes are cut off (and count as a pass) here
+LAP_STOP = 3
+WARM_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp14-mountain-v5-finetune/best_robust_model_0036000.zip'
+SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp15-gentrack-from-mountain'
+os.makedirs(SAVE_DIR, exist_ok=True)
+
+
+class V4RewardWrapper(gym.Wrapper):
+    """Replace the simulator reward with the Exp 13 "v4" shaped reward.
+
+    Each step is scored ``base * eff * (1 + speed_scale * speed)``: ``base``
+    falls linearly from 1 to 0 as ``|cte|`` grows to ``max_cte``, and ``eff``
+    is the path efficiency (net displacement / distance travelled over the
+    recent position window) rescaled so ``min_efficiency`` maps to 0.
+    A step that ends the episode always scores ``-1.0``.
+    """
+
+    def __init__(self, env, speed_scale=0.1, window_size=60,
+                 min_efficiency=0.05, max_cte=8.0):
+        super().__init__(env)
+        self.speed_scale = speed_scale
+        self.min_efficiency = min_efficiency
+        self.max_cte = max_cte
+        # window_size + 1 points span window_size path segments.
+        self._pos_history = deque(maxlen=window_size + 1)
+
+    def reset(self, **kwargs):
+        """Clear the position window, then reset the wrapped env."""
+        self._pos_history.clear()
+        return self.env.reset(**kwargs)
+
+    def step(self, action):
+        """Step the wrapped env, substituting the shaped reward.
+
+        Accepts both 5-tuple (terminated, truncated) and 4-tuple (done)
+        step results and returns the same arity it received.
+        """
+        result = self.env.step(action)
+        if len(result) == 5:
+            obs, _sim_r, terminated, truncated, info = result
+            reward = self._compute_reward(terminated or truncated, info)
+            return obs, reward, terminated, truncated, info
+        obs, _sim_r, done, info = result
+        return obs, self._compute_reward(done, info), done, info
+
+    @staticmethod
+    def _info_float(info, key):
+        """Read ``info[key]`` as a float; missing, falsy or malformed -> 0.0."""
+        try:
+            return float(info.get(key, 0.0) or 0.0)
+        except (TypeError, ValueError):
+            return 0.0
+
+    def _compute_reward(self, done, info):
+        """Return the v4 reward for one step (``-1.0`` if the episode ended)."""
+        if done:
+            return -1.0
+        pos = info.get('pos', None)
+        if pos is not None:
+            try:
+                self._pos_history.append(np.array(list(pos)[:3], dtype=np.float64))
+            except (TypeError, ValueError):
+                pass  # malformed sample: skip it and keep the existing window
+        base = 1.0 - min(abs(self._info_float(info, 'cte')) / self.max_cte, 1.0)
+        efficiency = self._compute_efficiency()
+        eff = max(0.0, (efficiency - self.min_efficiency) / (1.0 - self.min_efficiency))
+        speed = max(0.0, self._info_float(info, 'speed'))
+        return base * eff * (1.0 + self.speed_scale * speed)
+
+    def _compute_efficiency(self):
+        """Return net displacement / path length over the position window.
+
+        Returns 1.0 while fewer than three samples exist or the summed path
+        length is at most 1e-6.
+        """
+        if len(self._pos_history) < 3:
+            return 1.0
+        positions = list(self._pos_history)
+        net = np.linalg.norm(positions[-1] - positions[0])
+        total = sum(np.linalg.norm(b - a)
+                    for a, b in zip(positions, positions[1:]))
+        return float(net / total) if total > 1e-6 else 1.0
+
+
+def log(msg):
+    """Print *msg* with an HH:MM:SS prefix, flushing immediately."""
+    print(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}', flush=True)
+
+
+def make_env():
+    """Return a thunk for DummyVecEnv that builds the wrapped track env."""
+    def _init():
+        raw = gym.make(TRACK_ID, conf={'host': HOST, 'port': PORT})
+        env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
+        return V4RewardWrapper(env, speed_scale=SPEED_SCALE)
+    return _init
+
+
+def run_eval_episode(model, env, max_steps=EVAL_MAX_STEPS):
+    """Run one deterministic episode on *env*.
+
+    Returns ``(total_reward, steps, laps)`` where ``laps`` is the highest
+    ``lap_count`` reported in the step info during the episode.
+    """
+    obs = env.reset()
+    ep_r = 0.0
+    ep_steps = 0
+    laps = 0
+    for _ in range(max_steps):
+        action, _ = model.predict(obs, deterministic=True)
+        obs, r, d, info = env.step(action)
+        ep_r += float(r[0])
+        ep_steps += 1
+        try:
+            step_info = info[0] if isinstance(info, (list, tuple)) else info
+            laps = max(laps, int(step_info.get('lap_count', 0) or 0))
+        except (AttributeError, IndexError, TypeError, ValueError):
+            pass  # lap info is best-effort; a bad sample must not end the eval
+        if bool(d[0]):
+            break
+    return ep_r, ep_steps, laps
+
+
+log('='*60)
+log(f'Exp 15: {TRACK_NAME} warm-start from mountain robust model')
+log(f' Host: {HOST}:{PORT}')
+log(f' Warm start: {WARM_PATH}')
+log(f' throttle_min={THROTTLE_MIN}, lr={LR}')
+log(f' Reward: v4 (Exp 13 known-good generated setup)')
+log(f' Stop: eval every {EVAL_EVERY:,} steps, stop at {LAP_STOP} laps')
+log('='*60)
+
+# Fail fast, before connecting to the sim, if the warm-start file is missing.
+if not os.path.exists(WARM_PATH):
+    raise FileNotFoundError(WARM_PATH)
+
+best_reward = float('-inf')
+best_laps = 0
+steps_done = 0
+
+env = VecTransposeImage(DummyVecEnv([make_env()]))
+try:
+    # SB3's PPO.train() re-applies model.lr_schedule to the optimizer on every
+    # update, so the learning-rate override has to be injected at load time;
+    # patching optimizer.param_groups alone is overwritten on the first update.
+    model = PPO.load(
+        WARM_PATH,
+        device='cpu',
+        custom_objects={'learning_rate': LR, 'lr_schedule': lambda _: LR},
+    )
+    model.set_env(env)
+    model.learning_rate = LR
+    try:
+        for pg in model.policy.optimizer.param_groups:
+            pg['lr'] = LR
+    except Exception as e:
+        log(f' LR override warning: {e}')
+    log('Loaded warm-start model and attached generated_track env')
+
+    while steps_done < MAX_STEPS:
+        seg = min(EVAL_EVERY, MAX_STEPS - steps_done)
+        model.learn(total_timesteps=seg, reset_num_timesteps=False)
+        steps_done += seg
+
+        model.save(os.path.join(SAVE_DIR, f'checkpoint_{steps_done:07d}'))
+        model.save(os.path.join(SAVE_DIR, 'model'))
+
+        # NOTE(review): evaluation steps the same env the learner trains on, so
+        # the next learn() call resumes from the observation cached before this
+        # eval -- confirm that is acceptable or evaluate on a separate env.
+        try:
+            ep_r, ep_steps, laps = run_eval_episode(model, env)
+        except Exception as e:
+            log(f' Eval error: {e}')
+            traceback.print_exc()
+            continue
+
+        status = '✅' if ep_steps >= EVAL_MAX_STEPS else f'❌@{ep_steps}'
+        log(f'[{steps_done:,}] reward={ep_r:.1f} steps={ep_steps} laps={laps} {status}')
+        if ep_r > best_reward:
+            best_reward = ep_r
+            model.save(os.path.join(SAVE_DIR, 'best_model'))
+            log(f' ⭐ NEW BEST: {best_reward:.1f}')
+        if laps > best_laps:
+            best_laps = laps
+            log(f' 🏆 BEST LAPS: {best_laps}')
+        if laps >= LAP_STOP:
+            log(f' 🎯 {laps} laps achieved at {steps_done:,} steps — STOPPING')
+            break
+finally:
+    # Release the simulator connection even if training or saving fails.
+    env.close()
+
+time.sleep(3)
+log(f'\nDone. best_laps={best_laps} best_reward={best_reward:.1f}')
+log(f'Best model: {SAVE_DIR}/best_model.zip')
+log('=== Exp 15 COMPLETE ===')
diff --git a/agent/experiments/exp16_mountain_from_gentrack.py b/agent/experiments/exp16_mountain_from_gentrack.py
new file mode 100644
index 0000000..7d0da68
--- /dev/null
+++ b/agent/experiments/exp16_mountain_from_gentrack.py
@@ -0,0 +1,242 @@
+"""
+Exp 16: Warm-start mountain_track from the generated_track champion.
+
+Goal:
+- Test reverse transfer cleanly using a single-track setup.
+- Warm-start from generated champion:
+  agent/models/exp13-gentrack-v4/best_model.zip
+- Train on mountain_track only using the known-good Exp 14 v5 setup.
+
+Caveat:
+- Mountain may still be affected by Unity traction/material issues, so results
+  should be interpreted with that in mind.
+"""
+import os
+import sys
+import time
+import traceback
+
+sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
+
+from donkeycar_sb3_runner import ThrottleClampWrapper
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
+import gymnasium as gym
+import numpy as np
+from datetime import datetime
+
+HOST = '10.0.0.55'
+PORT = 9091
+TRACK_ID = 'donkey-mountain-track-v0'
+TRACK_NAME = 'mountain_track'
+THROTTLE_MIN = 0.2
+LR = 0.0004
+MAX_STEPS = 300000
+EVAL_EVERY = 5000
+EVAL_MAX_STEPS = 2000  # eval episodes are cut off (and count as a pass) here
+LAP_STOP = 3
+WARM_PATH = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp13-gentrack-v4/best_model.zip'
+SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp16-mountain-from-gentrack'
+os.makedirs(SAVE_DIR, exist_ok=True)
+
+
+def log(msg):
+    """Print *msg* with an HH:MM:SS prefix, flushing immediately."""
+    print(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}', flush=True)
+
+
+class V5RewardWrapper(gym.Wrapper):
+    """Replace the simulator reward with the Exp 14 "v5" shaped reward.
+
+    Each step is scored ``cte_quality * speed_norm``: ``cte_quality`` falls
+    linearly from 1 to 0 as ``|cte|`` grows to ``max_cte`` and ``speed_norm``
+    is ``speed / 10`` clipped to [0, 1]. A step that ends the episode scores
+    ``-1.0``. When ``lap_count`` increases and ``last_lap_time`` is below
+    ``min_lap_time`` the step is penalised and the episode is terminated.
+    """
+
+    def __init__(self, env, max_cte=8.0, min_lap_time=5.0):
+        super().__init__(env)
+        self.max_cte = max_cte
+        self.min_lap_time = min_lap_time
+        self._last_lc = 0  # highest lap_count seen this episode
+
+    def reset(self, **kwargs):
+        """Reset the lap counter, then reset the wrapped env."""
+        self._last_lc = 0
+        return self.env.reset(**kwargs)
+
+    def step(self, action):
+        """Step the wrapped env, substituting the shaped reward.
+
+        Accepts both 5-tuple (terminated, truncated) and 4-tuple (done)
+        step results and returns the same arity it received.
+        """
+        result = self.env.step(action)
+        gym_api = len(result) == 5
+        if gym_api:
+            obs, _r, terminated, truncated, info = result
+        else:
+            obs, _r, terminated, info = result
+            truncated = False
+        reward, force_term = self._compute(info, terminated or truncated)
+        if force_term:
+            terminated = True
+        if gym_api:
+            return obs, reward, terminated, truncated, info
+        return obs, reward, terminated or truncated, info
+
+    def _compute(self, info, done):
+        """Return ``(reward, force_terminate)`` for one step."""
+        if done:
+            return -1.0, False
+        try:
+            lc = int(info.get('lap_count', 0) or 0)
+        except (TypeError, ValueError):
+            lc = self._last_lc
+        if lc > self._last_lc:
+            self._last_lc = lc
+            try:
+                lt = float(info.get('last_lap_time', 999) or 999)
+            except (TypeError, ValueError):
+                lt = 999
+            if lt < self.min_lap_time:
+                # Penalty grows as the lap gets shorter; lt is floored at 0.1
+                # to keep the ratio bounded.
+                penalty = -10.0 * (self.min_lap_time / max(lt, 0.1))
+                return penalty, True
+        try:
+            cte = float(info.get('cte', 0) or 0)
+        except (TypeError, ValueError):
+            cte = 0.0
+        cte_quality = 1.0 - min(abs(cte) / self.max_cte, 1.0)
+        try:
+            speed = max(0.0, float(info.get('speed', 0) or 0))
+        except (TypeError, ValueError):
+            speed = 0.0
+        speed_norm = min(speed / 10.0, 1.0)
+        return cte_quality * speed_norm, False
+
+
+def make_env():
+    """Return a thunk for DummyVecEnv that builds the wrapped track env."""
+    def _init():
+        raw = gym.make(TRACK_ID, conf={'host': HOST, 'port': PORT})
+        env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
+        return V5RewardWrapper(env)
+    return _init
+
+
+def run_eval_episode(model, env, max_steps=EVAL_MAX_STEPS):
+    """Run one deterministic episode on *env*.
+
+    Returns ``(total_reward, steps, laps)`` where ``laps`` is the highest
+    ``lap_count`` reported in the step info during the episode.
+    """
+    obs = env.reset()
+    ep_r = 0.0
+    ep_steps = 0
+    laps = 0
+    for _ in range(max_steps):
+        action, _ = model.predict(obs, deterministic=True)
+        obs, r, d, info = env.step(action)
+        ep_r += float(r[0])
+        ep_steps += 1
+        try:
+            step_info = info[0] if isinstance(info, (list, tuple)) else info
+            laps = max(laps, int(step_info.get('lap_count', 0) or 0))
+        except (AttributeError, IndexError, TypeError, ValueError):
+            pass  # lap info is best-effort; a bad sample must not end the eval
+        if bool(d[0]):
+            break
+    return ep_r, ep_steps, laps
+
+
+log('='*60)
+log(f'Exp 16: {TRACK_NAME} warm-start from generated champion')
+log(f' Host: {HOST}:{PORT}')
+log(f' Warm start: {WARM_PATH}')
+log(f' throttle_min={THROTTLE_MIN}, lr={LR}')
+log(f' Reward: v5 (Exp 14 known-good mountain setup)')
+log(f' Stop: eval every {EVAL_EVERY:,} steps, stop at {LAP_STOP} laps')
+log('='*60)
+
+# Fail fast, before connecting to the sim, if the warm-start file is missing.
+if not os.path.exists(WARM_PATH):
+    raise FileNotFoundError(WARM_PATH)
+
+# scene switch first
+log('Switching sim to mountain_track...')
+_tmp = gym.make('donkey-generated-track-v0', conf={'host': HOST, 'port': PORT})
+try:
+    time.sleep(2)
+    _tmp.unwrapped.viewer.exit_scene()
+    time.sleep(0.5)
+except Exception as e:
+    log(f' exit_scene warning: {e}')
+finally:
+    _tmp.close()
+time.sleep(6)
+log('Sim should now be at main menu. Connecting to mountain_track...')
+
+best_reward = float('-inf')
+best_laps = 0
+steps_done = 0
+
+env = VecTransposeImage(DummyVecEnv([make_env()]))
+try:
+    # SB3's PPO.train() re-applies model.lr_schedule to the optimizer on every
+    # update, so the learning-rate override has to be injected at load time;
+    # patching optimizer.param_groups alone is overwritten on the first update.
+    model = PPO.load(
+        WARM_PATH,
+        device='cpu',
+        custom_objects={'learning_rate': LR, 'lr_schedule': lambda _: LR},
+    )
+    model.set_env(env)
+    model.learning_rate = LR
+    try:
+        for pg in model.policy.optimizer.param_groups:
+            pg['lr'] = LR
+    except Exception as e:
+        log(f' LR override warning: {e}')
+    log('Loaded warm-start model and attached mountain env')
+
+    while steps_done < MAX_STEPS:
+        seg = min(EVAL_EVERY, MAX_STEPS - steps_done)
+        model.learn(total_timesteps=seg, reset_num_timesteps=False)
+        steps_done += seg
+
+        model.save(os.path.join(SAVE_DIR, f'checkpoint_{steps_done:07d}'))
+        model.save(os.path.join(SAVE_DIR, 'model'))
+
+        # NOTE(review): evaluation steps the same env the learner trains on, so
+        # the next learn() call resumes from the observation cached before this
+        # eval -- confirm that is acceptable or evaluate on a separate env.
+        try:
+            ep_r, ep_steps, laps = run_eval_episode(model, env)
+        except Exception as e:
+            log(f' Eval error: {e}')
+            traceback.print_exc()
+            continue
+
+        status = '✅' if ep_steps >= EVAL_MAX_STEPS else f'❌@{ep_steps}'
+        log(f'[{steps_done:,}] reward={ep_r:.1f} steps={ep_steps} laps={laps} {status}')
+        if ep_r > best_reward:
+            best_reward = ep_r
+            model.save(os.path.join(SAVE_DIR, 'best_model'))
+            log(f' ⭐ NEW BEST: {best_reward:.1f}')
+        if laps > best_laps:
+            best_laps = laps
+            log(f' 🏆 BEST LAPS: {best_laps}')
+        if laps >= LAP_STOP:
+            log(f' 🎯 {laps} laps at {steps_done:,} steps — STOPPING')
+            break
+finally:
+    # Release the simulator connection even if training or saving fails.
+    env.close()
+
+time.sleep(3)
+log(f'\nDone. best_laps={best_laps} best_reward={best_reward:.1f}')
+log(f'Best model: {SAVE_DIR}/best_model.zip')
+log('=== Exp 16 COMPLETE ===')