From f784fdebd1fe5d108e32459fdd4786eff3497bab Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Tue, 5 May 2026 20:28:31 -0400 Subject: [PATCH] feat(exp25): wheel OverlapSphere collision fix + auto-transition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Car.cs (sdsandbox): per-wheel OverlapSphereNonAlloc in FixedUpdate catches barrier contact from any angle, any throttle — forward raycast only covered nose-first. Built, rsync'd, sim restart pending exp24 completion. exp25 script: identical to exp24 params, fresh weights, patched Unity binary. Auto-transition monitor armed: kills sim, restarts with new binary, launches exp25 when exp24 finishes (~22:00 EST). Co-Authored-By: Claude Sonnet 4.6 --- agent/SESSION_HANDOFF.md | 44 ++- .../experiments/exp25_wheel_collision_fix.py | 276 ++++++++++++++++++ 2 files changed, 311 insertions(+), 9 deletions(-) create mode 100644 agent/experiments/exp25_wheel_collision_fix.py diff --git a/agent/SESSION_HANDOFF.md b/agent/SESSION_HANDOFF.md index 6d27b0a..4c19e72 100644 --- a/agent/SESSION_HANDOFF.md +++ b/agent/SESSION_HANDOFF.md @@ -37,6 +37,28 @@ Current runtime simulator folders in use: Unity build log: - `C:\Users\Paul\AppData\Local\Temp\unity_rebuild.log` +## Car.cs Collision Fix (this session) + +### Per-wheel OverlapSphere (exp25+) + +`/mnt/c/Users/Paul/Documents/projects/sdsandbox/sdsim/Assets/Scripts/Car.cs` + +**Root cause of remaining stuck episodes:** The forward raycast added in the previous +session only covered nose-first contact when `requestTorque > 0.05`. Side contact, +diagonal contact, rear contact, and low-throttle nose contact were all missed. + +**Fix:** Added per-wheel `OverlapSphere` check in `FixedUpdate()`: +- Calls `wc.GetWorldPose()` to get each wheel's world position +- Runs `Physics.OverlapSphereNonAlloc(wheelCenter, wc.radius + 0.05f, _wheelOverlapBuf)` +- Filters hits by name containing `"barrier"` (matches `left_barrier_seg*`, `right_barrier_seg*`) +- Calls `RegisterCollision()` on first barrier hit → episode terminates immediately +- Pre-allocated `Collider[8]` buffer as a field (no GC per frame) +- Both forward raycast AND OverlapSphere are active (complementary) + +**Build:** Unity 6000.4.4f1, succeeded at ~20:35 on 2026-05-05 +**Rsync:** Done — runtime folder has new Assembly-CSharp.dll (298,496 bytes) +**Effect:** Takes effect after sim restart (pending exp24 completion) + ## What Was Fixed This Session ### Barrier physics (previous session, still in effect) @@ -115,16 +137,20 @@ to Unity physics non-determinism, NOT road variety. - Rsync'd to: `/mnt/c/Users/Paul/Downloads/DonkeySimWin/DonkeySimWin/` - Sim restarted on port 9091 with new binary -### Exp 24 status — RUNNING +### Exp 24 status — RUNNING (~100 min remaining as of 20:22) - PID 733053, launched at 20:09 on 2026-05-05 -- Log: `agent/models/exp24-discrete/run_2026-05-05_200903_discrete.log` -- Monitor with: - ```bash - tail -f /tmp/exp24.out - ``` - NOTE: The log file (run_*_discrete.log) is 0 bytes — logging.basicConfig() is a - no-op because gym_donkeycar already configured the root logger. All output goes - to /tmp/exp24.out via nohup. Fix in exp25: use `file_log.addHandler(FileHandler(...))` directly. +- Monitor: `tail -f /tmp/exp24.out` +- Results so far: 10k→250.4r/2000s ✅, 20k→320.9r/2000s ✅ (NEW BEST both times) +- NOTE: log file is 0 bytes (logging.basicConfig no-op). All output in /tmp/exp24.out. +- Auto-transition to exp25 is armed — when exp24 finishes, monitor kills sim, restarts + with wheel-fix binary, and launches exp25 automatically. + +### Exp 25 status — PENDING +- Script: `agent/experiments/exp25_wheel_collision_fix.py` +- Model dir: `agent/models/exp25-wheel-fix/` +- Monitor: `tail -f /tmp/exp25.out` (once launched) +- Key fix: per-wheel OverlapSphere in Car.cs catches any-angle barrier contact +- Binary: already rsync'd, sim restart needed (auto-transition handles this) ## Useful Commands diff --git a/agent/experiments/exp25_wheel_collision_fix.py b/agent/experiments/exp25_wheel_collision_fix.py new file mode 100644 index 0000000..7840ff7 --- /dev/null +++ b/agent/experiments/exp25_wheel_collision_fix.py @@ -0,0 +1,276 @@ +""" +Exp 25: Wheel OverlapSphere collision fix — same setup as exp24, patched Unity sim. + +What changed from exp24: + - Unity Car.cs now has per-wheel OverlapSphere checks in FixedUpdate(). + WheelColliders never fire OnCollisionEnter/Stay on the car body. The previous + forward raycast only caught nose-first contact when throttle > 0.05. Now: + * Forward raycast: still fires for fast nose-first approach (before contact) + * Per-wheel OverlapSphere: fires for any wheel touching any barrier, any angle, + regardless of throttle. Filter: only objects with "barrier" in the name. + Both checks call RegisterCollision() → episode terminates immediately on contact. + - Python speed-check backstop unchanged (speed < 0.5 for 2s → terminate). + - Everything else identical to exp24: discrete(7) steering, road regen, LR=0.0003. + - Requires sim restart after rsync of new Assembly-CSharp.dll (built 2026-05-05). +""" +import os +import sys +import time +from datetime import datetime + +sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent') + +_SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp25-wheel-fix' +_PIDFILE = os.path.join(_SAVE_DIR, 'current.pid') +os.makedirs(_SAVE_DIR, exist_ok=True) + +if os.path.exists(_PIDFILE): + try: + _old = int(open(_PIDFILE).read().strip()) + if _old != os.getpid(): + import signal + os.kill(_old, 0) + print(f'[exp25] Another instance already running (PID {_old}). Exiting.', flush=True) + sys.exit(1) + except (OSError, ValueError): + pass + +import gymnasium as gym +import numpy as np +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage + +from discretize_action import DiscretizedActionWrapper +from donkeycar_sb3_runner import ThrottleClampWrapper +from multitrack_runner import StuckTerminationWrapper +from reward_wrapper import SpeedRewardWrapper + + +HOST = 'localhost' +THROTTLE_MIN = 0.2 +LR = 0.0003 +TOTAL_STEPS = 200_000 +CHECKPOINT_EVERY = 10_000 +SCENE_RELOAD_WAIT = 5.0 + +N_STEER = 7 +N_THROTTLE = 1 + +EFFICIENCY_WINDOW = 30 +MIN_EFFICIENCY = 0.15 +MAX_CTE = 8.0 +MIN_LAP_TIME = 12.0 +PROGRESS_PATIENCE = 100 + +MAX_STUCK_SECONDS = 5.0 +MAX_EPISODE_SECONDS = 30.0 +LOW_SPEED_THRESHOLD = 0.5 +MAX_LOW_SPEED_SECONDS = 2.0 + +TRACK_ID = 'donkey-generated-roads-v0' +PORT = 9091 + + +def log(msg): + print(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}', flush=True) + + +def make_env(track_id, port): + def _init(): + raw = gym.make(track_id, conf={'host': HOST, 'port': port}) + env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) + env = DiscretizedActionWrapper(env, n_steer=N_STEER, n_throttle=N_THROTTLE) + env = StuckTerminationWrapper( + env, + stuck_steps=40, + min_displacement=0.5, + max_stuck_seconds=MAX_STUCK_SECONDS, + max_episode_seconds=MAX_EPISODE_SECONDS, + low_speed_threshold=LOW_SPEED_THRESHOLD, + max_low_speed_seconds=MAX_LOW_SPEED_SECONDS, + ) + env = SpeedRewardWrapper( + env, + window_size=EFFICIENCY_WINDOW, + min_efficiency=MIN_EFFICIENCY, + max_cte=MAX_CTE, + min_lap_time=MIN_LAP_TIME, + progress_patience=PROGRESS_PATIENCE, + ) + return env + return _init + + +def connect_env(): + new_env = DummyVecEnv([make_env(TRACK_ID, PORT)]) + new_env = VecTransposeImage(new_env) + return new_env + + +def reconnect_env(old_env): + try: + old_env.close() + except Exception as e: + log(f' env.close() warning: {e}') + time.sleep(SCENE_RELOAD_WAIT) + return connect_env() + + +log('=' * 60) +log('Exp 25: generated_road — wheel OverlapSphere collision fix') +log(f' Sim: {HOST}:{PORT} -> {TRACK_ID}') +log(f' Discrete steering: {N_STEER} bins, throttle fixed at {THROTTLE_MIN}') +log(f' LR={LR}, total={TOTAL_STEPS:,}, checkpoint every {CHECKPOINT_EVERY:,}') +log(f' Reward: v7 (speed×CTE, efficiency gate, no-progress kill)') +log(f' Stuck: position/{MAX_STUCK_SECONDS}s OR speed<{LOW_SPEED_THRESHOLD}/{MAX_LOW_SPEED_SECONDS}s') +log(f' Episode cap: {MAX_EPISODE_SECONDS}s | Road regen: every {CHECKPOINT_EVERY:,} steps') +log(f' Unity fix: per-wheel OverlapSphere → any-angle barrier detection') +log('=' * 60) + +log('Connecting to sim...') +env = connect_env() +log(f' obs={env.observation_space.shape}, action={env.action_space}') + +model = PPO( + 'CnnPolicy', + env, + learning_rate=LR, + n_steps=2048, + batch_size=64, + n_epochs=10, + gamma=0.99, + gae_lambda=0.95, + clip_range=0.2, + ent_coef=0.01, + verbose=1, + device='cpu', +) + +with open(_PIDFILE, 'w') as f: + f.write(str(os.getpid())) + +log(f'Fresh PPO (Discrete({N_STEER * N_THROTTLE})). Starting training...') + +best_total_steps = float('-inf') +best_total_reward = float('-inf') +steps_done = 0 +run_tag = datetime.now().strftime('%Y-%m-%d_%H%M%S') + '_wheel_fix' +log_path = os.path.join(_SAVE_DIR, f'run_{run_tag}.log') +best_model_path = os.path.join(_SAVE_DIR, 'best_model.zip') + +import logging +_file_handler = logging.FileHandler(log_path) +_file_handler.setFormatter(logging.Formatter('%(message)s')) +_stream_handler = logging.StreamHandler(sys.stdout) +_stream_handler.setFormatter(logging.Formatter('%(message)s')) +file_log = logging.getLogger('exp25') +file_log.setLevel(logging.INFO) +file_log.propagate = False +file_log.addHandler(_file_handler) +file_log.addHandler(_stream_handler) + + +def flog(msg): + ts = datetime.now().strftime('%H:%M:%S') + file_log.info(f'[{ts}] {msg}') + + +flog('=' * 60) +flog(f'Exp 25 started — PID {os.getpid()}') +flog(f'Log: {log_path}') +flog('=' * 60) + +while steps_done < TOTAL_STEPS: + seg_steps = min(CHECKPOINT_EVERY, TOTAL_STEPS - steps_done) + model.learn(total_timesteps=seg_steps, reset_num_timesteps=False) + steps_done += seg_steps + + ckpt = os.path.join(_SAVE_DIR, f'checkpoint_{steps_done:07d}') + model.save(ckpt) + model.save(os.path.join(_SAVE_DIR, 'model')) + flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved: {ckpt}.zip') + + flog(f' Reconnecting for fresh road...') + env = reconnect_env(env) + model.set_env(env) + flog(f' Connected (new road)') + + try: + obs = env.reset() + ep_rewards = np.zeros(env.num_envs) + ep_steps = np.zeros(env.num_envs) + done_mask = np.zeros(env.num_envs, dtype=bool) + + for _ in range(2000): + action, _ = model.predict(obs, deterministic=True) + obs, rewards, dones, infos = env.step(action) + for i in range(env.num_envs): + if not done_mask[i]: + ep_rewards[i] += rewards[i] + ep_steps[i] += 1 + if dones[i]: + done_mask[i] = True + if done_mask.all(): + break + + total_steps_eval = int(ep_steps.sum()) + total_reward_eval = float(ep_rewards.sum()) + + status = '✅' if ep_steps[0] >= 2000 else f'❌@{int(ep_steps[0])}' + flog(f' Eval: gen_road={total_reward_eval:.1f}r/{int(ep_steps[0])}s {status}') + + if (total_steps_eval > best_total_steps + or (total_steps_eval == best_total_steps + and total_reward_eval > best_total_reward)): + best_total_steps = total_steps_eval + best_total_reward = total_reward_eval + model.save(best_model_path) + flog(f' NEW BEST: steps={best_total_steps} reward={best_total_reward:.1f}') + + except Exception as e: + flog(f' Eval error: {e}') + +env.close() + +flog('=' * 60) +flog('FINAL EVALUATION: best_model on generated_road (3 fresh roads)') +flog('=' * 60) + +EVAL_SETS = 3 +EVAL_MAX_STEPS = 2000 + +steps_list = [] +reward_list = [] + +for s in range(1, EVAL_SETS + 1): + try: + time.sleep(SCENE_RELOAD_WAIT) + eval_env = connect_env() + eval_model = PPO.load(best_model_path, env=eval_env, device='cpu') + obs = eval_env.reset() + done = False + total_s = 0 + total_r = 0.0 + + while not done and total_s < EVAL_MAX_STEPS: + action, _ = eval_model.predict(obs, deterministic=True) + result = eval_env.step(action) + obs, r, done = result[0], result[1], result[2] + if hasattr(done, '__len__'): + done = bool(done[0]) + total_r += float(r) if not hasattr(r, '__len__') else float(r[0]) + total_s += 1 + + status = '✅' if total_s >= EVAL_MAX_STEPS else f'❌@{total_s}' + flog(f' Set {s}: {total_r:.1f}r / {total_s}s {status}') + steps_list.append(total_s) + reward_list.append(total_r) + eval_env.close() + + except Exception as e: + flog(f' Set {s} error: {e}') + +if steps_list: + flog(f' Mean: {np.mean(steps_list):.0f} steps / {np.mean(reward_list):.1f} reward') + +flog('Exp 25 complete.')