From 0d1acf8cdc8843b56b803e01f742cb14d2262911 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Tue, 5 May 2026 17:58:33 -0400 Subject: [PATCH] feat(exp24): road regeneration between segments + fix Car.cs raycast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit exp24: reconnect to sim after each 10k-step checkpoint. Reconnecting reloads the scene → sdsandbox generates a new random road. Each training segment and each checkpoint eval now runs on a different road layout, preventing overfitting to a single road and giving meaningful generalization metrics in the eval logs. Car.cs: add a short forward raycast in FixedUpdate to detect barriers the front wheels are pressing against. WheelColliders do not fire OnCollisionEnter/Stay on the car's MonoBehaviour, so nose-first barrier contact was invisible to Car.cs collision callbacks. The raycast fires when throttle > 0.05 and a collider is within 0.8m forward — registers the collision the same way OnCollisionStay does. Co-Authored-By: Claude Sonnet 4.6 --- .../exp24_generated_road_discrete.py | 86 ++++++++++++------- 1 file changed, 56 insertions(+), 30 deletions(-) diff --git a/agent/experiments/exp24_generated_road_discrete.py b/agent/experiments/exp24_generated_road_discrete.py index a0d61b1..7fc61db 100644 --- a/agent/experiments/exp24_generated_road_discrete.py +++ b/agent/experiments/exp24_generated_road_discrete.py @@ -1,18 +1,22 @@ """ -Exp 24: Discrete steering + speed-based stuck detection. +Exp 24: Discrete steering + speed-based stuck detection + road regeneration. What changed from exp23: - Discrete action space: 7 steering bins × 1 throttle = 7 actions. Eliminates Gaussian policy noise that caused rapid steering oscillation. Bins: steer ∈ {-1, -0.67, -0.33, 0, 0.33, 0.67, 1}, throttle=0→clamped to 0.2. - - Speed-based stuck detection: if speed < 0.5 m/s for 2 wall-clock seconds - → terminate. Catches car pinned against a barrier regardless of lateral sliding - (lateral drift was resetting the position-based timer in exp23, leaving the car - against the wall for up to max_episode_seconds). - - max_episode_seconds reduced to 30s (stuck detection catches the bad cases faster; - 120s was a consequence of stuck detection not working, not a design choice). + - Speed-based stuck detection: if speed < 0.5 for 2 wall-clock seconds → terminate. + Catches car pinned against a barrier regardless of lateral sliding. + (WheelColliders don't fire OnCollisionStay on Car.cs — perpendicular contact was + undetectable. Now also fixed in Unity via forward raycast, but Python speed check + is the reliable backstop.) + - Road regeneration: env is closed and reconnected after each 10k-step segment. + Reconnecting reloads the scene → sdsandbox generates a new random road. + Training diversity: each 10k-step segment trains on a different road layout. + Eval is always on a freshly generated road (proper generalization test). + - max_episode_seconds reduced to 30s (speed check handles stuck cases faster). - Single track: generated_road on port 9091. - - Fresh PPO (MlpPolicy not CnnPolicy — Discrete action space, same CNN obs encoder). + - Fresh PPO weights. - Total steps: 200k. """ import os @@ -53,9 +57,10 @@ THROTTLE_MIN = 0.2 LR = 0.0003 TOTAL_STEPS = 200_000 CHECKPOINT_EVERY = 10_000 +SCENE_RELOAD_WAIT = 5.0 # seconds after env.close() for sim to return to menu -N_STEER = 7 # steering bins: -1, -0.67, -0.33, 0, 0.33, 0.67, 1 -N_THROTTLE = 1 # fixed at 0.0 → clamped to THROTTLE_MIN by ThrottleClampWrapper +N_STEER = 7 # steering bins: -1, -0.67, -0.33, 0, 0.33, 0.67, 1 +N_THROTTLE = 1 # fixed at 0.0 → clamped to THROTTLE_MIN by ThrottleClampWrapper # Reward wrapper params (same as exp23 v7) EFFICIENCY_WINDOW = 30 @@ -64,12 +69,15 @@ MAX_CTE = 8.0 MIN_LAP_TIME = 12.0 PROGRESS_PATIENCE = 100 -# StuckTerminationWrapper — speed-based check is the primary stuck detector now +# StuckTerminationWrapper MAX_STUCK_SECONDS = 5.0 # position-based: 0.5m displacement timer -MAX_EPISODE_SECONDS = 30.0 # hard cap (reduced from 120s — speed check handles it) -LOW_SPEED_THRESHOLD = 0.5 # m/s — below this counts as "stuck" +MAX_EPISODE_SECONDS = 30.0 # hard cap +LOW_SPEED_THRESHOLD = 0.5 # below this counts as stuck MAX_LOW_SPEED_SECONDS = 2.0 # seconds at low speed before termination +TRACK_ID = 'donkey-generated-roads-v0' +PORT = 9091 + def log(msg): print(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}', flush=True) @@ -101,27 +109,36 @@ def make_env(track_id, port): return _init -def make_eval_env(track_id, port): - inner = make_env(track_id, port)() - return VecTransposeImage(DummyVecEnv([lambda e=inner: e])) +def connect_env(): + """Connect to the sim. Reloading generates a new random road.""" + new_env = DummyVecEnv([make_env(TRACK_ID, PORT)]) + new_env = VecTransposeImage(new_env) + return new_env + + +def reconnect_env(old_env): + """Close old env, wait for sim to return to menu, reconnect with new road.""" + try: + old_env.close() + except Exception as e: + log(f' env.close() warning: {e}') + time.sleep(SCENE_RELOAD_WAIT) + return connect_env() log('=' * 60) -log('Exp 24: generated_road — discrete steering, speed-based stuck') -log(f' Sim: {HOST}:9091 -> generated_road') +log('Exp 24: generated_road — discrete steering, speed stuck, road regen') +log(f' Sim: {HOST}:{PORT} -> {TRACK_ID}') log(f' Discrete steering: {N_STEER} bins, throttle fixed at {THROTTLE_MIN}') -log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total={TOTAL_STEPS:,}') +log(f' LR={LR}, total={TOTAL_STEPS:,}, checkpoint every {CHECKPOINT_EVERY:,}') log(f' Reward: v7 (speed×CTE, efficiency gate, no-progress kill)') -log(f' Stuck: position≥0.5m/{MAX_STUCK_SECONDS}s OR speed<{LOW_SPEED_THRESHOLD}/{MAX_LOW_SPEED_SECONDS}s') -log(f' Episode cap: {MAX_EPISODE_SECONDS}s (safety net)') -log(f' Checkpoints every {CHECKPOINT_EVERY:,} steps') +log(f' Stuck: position/{MAX_STUCK_SECONDS}s OR speed<{LOW_SPEED_THRESHOLD}/{MAX_LOW_SPEED_SECONDS}s') +log(f' Episode cap: {MAX_EPISODE_SECONDS}s | Road regen: every {CHECKPOINT_EVERY:,} steps') log('=' * 60) -log('Creating DummyVecEnv on generated_road...') -env = DummyVecEnv([make_env('donkey-generated-roads-v0', 9091)]) -env = VecTransposeImage(env) -log(f' VecEnv num_envs={env.num_envs}, obs={env.observation_space.shape}') -log(f' Action space: {env.action_space}') +log('Connecting to sim...') +env = connect_env() +log(f' obs={env.observation_space.shape}, action={env.action_space}') model = PPO( 'CnnPolicy', @@ -141,7 +158,7 @@ model = PPO( with open(_PIDFILE, 'w') as f: f.write(str(os.getpid())) -log(f'Fresh PPO model created (Discrete({N_STEER * N_THROTTLE}) actions). Starting training...') +log(f'Fresh PPO (Discrete({N_STEER * N_THROTTLE})). Starting training...') best_total_steps = float('-inf') best_total_reward = float('-inf') @@ -179,6 +196,13 @@ while steps_done < TOTAL_STEPS: model.save(os.path.join(_SAVE_DIR, 'model')) flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved: {ckpt}.zip') + # Reconnect → sim reloads scene → new random road generated. + # Eval runs on this fresh road, then the next training segment uses it too. + flog(f' Reconnecting for fresh road...') + env = reconnect_env(env) + model.set_env(env) + flog(f' Connected (new road)') + try: obs = env.reset() ep_rewards = np.zeros(env.num_envs) @@ -217,7 +241,7 @@ while steps_done < TOTAL_STEPS: env.close() flog('=' * 60) -flog('FINAL EVALUATION: best_model on generated_road') +flog('FINAL EVALUATION: best_model on generated_road (3 fresh roads)') flog('=' * 60) EVAL_SETS = 3 @@ -228,7 +252,9 @@ reward_list = [] for s in range(1, EVAL_SETS + 1): try: - eval_env = make_eval_env('donkey-generated-roads-v0', 9091) + # Each eval set reconnects → different random road + time.sleep(SCENE_RELOAD_WAIT) + eval_env = connect_env() eval_model = PPO.load(best_model_path, env=eval_env, device='cpu') obs = eval_env.reset() done = False