From 0d1acf8cdc8843b56b803e01f742cb14d2262911 Mon Sep 17 00:00:00 2001
From: Paul Huliganga <paje0101@gmail.com>
Date: Tue, 5 May 2026 17:58:33 -0400
Subject: [PATCH] feat(exp24): road regeneration between segments + fix Car.cs
 raycast
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

exp24: reconnect to the sim after each 10k-step checkpoint.  Reconnecting
reloads the scene, so sdsandbox generates a new random road.  Each training
segment now runs on a different road layout, and each checkpoint eval runs on a
freshly generated road that the policy has not trained on yet (the following
training segment then continues on that same road).  This prevents overfitting
to a single road and gives meaningful generalization metrics in the eval logs.

Car.cs: add a short forward raycast in FixedUpdate to detect barriers the front
wheels are pressing against.  WheelColliders do not fire OnCollisionEnter/Stay on
the car's MonoBehaviour, so nose-first barrier contact was invisible to Car.cs
collision callbacks.  The raycast check triggers when throttle > 0.05 and a
collider is within 0.8 m ahead, and it registers the collision the same way
OnCollisionStay does.  (The Car.cs change itself is not included in this patch,
which touches only the Python experiment script.)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../exp24_generated_road_discrete.py          | 86 ++++++++++++-------
 1 file changed, 56 insertions(+), 30 deletions(-)

diff --git a/agent/experiments/exp24_generated_road_discrete.py b/agent/experiments/exp24_generated_road_discrete.py
index a0d61b1..7fc61db 100644
--- a/agent/experiments/exp24_generated_road_discrete.py
+++ b/agent/experiments/exp24_generated_road_discrete.py
@@ -1,18 +1,22 @@
 """
-Exp 24: Discrete steering + speed-based stuck detection.
+Exp 24: Discrete steering + speed-based stuck detection + road regeneration.
 
 What changed from exp23:
   - Discrete action space: 7 steering bins × 1 throttle = 7 actions.
     Eliminates Gaussian policy noise that caused rapid steering oscillation.
     Bins: steer ∈ {-1, -0.67, -0.33, 0, 0.33, 0.67, 1}, throttle=0→clamped to 0.2.
-  - Speed-based stuck detection: if speed < 0.5 m/s for 2 wall-clock seconds
-    → terminate. Catches car pinned against a barrier regardless of lateral sliding
-    (lateral drift was resetting the position-based timer in exp23, leaving the car
-    against the wall for up to max_episode_seconds).
-  - max_episode_seconds reduced to 30s (stuck detection catches the bad cases faster;
-    120s was a consequence of stuck detection not working, not a design choice).
+  - Speed-based stuck detection: if speed < 0.5 for 2 wall-clock seconds → terminate.
+    Catches a car pinned against a barrier regardless of lateral sliding.
+    (WheelColliders don't fire OnCollisionStay on Car.cs, so perpendicular contact
+    was undetectable. This is now also fixed in Unity via a forward raycast, but the
+    Python speed check remains the reliable backstop.)
+  - Road regeneration: env is closed and reconnected after each 10k-step segment.
+    Reconnecting reloads the scene → sdsandbox generates a new random road.
+    Training diversity: each 10k-step segment trains on a different road layout.
+    Eval is always on a freshly generated road (proper generalization test).
+  - max_episode_seconds reduced to 30s (speed check handles stuck cases faster).
   - Single track: generated_road on port 9091.
-  - Fresh PPO (MlpPolicy not CnnPolicy — Discrete action space, same CNN obs encoder).
+  - Fresh PPO weights.
   - Total steps: 200k.
 """
 import os
@@ -53,9 +57,10 @@ THROTTLE_MIN     = 0.2
 LR               = 0.0003
 TOTAL_STEPS      = 200_000
 CHECKPOINT_EVERY = 10_000
+SCENE_RELOAD_WAIT = 5.0   # seconds after env.close() for sim to return to menu
 
-N_STEER  = 7   # steering bins: -1, -0.67, -0.33, 0, 0.33, 0.67, 1
-N_THROTTLE = 1  # fixed at 0.0 → clamped to THROTTLE_MIN by ThrottleClampWrapper
+N_STEER    = 7   # steering bins: -1, -0.67, -0.33, 0, 0.33, 0.67, 1
+N_THROTTLE = 1   # fixed at 0.0 → clamped to THROTTLE_MIN by ThrottleClampWrapper
 
 # Reward wrapper params (same as exp23 v7)
 EFFICIENCY_WINDOW   = 30
@@ -64,12 +69,15 @@ MAX_CTE             = 8.0
 MIN_LAP_TIME        = 12.0
 PROGRESS_PATIENCE   = 100
 
-# StuckTerminationWrapper — speed-based check is the primary stuck detector now
+# StuckTerminationWrapper
 MAX_STUCK_SECONDS       = 5.0    # position-based: 0.5m displacement timer
-MAX_EPISODE_SECONDS     = 30.0   # hard cap (reduced from 120s — speed check handles it)
-LOW_SPEED_THRESHOLD     = 0.5    # m/s — below this counts as "stuck"
+MAX_EPISODE_SECONDS     = 30.0   # hard cap
+LOW_SPEED_THRESHOLD     = 0.5    # below this counts as stuck
 MAX_LOW_SPEED_SECONDS   = 2.0    # seconds at low speed before termination
 
+TRACK_ID = 'donkey-generated-roads-v0'
+PORT     = 9091
+
 
 def log(msg):
     print(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}', flush=True)
@@ -101,27 +109,36 @@ def make_env(track_id, port):
     return _init
 
 
-def make_eval_env(track_id, port):
-    inner = make_env(track_id, port)()
-    return VecTransposeImage(DummyVecEnv([lambda e=inner: e]))
+def connect_env():
+    """Open a fresh sim connection; the sim generates a new random road on load."""
+    base_env = DummyVecEnv([make_env(TRACK_ID, PORT)])
+    # Hand back the vectorized env wrapped in VecTransposeImage.
+    return VecTransposeImage(base_env)
+
+
+def reconnect_env(old_env):
+    """Close old_env (best-effort), wait SCENE_RELOAD_WAIT s, return a new env from connect_env()."""
+    try:
+        old_env.close()
+    except Exception as e:  # a failed close is logged, not raised, so the reconnect still proceeds
+        log(f'  env.close() warning: {e}')
+    time.sleep(SCENE_RELOAD_WAIT)  # let the sim return to its menu before reconnecting
+    return connect_env()
 
 
 log('=' * 60)
-log('Exp 24: generated_road — discrete steering, speed-based stuck')
-log(f'  Sim: {HOST}:9091 -> generated_road')
+log('Exp 24: generated_road — discrete steering, speed stuck, road regen')
+log(f'  Sim: {HOST}:{PORT} -> {TRACK_ID}')
 log(f'  Discrete steering: {N_STEER} bins, throttle fixed at {THROTTLE_MIN}')
-log(f'  throttle_min={THROTTLE_MIN}, lr={LR}, total={TOTAL_STEPS:,}')
+log(f'  LR={LR}, total={TOTAL_STEPS:,}, checkpoint every {CHECKPOINT_EVERY:,}')
 log(f'  Reward: v7 (speed×CTE, efficiency gate, no-progress kill)')
-log(f'  Stuck: position≥0.5m/{MAX_STUCK_SECONDS}s OR speed<{LOW_SPEED_THRESHOLD}/{MAX_LOW_SPEED_SECONDS}s')
-log(f'  Episode cap: {MAX_EPISODE_SECONDS}s (safety net)')
-log(f'  Checkpoints every {CHECKPOINT_EVERY:,} steps')
+log(f'  Stuck: position/{MAX_STUCK_SECONDS}s OR speed<{LOW_SPEED_THRESHOLD}/{MAX_LOW_SPEED_SECONDS}s')
+log(f'  Episode cap: {MAX_EPISODE_SECONDS}s | Road regen: every {CHECKPOINT_EVERY:,} steps')
 log('=' * 60)
 
-log('Creating DummyVecEnv on generated_road...')
-env = DummyVecEnv([make_env('donkey-generated-roads-v0', 9091)])
-env = VecTransposeImage(env)
-log(f'  VecEnv num_envs={env.num_envs}, obs={env.observation_space.shape}')
-log(f'  Action space: {env.action_space}')
+log('Connecting to sim...')
+env = connect_env()
+log(f'  obs={env.observation_space.shape}, action={env.action_space}')
 
 model = PPO(
     'CnnPolicy',
@@ -141,7 +158,7 @@ model = PPO(
 with open(_PIDFILE, 'w') as f:
     f.write(str(os.getpid()))
 
-log(f'Fresh PPO model created (Discrete({N_STEER * N_THROTTLE}) actions). Starting training...')
+log(f'Fresh PPO (Discrete({N_STEER * N_THROTTLE})). Starting training...')
 
 best_total_steps  = float('-inf')
 best_total_reward = float('-inf')
@@ -179,6 +196,13 @@ while steps_done < TOTAL_STEPS:
     model.save(os.path.join(_SAVE_DIR, 'model'))
     flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved: {ckpt}.zip')
 
+    # Reconnect → sim reloads scene → new random road generated.
+    # Eval runs on this fresh road, then the next training segment uses it too.
+    flog(f'  Reconnecting for fresh road...')
+    env = reconnect_env(env)
+    model.set_env(env)
+    flog(f'  Connected (new road)')
+
     try:
         obs = env.reset()
         ep_rewards = np.zeros(env.num_envs)
@@ -217,7 +241,7 @@ while steps_done < TOTAL_STEPS:
 env.close()
 
 flog('=' * 60)
-flog('FINAL EVALUATION: best_model on generated_road')
+flog('FINAL EVALUATION: best_model on generated_road (3 fresh roads)')
 flog('=' * 60)
 
 EVAL_SETS      = 3
@@ -228,7 +252,9 @@ reward_list = []
 
 for s in range(1, EVAL_SETS + 1):
     try:
-        eval_env   = make_eval_env('donkey-generated-roads-v0', 9091)
+        # Each eval set reconnects → different random road
+        time.sleep(SCENE_RELOAD_WAIT)
+        eval_env   = connect_env()
         eval_model = PPO.load(best_model_path, env=eval_env, device='cpu')
         obs        = eval_env.reset()
         done       = False