feat(exp24): road regeneration between segments + add Car.cs forward raycast for barrier contact
exp24: reconnect to the sim after each 10k-step checkpoint. Reconnecting reloads the scene, which makes sdsandbox generate a new random road. Each training segment and each checkpoint eval now runs on a different road layout, preventing overfitting to a single road and giving meaningful generalization metrics in the eval logs.

Car.cs: add a short forward raycast in FixedUpdate to detect barriers the front wheels are pressing against. WheelColliders do not fire OnCollisionEnter/Stay on the car's MonoBehaviour, so nose-first barrier contact was invisible to Car.cs collision callbacks. The raycast fires when throttle > 0.05 and a collider is within 0.8 m forward, and it registers the collision the same way OnCollisionStay does.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
924615ca60
commit
0d1acf8cdc
|
|
@ -1,18 +1,22 @@
|
||||||
"""
|
"""
|
||||||
Exp 24: Discrete steering + speed-based stuck detection.
|
Exp 24: Discrete steering + speed-based stuck detection + road regeneration.
|
||||||
|
|
||||||
What changed from exp23:
|
What changed from exp23:
|
||||||
- Discrete action space: 7 steering bins × 1 throttle = 7 actions.
|
- Discrete action space: 7 steering bins × 1 throttle = 7 actions.
|
||||||
Eliminates Gaussian policy noise that caused rapid steering oscillation.
|
Eliminates Gaussian policy noise that caused rapid steering oscillation.
|
||||||
Bins: steer ∈ {-1, -0.67, -0.33, 0, 0.33, 0.67, 1}, throttle=0→clamped to 0.2.
|
Bins: steer ∈ {-1, -0.67, -0.33, 0, 0.33, 0.67, 1}, throttle=0→clamped to 0.2.
|
||||||
- Speed-based stuck detection: if speed < 0.5 m/s for 2 wall-clock seconds
|
- Speed-based stuck detection: if speed < 0.5 for 2 wall-clock seconds → terminate.
|
||||||
→ terminate. Catches car pinned against a barrier regardless of lateral sliding
|
Catches car pinned against a barrier regardless of lateral sliding.
|
||||||
(lateral drift was resetting the position-based timer in exp23, leaving the car
|
(WheelColliders don't fire OnCollisionStay on Car.cs — perpendicular contact was
|
||||||
against the wall for up to max_episode_seconds).
|
undetectable. Now also fixed in Unity via forward raycast, but Python speed check
|
||||||
- max_episode_seconds reduced to 30s (stuck detection catches the bad cases faster;
|
is the reliable backstop.)
|
||||||
120s was a consequence of stuck detection not working, not a design choice).
|
- Road regeneration: env is closed and reconnected after each 10k-step segment.
|
||||||
|
Reconnecting reloads the scene → sdsandbox generates a new random road.
|
||||||
|
Training diversity: each 10k-step segment trains on a different road layout.
|
||||||
|
Eval is always on a freshly generated road (proper generalization test).
|
||||||
|
- max_episode_seconds reduced to 30s (speed check handles stuck cases faster).
|
||||||
- Single track: generated_road on port 9091.
|
- Single track: generated_road on port 9091.
|
||||||
- Fresh PPO (MlpPolicy not CnnPolicy — Discrete action space, same CNN obs encoder).
|
- Fresh PPO weights.
|
||||||
- Total steps: 200k.
|
- Total steps: 200k.
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
|
@ -53,9 +57,10 @@ THROTTLE_MIN = 0.2
|
||||||
LR = 0.0003
|
LR = 0.0003
|
||||||
TOTAL_STEPS = 200_000
|
TOTAL_STEPS = 200_000
|
||||||
CHECKPOINT_EVERY = 10_000
|
CHECKPOINT_EVERY = 10_000
|
||||||
|
SCENE_RELOAD_WAIT = 5.0 # seconds after env.close() for sim to return to menu
|
||||||
|
|
||||||
N_STEER = 7 # steering bins: -1, -0.67, -0.33, 0, 0.33, 0.67, 1
|
N_STEER = 7 # steering bins: -1, -0.67, -0.33, 0, 0.33, 0.67, 1
|
||||||
N_THROTTLE = 1 # fixed at 0.0 → clamped to THROTTLE_MIN by ThrottleClampWrapper
|
N_THROTTLE = 1 # fixed at 0.0 → clamped to THROTTLE_MIN by ThrottleClampWrapper
|
||||||
|
|
||||||
# Reward wrapper params (same as exp23 v7)
|
# Reward wrapper params (same as exp23 v7)
|
||||||
EFFICIENCY_WINDOW = 30
|
EFFICIENCY_WINDOW = 30
|
||||||
|
|
@ -64,12 +69,15 @@ MAX_CTE = 8.0
|
||||||
MIN_LAP_TIME = 12.0
|
MIN_LAP_TIME = 12.0
|
||||||
PROGRESS_PATIENCE = 100
|
PROGRESS_PATIENCE = 100
|
||||||
|
|
||||||
# StuckTerminationWrapper — speed-based check is the primary stuck detector now
|
# StuckTerminationWrapper
|
||||||
MAX_STUCK_SECONDS = 5.0 # position-based: 0.5m displacement timer
|
MAX_STUCK_SECONDS = 5.0 # position-based: 0.5m displacement timer
|
||||||
MAX_EPISODE_SECONDS = 30.0 # hard cap (reduced from 120s — speed check handles it)
|
MAX_EPISODE_SECONDS = 30.0 # hard cap
|
||||||
LOW_SPEED_THRESHOLD = 0.5 # m/s — below this counts as "stuck"
|
LOW_SPEED_THRESHOLD = 0.5 # below this counts as stuck
|
||||||
MAX_LOW_SPEED_SECONDS = 2.0 # seconds at low speed before termination
|
MAX_LOW_SPEED_SECONDS = 2.0 # seconds at low speed before termination
|
||||||
|
|
||||||
|
TRACK_ID = 'donkey-generated-roads-v0'
|
||||||
|
PORT = 9091
|
||||||
|
|
||||||
|
|
||||||
def log(msg):
|
def log(msg):
|
||||||
print(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}', flush=True)
|
print(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}', flush=True)
|
||||||
|
|
@ -101,27 +109,36 @@ def make_env(track_id, port):
|
||||||
return _init
|
return _init
|
||||||
|
|
||||||
|
|
||||||
def make_eval_env(track_id, port):
|
def connect_env():
|
||||||
inner = make_env(track_id, port)()
|
"""Connect to the sim. Reloading generates a new random road."""
|
||||||
return VecTransposeImage(DummyVecEnv([lambda e=inner: e]))
|
new_env = DummyVecEnv([make_env(TRACK_ID, PORT)])
|
||||||
|
new_env = VecTransposeImage(new_env)
|
||||||
|
return new_env
|
||||||
|
|
||||||
|
|
||||||
|
def reconnect_env(old_env):
|
||||||
|
"""Close old env, wait for sim to return to menu, reconnect with new road."""
|
||||||
|
try:
|
||||||
|
old_env.close()
|
||||||
|
except Exception as e:
|
||||||
|
log(f' env.close() warning: {e}')
|
||||||
|
time.sleep(SCENE_RELOAD_WAIT)
|
||||||
|
return connect_env()
|
||||||
|
|
||||||
|
|
||||||
log('=' * 60)
|
log('=' * 60)
|
||||||
log('Exp 24: generated_road — discrete steering, speed-based stuck')
|
log('Exp 24: generated_road — discrete steering, speed stuck, road regen')
|
||||||
log(f' Sim: {HOST}:9091 -> generated_road')
|
log(f' Sim: {HOST}:{PORT} -> {TRACK_ID}')
|
||||||
log(f' Discrete steering: {N_STEER} bins, throttle fixed at {THROTTLE_MIN}')
|
log(f' Discrete steering: {N_STEER} bins, throttle fixed at {THROTTLE_MIN}')
|
||||||
log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total={TOTAL_STEPS:,}')
|
log(f' LR={LR}, total={TOTAL_STEPS:,}, checkpoint every {CHECKPOINT_EVERY:,}')
|
||||||
log(f' Reward: v7 (speed×CTE, efficiency gate, no-progress kill)')
|
log(f' Reward: v7 (speed×CTE, efficiency gate, no-progress kill)')
|
||||||
log(f' Stuck: position≥0.5m/{MAX_STUCK_SECONDS}s OR speed<{LOW_SPEED_THRESHOLD}/{MAX_LOW_SPEED_SECONDS}s')
|
log(f' Stuck: position/{MAX_STUCK_SECONDS}s OR speed<{LOW_SPEED_THRESHOLD}/{MAX_LOW_SPEED_SECONDS}s')
|
||||||
log(f' Episode cap: {MAX_EPISODE_SECONDS}s (safety net)')
|
log(f' Episode cap: {MAX_EPISODE_SECONDS}s | Road regen: every {CHECKPOINT_EVERY:,} steps')
|
||||||
log(f' Checkpoints every {CHECKPOINT_EVERY:,} steps')
|
|
||||||
log('=' * 60)
|
log('=' * 60)
|
||||||
|
|
||||||
log('Creating DummyVecEnv on generated_road...')
|
log('Connecting to sim...')
|
||||||
env = DummyVecEnv([make_env('donkey-generated-roads-v0', 9091)])
|
env = connect_env()
|
||||||
env = VecTransposeImage(env)
|
log(f' obs={env.observation_space.shape}, action={env.action_space}')
|
||||||
log(f' VecEnv num_envs={env.num_envs}, obs={env.observation_space.shape}')
|
|
||||||
log(f' Action space: {env.action_space}')
|
|
||||||
|
|
||||||
model = PPO(
|
model = PPO(
|
||||||
'CnnPolicy',
|
'CnnPolicy',
|
||||||
|
|
@ -141,7 +158,7 @@ model = PPO(
|
||||||
with open(_PIDFILE, 'w') as f:
|
with open(_PIDFILE, 'w') as f:
|
||||||
f.write(str(os.getpid()))
|
f.write(str(os.getpid()))
|
||||||
|
|
||||||
log(f'Fresh PPO model created (Discrete({N_STEER * N_THROTTLE}) actions). Starting training...')
|
log(f'Fresh PPO (Discrete({N_STEER * N_THROTTLE})). Starting training...')
|
||||||
|
|
||||||
best_total_steps = float('-inf')
|
best_total_steps = float('-inf')
|
||||||
best_total_reward = float('-inf')
|
best_total_reward = float('-inf')
|
||||||
|
|
@ -179,6 +196,13 @@ while steps_done < TOTAL_STEPS:
|
||||||
model.save(os.path.join(_SAVE_DIR, 'model'))
|
model.save(os.path.join(_SAVE_DIR, 'model'))
|
||||||
flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved: {ckpt}.zip')
|
flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved: {ckpt}.zip')
|
||||||
|
|
||||||
|
# Reconnect → sim reloads scene → new random road generated.
|
||||||
|
# Eval runs on this fresh road, then the next training segment uses it too.
|
||||||
|
flog(f' Reconnecting for fresh road...')
|
||||||
|
env = reconnect_env(env)
|
||||||
|
model.set_env(env)
|
||||||
|
flog(f' Connected (new road)')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
ep_rewards = np.zeros(env.num_envs)
|
ep_rewards = np.zeros(env.num_envs)
|
||||||
|
|
@ -217,7 +241,7 @@ while steps_done < TOTAL_STEPS:
|
||||||
env.close()
|
env.close()
|
||||||
|
|
||||||
flog('=' * 60)
|
flog('=' * 60)
|
||||||
flog('FINAL EVALUATION: best_model on generated_road')
|
flog('FINAL EVALUATION: best_model on generated_road (3 fresh roads)')
|
||||||
flog('=' * 60)
|
flog('=' * 60)
|
||||||
|
|
||||||
EVAL_SETS = 3
|
EVAL_SETS = 3
|
||||||
|
|
@ -228,7 +252,9 @@ reward_list = []
|
||||||
|
|
||||||
for s in range(1, EVAL_SETS + 1):
|
for s in range(1, EVAL_SETS + 1):
|
||||||
try:
|
try:
|
||||||
eval_env = make_eval_env('donkey-generated-roads-v0', 9091)
|
# Each eval set reconnects → different random road
|
||||||
|
time.sleep(SCENE_RELOAD_WAIT)
|
||||||
|
eval_env = connect_env()
|
||||||
eval_model = PPO.load(best_model_path, env=eval_env, device='cpu')
|
eval_model = PPO.load(best_model_path, env=eval_env, device='cpu')
|
||||||
obs = eval_env.reset()
|
obs = eval_env.reset()
|
||||||
done = False
|
done = False
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue