# donkeycar-rl-autoresearch/agent/experiments/exp25_wheel_collision_fix.py
#
# 277 lines
# 8.8 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Exp 25: Wheel OverlapSphere collision fix — same setup as exp24, patched Unity sim.

What changed from exp24:
  - Unity Car.cs now has per-wheel OverlapSphere checks in FixedUpdate().
    WheelColliders never fire OnCollisionEnter/Stay on the car body. The previous
    forward raycast only caught nose-first contact when throttle > 0.05. Now:
      * Forward raycast: still fires for fast nose-first approach (before contact).
      * Per-wheel OverlapSphere: fires for any wheel touching any barrier, any angle,
        regardless of throttle. Filter: only objects with "barrier" in the name.
    Both checks call RegisterCollision() → episode terminates immediately on contact.
  - Python speed-check backstop unchanged (speed < 0.5 for 2s → terminate).
  - Everything else identical to exp24: discrete(7) steering, road regen, LR=0.0003.
  - Requires sim restart after rsync of new Assembly-CSharp.dll (built 2026-05-05).
"""
import os
import sys
import time
from datetime import datetime
# Put the agent/ directory first on the import path. This runs before the
# project-local imports further down (discretize_action, reward_wrapper, ...),
# which are presumably resolved from there.
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')

# Output directory for this experiment: checkpoints, run logs and the PID file.
_SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp25-wheel-fix'
_PIDFILE = os.path.join(_SAVE_DIR, 'current.pid')
os.makedirs(_SAVE_DIR, exist_ok=True)

# Single-instance guard. If the PID file names a different, still-existing
# process, refuse to start. An unreadable file, an unparsable PID, or a PID
# that can no longer be signalled is treated as stale and ignored.
if os.path.exists(_PIDFILE):
    try:
        # Context manager so the handle is closed even if parsing fails.
        with open(_PIDFILE) as _pid_fh:
            _old = int(_pid_fh.read().strip())
        # PIDs <= 0 address process groups rather than one process, so a
        # corrupt "0" or negative entry must not be probed with os.kill.
        if _old > 0 and _old != os.getpid():
            # Signal 0 delivers nothing; it only checks that the PID can be
            # signalled and raises OSError otherwise.
            os.kill(_old, 0)
            print(f'[exp25] Another instance already running (PID {_old}). Exiting.', flush=True)
            sys.exit(1)
    except (OSError, ValueError):
        pass
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from discretize_action import DiscretizedActionWrapper
from donkeycar_sb3_runner import ThrottleClampWrapper
from multitrack_runner import StuckTerminationWrapper
from reward_wrapper import SpeedRewardWrapper
# --- Simulator connection ---------------------------------------------------
HOST = 'localhost'
PORT = 9091
TRACK_ID = 'donkey-generated-roads-v0'
# Seconds slept before (re)opening a simulator connection.
SCENE_RELOAD_WAIT = 5.0

# --- Action space -----------------------------------------------------------
# Bin counts handed to DiscretizedActionWrapper.
N_STEER = 7
N_THROTTLE = 1
# Passed to ThrottleClampWrapper as throttle_min.
THROTTLE_MIN = 0.2

# --- PPO training schedule --------------------------------------------------
LR = 3e-4
TOTAL_STEPS = 200_000
# Segment length between checkpoints; the env is also reconnected per segment.
CHECKPOINT_EVERY = 10_000

# --- SpeedRewardWrapper settings --------------------------------------------
EFFICIENCY_WINDOW = 30
MIN_EFFICIENCY = 0.15
MAX_CTE = 8.0
MIN_LAP_TIME = 12.0
PROGRESS_PATIENCE = 100

# --- StuckTerminationWrapper settings ---------------------------------------
MAX_STUCK_SECONDS = 5.0
MAX_EPISODE_SECONDS = 30.0
LOW_SPEED_THRESHOLD = 0.5
MAX_LOW_SPEED_SECONDS = 2.0
def log(msg):
    """Print *msg* to stdout prefixed with the current HH:MM:SS time, flushing at once."""
    stamp = datetime.now().strftime("%H:%M:%S")
    print(f'[{stamp}] {msg}', flush=True)
def make_env(track_id, port):
    """Return a zero-argument factory that builds the fully wrapped sim env.

    Nothing is built until the returned callable is invoked. The wrappers are
    applied innermost first: ThrottleClampWrapper, DiscretizedActionWrapper,
    StuckTerminationWrapper, SpeedRewardWrapper.

    Args:
        track_id: Gym environment id handed to ``gym.make``.
        port: Simulator port placed in the env ``conf`` alongside ``HOST``.
    """
    def _init():
        sim_conf = {'host': HOST, 'port': port}
        wrapped = gym.make(track_id, conf=sim_conf)
        wrapped = ThrottleClampWrapper(wrapped, throttle_min=THROTTLE_MIN)
        wrapped = DiscretizedActionWrapper(
            wrapped, n_steer=N_STEER, n_throttle=N_THROTTLE
        )

        stuck_opts = {
            'stuck_steps': 40,
            'min_displacement': 0.5,
            'max_stuck_seconds': MAX_STUCK_SECONDS,
            'max_episode_seconds': MAX_EPISODE_SECONDS,
            'low_speed_threshold': LOW_SPEED_THRESHOLD,
            'max_low_speed_seconds': MAX_LOW_SPEED_SECONDS,
        }
        wrapped = StuckTerminationWrapper(wrapped, **stuck_opts)

        reward_opts = {
            'window_size': EFFICIENCY_WINDOW,
            'min_efficiency': MIN_EFFICIENCY,
            'max_cte': MAX_CTE,
            'min_lap_time': MIN_LAP_TIME,
            'progress_patience': PROGRESS_PATIENCE,
        }
        return SpeedRewardWrapper(wrapped, **reward_opts)

    return _init
def connect_env():
    """Build a single-env DummyVecEnv for TRACK_ID/PORT, wrapped in VecTransposeImage."""
    factories = [make_env(TRACK_ID, PORT)]
    return VecTransposeImage(DummyVecEnv(factories))
def reconnect_env(old_env):
    """Close *old_env*, wait SCENE_RELOAD_WAIT seconds, and return a new env.

    A failure while closing is logged as a warning and otherwise ignored, so
    the reconnect still proceeds.
    """
    try:
        old_env.close()
    except Exception as close_err:
        log(f' env.close() warning: {close_err}')

    time.sleep(SCENE_RELOAD_WAIT)
    fresh_env = connect_env()
    return fresh_env
# Startup banner: one log() call per line, framed by two rules.
_rule = '=' * 60
_banner = (
    _rule,
    'Exp 25: generated_road — wheel OverlapSphere collision fix',
    f' Sim: {HOST}:{PORT} -> {TRACK_ID}',
    f' Discrete steering: {N_STEER} bins, throttle fixed at {THROTTLE_MIN}',
    f' LR={LR}, total={TOTAL_STEPS:,}, checkpoint every {CHECKPOINT_EVERY:,}',
    ' Reward: v7 (speed×CTE, efficiency gate, no-progress kill)',
    f' Stuck: position/{MAX_STUCK_SECONDS}s OR speed<{LOW_SPEED_THRESHOLD}/{MAX_LOW_SPEED_SECONDS}s',
    f' Episode cap: {MAX_EPISODE_SECONDS}s | Road regen: every {CHECKPOINT_EVERY:,} steps',
    ' Unity fix: per-wheel OverlapSphere → any-angle barrier detection',
    _rule,
)
for _line in _banner:
    log(_line)

# Open the training environment and report its spaces.
log('Connecting to sim...')
env = connect_env()
log(f' obs={env.observation_space.shape}, action={env.action_space}')
# Fresh (not loaded) PPO agent with a CNN policy on the training env.
_ppo_hyperparams = {
    'learning_rate': LR,
    'n_steps': 2048,
    'batch_size': 64,
    'n_epochs': 10,
    'gamma': 0.99,
    'gae_lambda': 0.95,
    'clip_range': 0.2,
    'ent_coef': 0.01,
    'verbose': 1,
    'device': 'cpu',
}
model = PPO('CnnPolicy', env, **_ppo_hyperparams)
# Record this process as the running instance; the PID file is only written
# once the env and model above have been created.
_own_pid = str(os.getpid())
with open(_PIDFILE, 'w') as _pid_out:
    _pid_out.write(_own_pid)

_n_actions = N_STEER * N_THROTTLE
log(f'Fresh PPO (Discrete({_n_actions})). Starting training...')
# Best-so-far eval result. Starting at -inf means the first successful
# evaluation always becomes the best.
best_total_steps = float('-inf')
best_total_reward = float('-inf')
# Timesteps requested from model.learn() so far.
steps_done = 0

# Per-run artefact paths inside _SAVE_DIR.
run_tag = datetime.now().strftime('%Y-%m-%d_%H%M%S') + '_wheel_fix'
log_path = os.path.join(_SAVE_DIR, f'run_{run_tag}.log')
best_model_path = os.path.join(_SAVE_DIR, 'best_model.zip')

import logging

# Dedicated logger that writes each message to the run log file and to stdout.
# The file handler is pinned to UTF-8 because log lines contain non-ASCII
# characters ('—', '❌') that a locale default such as ASCII or cp1252 cannot
# encode.
_file_handler = logging.FileHandler(log_path, encoding='utf-8')
_file_handler.setFormatter(logging.Formatter('%(message)s'))
_stream_handler = logging.StreamHandler(sys.stdout)
_stream_handler.setFormatter(logging.Formatter('%(message)s'))
file_log = logging.getLogger('exp25')
file_log.setLevel(logging.INFO)
# Do not hand records to ancestor loggers, so each line is emitted only by the
# two handlers attached here.
file_log.propagate = False
file_log.addHandler(_file_handler)
file_log.addHandler(_stream_handler)
def flog(msg):
    """Send *msg*, prefixed with the current HH:MM:SS time, to the exp25 logger at INFO."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = '[{}] {}'.format(stamp, msg)
    file_log.info(line)
# Opening entries of the run log: PID and log path framed by two rules.
_log_rule = '=' * 60
_log_header = (
    _log_rule,
    f'Exp 25 started — PID {os.getpid()}',
    f'Log: {log_path}',
    _log_rule,
)
for _entry in _log_header:
    flog(_entry)
# Step cap for the evaluation rollout run after every checkpoint.
_CKPT_EVAL_MAX_STEPS = 2000


def _run_checkpoint_eval(eval_model, vec_env, max_steps):
    """Roll out one deterministic episode per sub-env on *vec_env*.

    Rewards and step counts are accumulated per sub-env until that sub-env
    first reports done. The rollout stops when all sub-envs are done or after
    *max_steps* steps.

    Returns:
        ``(ep_rewards, ep_steps)``: two arrays of length ``vec_env.num_envs``.
    """
    obs = vec_env.reset()
    n_envs = vec_env.num_envs
    ep_rewards = np.zeros(n_envs)
    ep_steps = np.zeros(n_envs)
    finished = np.zeros(n_envs, dtype=bool)
    for _step in range(max_steps):
        action, _state = eval_model.predict(obs, deterministic=True)
        obs, rewards, dones, _infos = vec_env.step(action)
        # Only sub-envs still in their first episode accumulate; the step on
        # which an env reports done is still counted.
        live = ~finished
        ep_rewards[live] += np.asarray(rewards)[live]
        ep_steps[live] += 1
        finished |= np.asarray(dones, dtype=bool)
        if finished.all():
            break
    return ep_rewards, ep_steps


# Train in CHECKPOINT_EVERY-sized segments. After each segment: save, reconnect
# to the simulator, run one evaluation rollout and keep the best model.
# NOTE(review): steps_done counts the timesteps *requested* from learn(). Confirm
# whether SB3 rounds each call up to whole n_steps rollouts before treating
# checkpoint names as exact step counts.
while steps_done < TOTAL_STEPS:
    seg_steps = min(CHECKPOINT_EVERY, TOTAL_STEPS - steps_done)
    model.learn(total_timesteps=seg_steps, reset_num_timesteps=False)
    steps_done += seg_steps

    # Numbered checkpoint plus a rolling "model" file overwritten every segment.
    ckpt = os.path.join(_SAVE_DIR, f'checkpoint_{steps_done:07d}')
    model.save(ckpt)
    model.save(os.path.join(_SAVE_DIR, 'model'))
    flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved: {ckpt}.zip')

    flog(f' Reconnecting for fresh road...')
    env = reconnect_env(env)
    model.set_env(env)
    flog(f' Connected (new road)')

    # Evaluation is best-effort: a failure is logged and training continues.
    try:
        ep_rewards, ep_steps = _run_checkpoint_eval(model, env, _CKPT_EVAL_MAX_STEPS)
        total_steps_eval = int(ep_steps.sum())
        total_reward_eval = float(ep_rewards.sum())
        first_env_steps = int(ep_steps[0])
        status = '' if first_env_steps >= _CKPT_EVAL_MAX_STEPS else f'❌@{first_env_steps}'
        flog(f' Eval: gen_road={total_reward_eval:.1f}r/{first_env_steps}s {status}')

        # Rank by steps survived first, then by total reward as a tie-breaker.
        if (total_steps_eval > best_total_steps
                or (total_steps_eval == best_total_steps
                    and total_reward_eval > best_total_reward)):
            best_total_steps = total_steps_eval
            best_total_reward = total_reward_eval
            model.save(best_model_path)
            flog(f' NEW BEST: steps={best_total_steps} reward={best_total_reward:.1f}')
    except Exception as e:
        flog(f' Eval error: {e}')
# Release the training env before the final evaluation opens new connections.
# A close failure is only logged, as in reconnect_env, so it cannot cost the
# run its final evaluation.
try:
    env.close()
except Exception as e:
    flog(f' env.close() warning: {e}')

flog('=' * 60)
flog('FINAL EVALUATION: best_model on generated_road (3 fresh roads)')
flog('=' * 60)
EVAL_SETS = 3
EVAL_MAX_STEPS = 2000
steps_list = []
reward_list = []
for s in range(1, EVAL_SETS + 1):
    eval_env = None
    try:
        time.sleep(SCENE_RELOAD_WAIT)
        eval_env = connect_env()
        eval_model = PPO.load(best_model_path, env=eval_env, device='cpu')
        obs = eval_env.reset()
        done = False
        total_s = 0
        total_r = 0.0
        while not done and total_s < EVAL_MAX_STEPS:
            action, _ = eval_model.predict(obs, deterministic=True)
            result = eval_env.step(action)
            obs, r, done = result[0], result[1], result[2]
            # step() may return per-env sequences or plain scalars; reduce
            # either form to the first env's value.
            if hasattr(done, '__len__'):
                done = bool(done[0])
            total_r += float(r) if not hasattr(r, '__len__') else float(r[0])
            total_s += 1
        status = '' if total_s >= EVAL_MAX_STEPS else f'❌@{total_s}'
        flog(f' Set {s}: {total_r:.1f}r / {total_s}s {status}')
        steps_list.append(total_s)
        reward_list.append(total_r)
    except Exception as e:
        # One failed set is logged and skipped; the remaining sets still run.
        flog(f' Set {s} error: {e}')
    finally:
        # Always release the env, including after a failed load, reset or
        # step, so the next set does not connect while this one is still open.
        if eval_env is not None:
            try:
                eval_env.close()
            except Exception as close_err:
                flog(f' Set {s} env.close() warning: {close_err}')

# The mean is only reported when at least one set completed.
if steps_list:
    flog(f' Mean: {np.mean(steps_list):.0f} steps / {np.mean(reward_list):.1f} reward')
flog('Exp 25 complete.')