feat(exp27): random roads with variable throttle + road regen + self-intersection fix
Fixes four root-cause issues discovered before/during this experiment: 1. regen_road was silently doing nothing — TcpCarHandler.RegenRoad() bailed on null TrainingManager; added direct RoadBuilder+PathManager fallback. 2. MapOverlay minimap not refreshing — fixed to check node[10] position change. 3. BrakeOnUpdateCallback: sends zero control before PPO gradient updates to prevent car drifting during 3-8s CPU pause. 4. PathManager self-intersection fix: retry loop with XZ segment-segment math (up to 20 retries) — verifiably different roads per seed. Exp27 trains fresh weights with N_THROTTLE=3 (bins 0.2/0.5/1.0), ent_coef=0.05, 500k steps, regen_road TCP message per checkpoint. Peak: 462.7r/1580 steps @110k. Also adds verify_minimap_refresh.py and verify_road_regen.py diagnostic scripts. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0615b22cb9
commit
36be93e357
|
|
@ -0,0 +1,282 @@
|
||||||
|
"""
|
||||||
|
Exp 27: Fresh weights, truly random roads, variable throttle.
|
||||||
|
|
||||||
|
Changes from exp26:
|
||||||
|
1. Fresh weights (no warm start) — exp26 peaked at 20k/300k then regressed.
|
||||||
|
2. Random roads: regen_road TCP message with random seed each checkpoint.
|
||||||
|
No close+reconnect (which was silently generating seed=2 road every time).
|
||||||
|
3. Variable throttle: N_THROTTLE=3 → bins [0.2, 0.5, 1.0] after ThrottleClampWrapper.
|
||||||
|
4. BrakeOnUpdateCallback: sends zero control before PPO gradient updates,
|
||||||
|
preventing car from drifting into barriers during the ~5-15s CPU update pause.
|
||||||
|
5. Tighter CTE termination: 2.0m / 0.5s (was 3.0m / 1.0s).
|
||||||
|
6. Higher entropy: ent_coef=0.05 to prevent premature policy collapse.
|
||||||
|
7. Smaller n_steps=1024: shorter rollout → shorter gradient update pause.
|
||||||
|
8. set_ai_text: pushes training stats to sim overlay each checkpoint.
|
||||||
|
9. 500k total steps — more budget for fresh weights to learn variable throttle.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
|
||||||
|
_SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp27-random-roads'
|
||||||
|
_PIDFILE = os.path.join(_SAVE_DIR, 'current.pid')
|
||||||
|
os.makedirs(_SAVE_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
# Single-instance guard: refuse to start while the PID recorded by an earlier
# run still refers to a live process other than this one.
if os.path.exists(_PIDFILE):
    try:
        # Context manager so the handle is closed even if parsing fails.
        with open(_PIDFILE) as _pid_fh:
            _old = int(_pid_fh.read().strip())
    except (OSError, ValueError):
        # Unreadable or corrupt PID file: treat it as stale and carry on.
        _old = None

    # PIDs <= 0 address process groups in os.kill, never a single process,
    # so they can only come from a corrupt file and are ignored.
    if _old is not None and _old > 0 and _old != os.getpid():
        try:
            # Signal 0 delivers nothing; it only checks that the PID exists.
            os.kill(_old, 0)
            _alive = True
        except PermissionError:
            # The process exists but we are not allowed to signal it.
            _alive = True
        except OSError:
            # No such process: the previous run is gone.
            _alive = False

        if _alive:
            print(f'[exp27] Another instance already running (PID {_old}). Exiting.', flush=True)
            sys.exit(1)
|
||||||
|
|
||||||
|
import gymnasium as gym
|
||||||
|
import numpy as np
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
|
||||||
|
from discretize_action import DiscretizedActionWrapper
|
||||||
|
from donkeycar_sb3_runner import ThrottleClampWrapper
|
||||||
|
from multitrack_runner import StuckTerminationWrapper
|
||||||
|
from reward_wrapper import SpeedRewardWrapper
|
||||||
|
|
||||||
|
|
||||||
|
# --- Simulator connection -------------------------------------------------
HOST = 'localhost'
PORT = 9091
TRACK_ID = 'donkey-generated-roads-v0'

# --- PPO / schedule -------------------------------------------------------
THROTTLE_MIN = 0.2
LR = 0.0003
ENT_COEF = 0.05
N_STEPS = 1024              # smaller rollout → shorter gradient-update pause
TOTAL_STEPS = 500_000
CHECKPOINT_EVERY = 10_000
REGEN_WAIT = 3.0            # seconds after regen_road before reset

# --- Action discretisation ------------------------------------------------
N_STEER = 7
# throttle bins [0.0,0.5,1.0] → after ThrottleClampWrapper: [0.2,0.5,1.0]
N_THROTTLE = 3

# --- Termination limits passed to StuckTerminationWrapper -----------------
MAX_STUCK_SECONDS = 5.0
MAX_EPISODE_SECONDS = 30.0
LOW_SPEED_THRESHOLD = 1.0
MAX_LOW_SPEED_SECONDS = 1.5
MAX_CTE_TERMINATION = 2.0   # tighter than exp26 (3.0m)
MAX_HIGH_CTE_SECONDS = 0.5  # tighter than exp26 (1.0s)

# --- Settings passed to SpeedRewardWrapper --------------------------------
EFFICIENCY_WINDOW = 30
MIN_EFFICIENCY = 0.15
MAX_CTE = 8.0
MIN_LAP_TIME = 12.0
PROGRESS_PATIENCE = 100
|
||||||
|
|
||||||
|
|
||||||
|
import logging

# One timestamped log file per run inside _SAVE_DIR, mirrored to stdout.
_log_ts = datetime.now().strftime('%Y-%m-%d_%H%M%S')
_log_path = os.path.join(_SAVE_DIR, f'run_{_log_ts}_random_roads.log')

_fh = logging.FileHandler(_log_path)
_sh = logging.StreamHandler(sys.stdout)

# Dedicated 'exp27' logger; propagation is off so records are emitted only by
# the two handlers attached here.
file_log = logging.getLogger('exp27')
file_log.setLevel(logging.INFO)
file_log.propagate = False

for _handler in (_fh, _sh):
    # Message-only format: flog() adds its own timestamp prefix.
    _handler.setFormatter(logging.Formatter('%(message)s'))
    file_log.addHandler(_handler)
|
||||||
|
|
||||||
|
|
||||||
|
def flog(msg):
    """Emit *msg* on the ``file_log`` logger at INFO level with an ``[HH:MM:SS]`` prefix."""
    file_log.info(f'[{datetime.now():%H:%M:%S}] {msg}')
|
||||||
|
|
||||||
|
|
||||||
|
def make_env():
    """Return a zero-argument factory that builds the wrapped training env.

    The factory creates ``TRACK_ID`` via ``gym.make`` and wraps it, innermost
    first, in ``ThrottleClampWrapper``, ``DiscretizedActionWrapper``,
    ``StuckTerminationWrapper`` and ``SpeedRewardWrapper``, all configured from
    the module-level constants.
    """
    def _init():
        env = gym.make(TRACK_ID, conf={'host': HOST, 'port': PORT})
        env = ThrottleClampWrapper(env, throttle_min=THROTTLE_MIN)
        env = DiscretizedActionWrapper(env, n_steer=N_STEER, n_throttle=N_THROTTLE)

        termination_kwargs = dict(
            stuck_steps=40,
            min_displacement=0.5,
            max_stuck_seconds=MAX_STUCK_SECONDS,
            max_episode_seconds=MAX_EPISODE_SECONDS,
            low_speed_threshold=LOW_SPEED_THRESHOLD,
            max_low_speed_seconds=MAX_LOW_SPEED_SECONDS,
            max_cte=MAX_CTE_TERMINATION,
            max_high_cte_seconds=MAX_HIGH_CTE_SECONDS,
        )
        env = StuckTerminationWrapper(env, **termination_kwargs)

        reward_kwargs = dict(
            window_size=EFFICIENCY_WINDOW,
            min_efficiency=MIN_EFFICIENCY,
            max_cte=MAX_CTE,
            min_lap_time=MIN_LAP_TIME,
            progress_patience=PROGRESS_PATIENCE,
        )
        return SpeedRewardWrapper(env, **reward_kwargs)

    return _init
|
||||||
|
|
||||||
|
|
||||||
|
def get_handler(vec_env):
    """Return the sim message handler reachable from *vec_env*.

    Follows ``vec_env.venv.envs[0].unwrapped.viewer.handler``, so *vec_env*
    must expose ``.venv`` (a wrapper around a vec env) and only the first
    sub-environment is consulted.
    """
    first_env = vec_env.venv.envs[0]
    viewer = first_env.unwrapped.viewer
    return viewer.handler
|
||||||
|
|
||||||
|
|
||||||
|
def regen_road(vec_env, seed):
    """Queue a ``regen_road`` message for *seed*, then sleep ``REGEN_WAIT`` seconds.

    All message values are sent as strings; ``road_style`` is ``'0'`` and
    ``turn_increment`` is ``'0.0'``. The sleep is a fixed delay before the
    caller resets the env; no acknowledgement is awaited here.
    """
    handler = get_handler(vec_env)
    handler.queue_message(dict(
        msg_type='regen_road',
        road_style='0',
        rand_seed=str(seed),
        turn_increment='0.0',
    ))
    time.sleep(REGEN_WAIT)
|
||||||
|
|
||||||
|
|
||||||
|
def set_ai_text(vec_env, text):
    """Best-effort: queue a ``set_ai_text`` message carrying *text* to the sim.

    The overlay is cosmetic, so a failure must never interrupt training. It is
    reported through ``flog`` rather than silently discarded, so a permanently
    broken overlay is visible in the run log.
    """
    try:
        get_handler(vec_env).queue_message({'msg_type': 'set_ai_text', 'text': text})
    except Exception as exc:
        flog(f' set_ai_text failed (ignored): {exc!r}')
|
||||||
|
|
||||||
|
|
||||||
|
class BrakeOnUpdateCallback(BaseCallback):
    """
    Sends zero-throttle control to sim before PPO gradient updates begin.

    on_rollout_end() fires after n_steps rollouts are collected, right before
    PPO starts gradient updates (which can take 5-15s on CPU). Without this,
    the sim holds the last action → car drifts into barriers during the pause.

    Sending the message is best-effort: a failure is logged through ``flog``
    and otherwise ignored so that training continues.
    """

    def __init__(self, vec_env):
        """Remember *vec_env*, whose sim handler receives the control message."""
        super().__init__(verbose=0)
        self._vec_env = vec_env

    def _on_rollout_end(self):
        """Queue a control message with steering, throttle and brake all ``'0.0'``."""
        # NOTE(review): 'brake' is also '0.0', so this stops driving the car
        # rather than actively braking it — confirm that is the intent.
        try:
            get_handler(self._vec_env).queue_message({
                'msg_type': 'control',
                'steering': '0.0',
                'throttle': '0.0',
                'brake': '0.0',
            })
        except Exception as exc:
            # Log instead of swallowing: if this never succeeds, the drift the
            # callback is meant to prevent would otherwise go unexplained.
            flog(f' BrakeOnUpdateCallback: zero control not sent (ignored): {exc!r}')

    def _on_step(self):
        """Never request an early stop."""
        return True
|
||||||
|
|
||||||
|
|
||||||
|
# Start-up banner: one flog() call per line, framed by two 60-character rules.
_banner_rule = '=' * 60
for _banner_line in (
    _banner_rule,
    'Exp 27: fresh weights | truly random roads | variable throttle',
    f' Sim: {HOST}:{PORT} → {TRACK_ID}',
    f' Steering: {N_STEER} bins | Throttle: {N_THROTTLE} bins → [0.2, 0.5, 1.0]',
    f' LR={LR}, ent_coef={ENT_COEF}, n_steps={N_STEPS}',
    f' Total={TOTAL_STEPS:,} steps, checkpoint every {CHECKPOINT_EVERY:,}',
    f' CTE term: >{MAX_CTE_TERMINATION}m for >{MAX_HIGH_CTE_SECONDS}s',
    f' Speed term: <{LOW_SPEED_THRESHOLD} for >{MAX_LOW_SPEED_SECONDS}s',
    f' Episode cap: {MAX_EPISODE_SECONDS}s | Road regen: random seed each checkpoint',
    ' BrakeOnUpdateCallback: enabled',
    _banner_rule,
):
    flog(_banner_line)
|
||||||
|
|
||||||
|
# Evaluation episodes are capped at this many env steps; reaching the cap
# without the episode ending counts as a pass (✅).
EVAL_MAX_STEPS = 2000

flog('Connecting to sim...')
env = DummyVecEnv([make_env()])
env = VecTransposeImage(env)

best_total_steps = float('-inf')
best_total_reward = float('-inf')
steps_done = 0
best_model_path = os.path.join(_SAVE_DIR, 'best_model.zip')

# Everything that talks to the sim runs under try/finally so the connection is
# closed even when road regeneration, training or checkpointing raises.
try:
    flog(f' Connected. obs={env.observation_space.shape}, action={env.action_space}')

    first_seed = random.randint(0, 100000)
    flog(f' Initial road regen (seed={first_seed})...')
    regen_road(env, first_seed)
    flog(' Road ready.')

    flog('Creating fresh PPO model (no warm start)...')
    model = PPO(
        'CnnPolicy',
        env,
        learning_rate=LR,
        n_steps=N_STEPS,
        ent_coef=ENT_COEF,
        device='cpu',
        verbose=1,
    )
    flog(f' Model created. Action space: {env.action_space.n} discrete actions')

    # Record our PID for the single-instance guard at the top of the script.
    with open(_PIDFILE, 'w') as f:
        f.write(str(os.getpid()))

    flog(f'Exp 27 started — PID {os.getpid()}')
    flog(f'Log: {_log_path}')

    brake_cb = BrakeOnUpdateCallback(env)
    current_seed = first_seed

    while steps_done < TOTAL_STEPS:
        # --- Train one segment, continuing the global timestep counter. ---
        seg_steps = min(CHECKPOINT_EVERY, TOTAL_STEPS - steps_done)
        model.learn(total_timesteps=seg_steps, reset_num_timesteps=False, callback=brake_cb)
        steps_done += seg_steps

        # --- Checkpoint: numbered snapshot plus a rolling 'model' copy. ---
        ckpt = os.path.join(_SAVE_DIR, f'checkpoint_{steps_done:07d}')
        model.save(ckpt)
        model.save(os.path.join(_SAVE_DIR, 'model'))
        flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved')

        # --- New road: used for this evaluation and the next segment. ---
        current_seed = random.randint(0, 100000)
        flog(f' Regenerating road (seed={current_seed})...')
        regen_road(env, current_seed)
        flog(' Road ready.')

        # --- Deterministic evaluation of one episode per sub-env. ---
        try:
            obs = env.reset()
            ep_rewards = np.zeros(env.num_envs)
            ep_steps = np.zeros(env.num_envs)
            done_mask = np.zeros(env.num_envs, dtype=bool)

            for _ in range(EVAL_MAX_STEPS):
                action, _ = model.predict(obs, deterministic=True)
                obs, rewards, dones, infos = env.step(action)
                # Only sub-envs whose first episode is still running accumulate.
                active = ~done_mask
                ep_rewards[active] += rewards[active]
                ep_steps[active] += 1
                done_mask |= np.asarray(dones, dtype=bool)
                if done_mask.all():
                    break

            # Only the first sub-env is reported.
            total_steps_eval = int(ep_steps[0])
            total_reward_eval = float(ep_rewards[0])
            status = '✅' if total_steps_eval >= EVAL_MAX_STEPS else f'❌@{total_steps_eval}'
            flog(f' Eval (seed={current_seed}): {total_reward_eval:.1f}r/{total_steps_eval}s {status}')

            overlay = (f'Exp27 {steps_done//1000:3d}k/{TOTAL_STEPS//1000}k\n'
                       f'R:{total_reward_eval:6.1f} Seed:{current_seed} {status}')
            set_ai_text(env, overlay)

            # Best = longest episode; ties are broken by higher reward.
            if (total_steps_eval > best_total_steps
                    or (total_steps_eval == best_total_steps
                        and total_reward_eval > best_total_reward)):
                best_total_steps = total_steps_eval
                best_total_reward = total_reward_eval
                model.save(best_model_path)
                flog(f' NEW BEST: steps={best_total_steps} reward={best_total_reward:.1f}')

        except Exception as e:
            flog(f' Eval error: {e}')

        # The manual reset()/step() calls above moved the env behind the
        # model's back. With reset_num_timesteps=False, learn() reuses its
        # cached last observation unless it is None, so clear it to make the
        # next segment start from a fresh env.reset() on the new road.
        model._last_obs = None
finally:
    env.close()

flog('=' * 60)
flog('Exp 27 complete.')
flog(f'Best model: {best_model_path}')
flog(f'Best eval: steps={best_total_steps} reward={best_total_reward:.1f}')
flog('=' * 60)
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
"""
|
||||||
|
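Quick test: regenerate the road once per seed in SEEDS and print the car's
position and CTE after a reset plus one step.
The minimap fix refreshes when road node[10]'s position changes; the car's spawn
position does not show that, so check the minimap in the sim window after each regen.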
|
||||||
|
"""
|
||||||
|
import sys, time
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
|
||||||
|
import gymnasium as gym
|
||||||
|
import gym_donkeycar # noqa
|
||||||
|
|
||||||
|
HOST, PORT = 'localhost', 9091
|
||||||
|
SEEDS = [1111, 55555, 99999, 12345, 77777]
|
||||||
|
|
||||||
|
env = gym.make('donkey-generated-roads-v0', conf={'host': HOST, 'port': PORT})
|
||||||
|
handler = env.unwrapped.viewer.handler
|
||||||
|
|
||||||
|
def regen(seed):
    """Regenerate the road for *seed* and return ``(pos, cte)`` after one step.

    Queues a ``regen_road`` message on the module-level ``handler``, sleeps
    3.5 s, resets the module-level ``env`` and takes a single step with action
    ``[0.0, 0.1]``. Both values come from that step's ``info`` dict and are
    ``None`` when the key is missing.
    """
    request = dict(
        msg_type='regen_road',
        road_style='0',
        rand_seed=str(seed),
        turn_increment='0.0',
    )
    handler.queue_message(request)
    time.sleep(3.5)

    _obs, info = env.reset()
    # One step so that info carries fresh telemetry.
    _obs, _reward, _terminated, _truncated, info = env.step([0.0, 0.1])
    return info.get('pos', None), info.get('cte', None)
|
||||||
|
|
||||||
|
print('Testing road regen — recording car position after reset+1step:')
print()

positions = []
try:
    for seed in SEEDS:
        pos, cte = regen(seed)
        # Explicit None/length check: truthiness of a sequence is the wrong
        # test here, and a short position would break the indexing below.
        p = list(pos)[:3] if pos is not None and len(pos) >= 3 else [0, 0, 0]
        positions.append(p)
        # regen() returns None for a missing 'cte'; formatting None with
        # ':.3f' would raise TypeError.
        cte_str = f'{cte:.3f}' if cte is not None else 'N/A'
        print(f' Seed {seed:6d}: pos x={p[0]:.3f} y={p[1]:.3f} z={p[2]:.3f} cte={cte_str}')
finally:
    # Close the sim connection even if a regen or step fails.
    env.close()

print()
# All positions should be the same (car always spawns at startPos).
# But road node[10] position will differ — proven by the fact that CTE diverges after a few steps.
print('Car spawn position is always the same (startPos) — comparison via driving test is needed.')
print('Run verify_road_regen.py for that proof.')
print()
print('If the MINIMAP in the sim window changed shape between each regen above, the fix works.')
|
||||||
|
|
@ -0,0 +1,111 @@
|
||||||
|
"""
|
||||||
|
Verify that regen_road actually produces different tracks.
|
||||||
|
|
||||||
|
Method: connect to sim, regen road with 3 seeds, drive up to STEPS (500) steps
on each with a proportional lane-following controller (steer = -cte * STEER_GAIN),
and record the CTE history and final position. Different roads curve differently,
so the same controller will end up with different CTE values and positions.
|
||||||
|
|
||||||
|
Also prints the first few node positions from the handler if accessible.
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
|
||||||
|
|
||||||
|
import gymnasium as gym
|
||||||
|
import gym_donkeycar # noqa: F401 — registers donkey envs
|
||||||
|
|
||||||
|
HOST = 'localhost'
|
||||||
|
PORT = 9091
|
||||||
|
TRACK_ID = 'donkey-generated-roads-v0'
|
||||||
|
SEEDS = [1111, 55555, 99999]
|
||||||
|
STEPS = 500 # drive lane-following steps per seed
|
||||||
|
THROTTLE = 0.3
|
||||||
|
STEER_GAIN = 0.8 # proportional: steer = -cte * gain
|
||||||
|
WAIT = 3.5 # seconds after regen before reset
|
||||||
|
|
||||||
|
|
||||||
|
def get_handler(env):
    """Return the sim message handler of *env* (``env.unwrapped.viewer.handler``)."""
    viewer = env.unwrapped.viewer
    return viewer.handler
|
||||||
|
|
||||||
|
|
||||||
|
def regen_road(env, seed):
    """Queue a ``regen_road`` message for *seed*, then sleep ``WAIT`` seconds.

    Message values are sent as strings; the fixed sleep precedes the caller's
    ``env.reset()``.
    """
    request = dict(
        msg_type='regen_road',
        road_style='0',
        rand_seed=str(seed),
        turn_increment='0.0',
    )
    get_handler(env).queue_message(request)
    time.sleep(WAIT)
|
||||||
|
|
||||||
|
|
||||||
|
print('Connecting to sim...')
env = gym.make(TRACK_ID, conf={'host': HOST, 'port': PORT})
print(f' Connected. obs={env.observation_space.shape}')

# seed -> summary statistics of the drive on that seed's road
results = {}

for seed in SEEDS:
    print(f'\n── Seed {seed} ──────────────────────')
    print(f' Regenerating road...')
    regen_road(env, seed)

    obs, info = env.reset()
    cte_trace = []
    pos_trace = []
    prev_cte = 0.0  # no measurement before the first step

    for step in range(STEPS):
        # Lane-following: steer proportional to the previous step's CTE so
        # the car stays on the road; different road geometries then yield
        # different CTE histories.
        steer = float(np.clip(-prev_cte * STEER_GAIN, -1.0, 1.0))
        action = np.array([steer, THROTTLE], dtype=np.float32)

        obs, reward, terminated, truncated, info = env.step(action)
        cte = info.get('cte', 0.0)
        pos = info.get('pos', None)
        prev_cte = float(cte)
        cte_trace.append(prev_cte)
        if pos is not None:
            pos_trace.append(list(pos)[:3])
        if terminated or truncated:
            print(f' Episode ended at step {step+1}')
            break

    # Summary statistics; all zero when no step was recorded.
    if cte_trace:
        final_cte = cte_trace[-1]
        mean_cte = float(np.mean(cte_trace))
        max_abs_cte = float(np.max(np.abs(cte_trace)))
    else:
        final_cte = mean_cte = max_abs_cte = 0.0
    final_pos = pos_trace[-1] if pos_trace else None

    results[seed] = {
        'final_cte': final_cte,
        'mean_cte': mean_cte,
        'max_abs_cte': max_abs_cte,
        'steps': len(cte_trace),
        'final_pos': final_pos,
    }

    print(f' Steps driven : {len(cte_trace)}')
    print(f' Final CTE : {final_cte:+.3f}m (+ = right of centre, - = left)')
    print(f' Mean CTE : {mean_cte:+.3f}m')
    print(f' Max |CTE| : {max_abs_cte:.3f}m')
    if final_pos:
        print(f' Final pos : x={final_pos[0]:.2f} y={final_pos[1]:.2f} z={final_pos[2]:.2f}')

env.close()
|
||||||
|
|
||||||
|
print('\n' + '='*50)
# The drive loop uses a proportional lane-following controller, not a
# straight-ahead policy, so the heading says so.
print('SUMMARY — same lane-following policy, different seeds:')
print('='*50)
for seed, r in results.items():
    p = r['final_pos']
    pos_str = f'x={p[0]:.1f} z={p[2]:.1f}' if p else 'N/A'
    print(f' Seed {seed:6d}: CTE={r["final_cte"]:+.3f}m steps={r["steps"]} pos={pos_str}')

# Verdict: spread of the final CTE across seeds.
# NOTE(review): the controller steers to reduce CTE, so the final CTE may be
# small on every road; the final positions printed above are additional
# evidence — consider comparing them as well.
ctes = [r['final_cte'] for r in results.values()]
# Guard against an empty result set, where max()/min() would raise ValueError.
spread = max(ctes) - min(ctes) if ctes else 0.0
print(f'\nCTE spread across seeds: {spread:.3f}m')
if spread > 0.3:
    print('✅ ROADS ARE DIFFERENT — CTE spread > 0.3m confirms different road geometries')
else:
    print('❌ ROADS MAY BE THE SAME — CTE spread is small, road gen may not be working')
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
1094759
|
||||||
|
|
@ -0,0 +1,73 @@
|
||||||
|
[11:31:18] ============================================================
|
||||||
|
[11:31:18] Exp 27: fresh weights | truly random roads | variable throttle
|
||||||
|
[11:31:18] Sim: localhost:9091 → donkey-generated-roads-v0
|
||||||
|
[11:31:18] Steering: 7 bins | Throttle: 3 bins → [0.2, 0.5, 1.0]
|
||||||
|
[11:31:18] LR=0.0003, ent_coef=0.05, n_steps=1024
|
||||||
|
[11:31:18] Total=500,000 steps, checkpoint every 10,000
|
||||||
|
[11:31:18] CTE term: >2.0m for >0.5s
|
||||||
|
[11:31:18] Speed term: <1.0 for >1.5s
|
||||||
|
[11:31:18] Episode cap: 30.0s | Road regen: random seed each checkpoint
|
||||||
|
[11:31:18] BrakeOnUpdateCallback: enabled
|
||||||
|
[11:31:18] ============================================================
|
||||||
|
[11:31:18] Connecting to sim...
|
||||||
|
[11:31:18] Connected. obs=(3, 120, 160), action=Discrete(21)
|
||||||
|
[11:31:18] Initial road regen (seed=93502)...
|
||||||
|
[11:31:21] Road ready.
|
||||||
|
[11:31:21] Creating fresh PPO model (no warm start)...
|
||||||
|
[11:31:24] Model created. Action space: 21 discrete actions
|
||||||
|
[11:31:24] Exp 27 started — PID 1038769
|
||||||
|
[11:31:24] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp27-random-roads/run_2026-05-06_113118_random_roads.log
|
||||||
|
[11:46:09] [10,000/500,000] Checkpoint saved
|
||||||
|
[11:46:09] Regenerating road (seed=42649)...
|
||||||
|
[11:46:12] Road ready.
|
||||||
|
[11:46:15] Eval (seed=42649): -0.3r/19s ❌@19
|
||||||
|
[11:46:15] NEW BEST: steps=19 reward=-0.3
|
||||||
|
[11:54:27] [20,000/500,000] Checkpoint saved
|
||||||
|
[11:54:27] Regenerating road (seed=77345)...
|
||||||
|
[11:54:30] Road ready.
|
||||||
|
[11:54:56] Eval (seed=77345): 376.1r/2000s ✅
|
||||||
|
[11:54:57] NEW BEST: steps=2000 reward=376.1
|
||||||
|
[12:00:59] [30,000/500,000] Checkpoint saved
|
||||||
|
[12:00:59] Regenerating road (seed=78734)...
|
||||||
|
[12:01:02] Road ready.
|
||||||
|
[12:01:21] Eval (seed=78734): 373.2r/1343s ❌@1343
|
||||||
|
[12:07:22] [40,000/500,000] Checkpoint saved
|
||||||
|
[12:07:22] Regenerating road (seed=11684)...
|
||||||
|
[12:07:25] Road ready.
|
||||||
|
[12:07:45] Eval (seed=11684): 426.2r/1439s ❌@1439
|
||||||
|
[12:13:34] [50,000/500,000] Checkpoint saved
|
||||||
|
[12:13:34] Regenerating road (seed=10443)...
|
||||||
|
[12:13:37] Road ready.
|
||||||
|
[12:13:41] Eval (seed=10443): 40.2r/148s ❌@148
|
||||||
|
[12:20:30] [60,000/500,000] Checkpoint saved
|
||||||
|
[12:20:30] Regenerating road (seed=80972)...
|
||||||
|
[12:20:33] Road ready.
|
||||||
|
[12:20:44] Eval (seed=80972): 191.4r/692s ❌@692
|
||||||
|
[12:28:53] [70,000/500,000] Checkpoint saved
|
||||||
|
[12:28:53] Regenerating road (seed=94871)...
|
||||||
|
[12:28:56] Road ready.
|
||||||
|
[12:29:02] Eval (seed=94871): 77.6r/307s ❌@307
|
||||||
|
[12:36:13] [80,000/500,000] Checkpoint saved
|
||||||
|
[12:36:13] Regenerating road (seed=95951)...
|
||||||
|
[12:36:16] Road ready.
|
||||||
|
[12:36:31] Eval (seed=95951): 287.5r/1028s ❌@1028
|
||||||
|
[12:44:31] [90,000/500,000] Checkpoint saved
|
||||||
|
[12:44:31] Regenerating road (seed=11746)...
|
||||||
|
[12:44:34] Road ready.
|
||||||
|
[12:44:41] Eval (seed=11746): 74.4r/326s ❌@326
|
||||||
|
[12:53:25] [100,000/500,000] Checkpoint saved
|
||||||
|
[12:53:25] Regenerating road (seed=91868)...
|
||||||
|
[12:53:28] Road ready.
|
||||||
|
[12:53:34] Eval (seed=91868): 81.3r/309s ❌@309
|
||||||
|
[13:01:22] [110,000/500,000] Checkpoint saved
|
||||||
|
[13:01:22] Regenerating road (seed=29423)...
|
||||||
|
[13:01:25] Road ready.
|
||||||
|
[13:01:31] Eval (seed=29423): 71.7r/290s ❌@290
|
||||||
|
[13:08:34] [120,000/500,000] Checkpoint saved
|
||||||
|
[13:08:34] Regenerating road (seed=36897)...
|
||||||
|
[13:08:37] Road ready.
|
||||||
|
[13:08:43] Eval (seed=36897): 60.6r/257s ❌@257
|
||||||
|
[13:15:45] [130,000/500,000] Checkpoint saved
|
||||||
|
[13:15:45] Regenerating road (seed=73069)...
|
||||||
|
[13:15:48] Road ready.
|
||||||
|
[13:15:54] Eval (seed=73069): 67.5r/262s ❌@262
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
[13:37:03] ============================================================
|
||||||
|
[13:37:03] Exp 27: fresh weights | truly random roads | variable throttle
|
||||||
|
[13:37:03] Sim: localhost:9091 → donkey-generated-roads-v0
|
||||||
|
[13:37:03] Steering: 7 bins | Throttle: 3 bins → [0.2, 0.5, 1.0]
|
||||||
|
[13:37:03] LR=0.0003, ent_coef=0.05, n_steps=1024
|
||||||
|
[13:37:03] Total=500,000 steps, checkpoint every 10,000
|
||||||
|
[13:37:03] CTE term: >2.0m for >0.5s
|
||||||
|
[13:37:03] Speed term: <1.0 for >1.5s
|
||||||
|
[13:37:03] Episode cap: 30.0s | Road regen: random seed each checkpoint
|
||||||
|
[13:37:03] BrakeOnUpdateCallback: enabled
|
||||||
|
[13:37:03] ============================================================
|
||||||
|
[13:37:03] Connecting to sim...
|
||||||
|
[13:37:04] Connected. obs=(3, 120, 160), action=Discrete(21)
|
||||||
|
[13:37:04] Initial road regen (seed=81035)...
|
||||||
|
[13:37:07] Road ready.
|
||||||
|
[13:37:07] Creating fresh PPO model (no warm start)...
|
||||||
|
[13:37:08] Model created. Action space: 21 discrete actions
|
||||||
|
[13:37:08] Exp 27 started — PID 1082126
|
||||||
|
[13:37:08] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp27-random-roads/run_2026-05-06_133703_random_roads.log
|
||||||
|
[13:45:35] [10,000/500,000] Checkpoint saved
|
||||||
|
[13:45:35] Regenerating road (seed=68546)...
|
||||||
|
[13:45:38] Road ready.
|
||||||
|
[13:45:42] Eval (seed=68546): 39.0r/145s ❌@145
|
||||||
|
[13:45:43] NEW BEST: steps=145 reward=39.0
|
||||||
|
[13:52:28] [20,000/500,000] Checkpoint saved
|
||||||
|
[13:52:28] Regenerating road (seed=35735)...
|
||||||
|
[13:52:31] Road ready.
|
||||||
|
[13:52:36] Eval (seed=35735): 71.6r/230s ❌@230
|
||||||
|
[13:52:37] NEW BEST: steps=230 reward=71.6
|
||||||
|
[13:58:59] [30,000/500,000] Checkpoint saved
|
||||||
|
[13:58:59] Regenerating road (seed=98061)...
|
||||||
|
[13:59:02] Road ready.
|
||||||
|
[13:59:06] Eval (seed=98061): 39.2r/139s ❌@139
|
||||||
|
[14:07:08] [40,000/500,000] Checkpoint saved
|
||||||
|
[14:07:08] Regenerating road (seed=2167)...
|
||||||
|
[14:07:11] Road ready.
|
||||||
|
[14:07:16] Eval (seed=2167): 33.9r/148s ❌@148
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,12 @@
|
||||||
|
[14:09:34] ============================================================
|
||||||
|
[14:09:34] Exp 27: fresh weights | truly random roads | variable throttle
|
||||||
|
[14:09:34] Sim: localhost:9091 → donkey-generated-roads-v0
|
||||||
|
[14:09:34] Steering: 7 bins | Throttle: 3 bins → [0.2, 0.5, 1.0]
|
||||||
|
[14:09:34] LR=0.0003, ent_coef=0.05, n_steps=1024
|
||||||
|
[14:09:34] Total=500,000 steps, checkpoint every 10,000
|
||||||
|
[14:09:34] CTE term: >2.0m for >0.5s
|
||||||
|
[14:09:34] Speed term: <1.0 for >1.5s
|
||||||
|
[14:09:34] Episode cap: 30.0s | Road regen: random seed each checkpoint
|
||||||
|
[14:09:34] BrakeOnUpdateCallback: enabled
|
||||||
|
[14:09:34] ============================================================
|
||||||
|
[14:09:34] Connecting to sim...
|
||||||
|
|
@ -0,0 +1,145 @@
|
||||||
|
[14:13:28] ============================================================
|
||||||
|
[14:13:28] Exp 27: fresh weights | truly random roads | variable throttle
|
||||||
|
[14:13:28] Sim: localhost:9091 → donkey-generated-roads-v0
|
||||||
|
[14:13:28] Steering: 7 bins | Throttle: 3 bins → [0.2, 0.5, 1.0]
|
||||||
|
[14:13:28] LR=0.0003, ent_coef=0.05, n_steps=1024
|
||||||
|
[14:13:28] Total=500,000 steps, checkpoint every 10,000
|
||||||
|
[14:13:28] CTE term: >2.0m for >0.5s
|
||||||
|
[14:13:28] Speed term: <1.0 for >1.5s
|
||||||
|
[14:13:28] Episode cap: 30.0s | Road regen: random seed each checkpoint
|
||||||
|
[14:13:28] BrakeOnUpdateCallback: enabled
|
||||||
|
[14:13:28] ============================================================
|
||||||
|
[14:13:28] Connecting to sim...
|
||||||
|
[14:13:29] Connected. obs=(3, 120, 160), action=Discrete(21)
|
||||||
|
[14:13:29] Initial road regen (seed=89942)...
|
||||||
|
[14:13:32] Road ready.
|
||||||
|
[14:13:32] Creating fresh PPO model (no warm start)...
|
||||||
|
[14:13:34] Model created. Action space: 21 discrete actions
|
||||||
|
[14:13:34] Exp 27 started — PID 1094759
|
||||||
|
[14:13:34] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp27-random-roads/run_2026-05-06_141328_random_roads.log
|
||||||
|
[14:24:26] [10,000/500,000] Checkpoint saved
|
||||||
|
[14:24:26] Regenerating road (seed=63790)...
|
||||||
|
[14:24:29] Road ready.
|
||||||
|
[14:24:43] Eval (seed=63790): 250.5r/924s ❌@924
|
||||||
|
[14:24:43] NEW BEST: steps=924 reward=250.5
|
||||||
|
[14:33:21] [20,000/500,000] Checkpoint saved
|
||||||
|
[14:33:21] Regenerating road (seed=54863)...
|
||||||
|
[14:33:24] Road ready.
|
||||||
|
[14:33:38] Eval (seed=54863): 275.1r/925s ❌@925
|
||||||
|
[14:33:38] NEW BEST: steps=925 reward=275.1
|
||||||
|
[14:41:36] [30,000/500,000] Checkpoint saved
|
||||||
|
[14:41:36] Regenerating road (seed=84765)...
|
||||||
|
[14:41:39] Road ready.
|
||||||
|
[14:41:58] Eval (seed=84765): 377.3r/1325s ❌@1325
|
||||||
|
[14:41:58] NEW BEST: steps=1325 reward=377.3
|
||||||
|
[14:49:45] [40,000/500,000] Checkpoint saved
|
||||||
|
[14:49:45] Regenerating road (seed=62695)...
|
||||||
|
[14:49:48] Road ready.
|
||||||
|
[14:49:52] Eval (seed=62695): 33.8r/134s ❌@134
|
||||||
|
[14:57:31] [50,000/500,000] Checkpoint saved
|
||||||
|
[14:57:31] Regenerating road (seed=51171)...
|
||||||
|
[14:57:34] Road ready.
|
||||||
|
[14:57:56] Eval (seed=51171): 452.6r/1575s ❌@1575
|
||||||
|
[14:57:57] NEW BEST: steps=1575 reward=452.6
|
||||||
|
[15:05:11] [60,000/500,000] Checkpoint saved
|
||||||
|
[15:05:11] Regenerating road (seed=13427)...
|
||||||
|
[15:05:14] Road ready.
|
||||||
|
[15:05:29] Eval (seed=13427): 289.0r/1013s ❌@1013
|
||||||
|
[15:12:02] [70,000/500,000] Checkpoint saved
|
||||||
|
[15:12:02] Regenerating road (seed=99752)...
|
||||||
|
[15:12:05] Road ready.
|
||||||
|
[15:12:29] Eval (seed=99752): 432.3r/1648s ❌@1648
|
||||||
|
[15:12:29] NEW BEST: steps=1648 reward=432.3
|
||||||
|
[15:19:09] [80,000/500,000] Checkpoint saved
|
||||||
|
[15:19:09] Regenerating road (seed=40584)...
|
||||||
|
[15:19:12] Road ready.
|
||||||
|
[15:19:34] Eval (seed=40584): 449.9r/1567s ❌@1567
|
||||||
|
[15:25:49] [90,000/500,000] Checkpoint saved
|
||||||
|
[15:25:49] Regenerating road (seed=23677)...
|
||||||
|
[15:25:52] Road ready.
|
||||||
|
[15:26:14] Eval (seed=23677): 444.3r/1522s ❌@1522
|
||||||
|
[15:33:01] [100,000/500,000] Checkpoint saved
|
||||||
|
[15:33:01] Regenerating road (seed=11818)...
|
||||||
|
[15:33:04] Road ready.
|
||||||
|
[15:33:09] Eval (seed=11818): 30.4r/160s ❌@160
|
||||||
|
[15:39:06] [110,000/500,000] Checkpoint saved
|
||||||
|
[15:39:06] Regenerating road (seed=15439)...
|
||||||
|
[15:39:09] Road ready.
|
||||||
|
[15:39:32] Eval (seed=15439): 462.7r/1580s ❌@1580
|
||||||
|
[15:45:27] [120,000/500,000] Checkpoint saved
|
||||||
|
[15:45:27] Regenerating road (seed=79776)...
|
||||||
|
[15:45:30] Road ready.
|
||||||
|
[15:45:43] Eval (seed=79776): 251.7r/893s ❌@893
|
||||||
|
[15:51:51] [130,000/500,000] Checkpoint saved
|
||||||
|
[15:51:51] Regenerating road (seed=51)...
|
||||||
|
[15:51:54] Road ready.
|
||||||
|
[15:52:09] Eval (seed=51): 273.5r/1029s ❌@1029
|
||||||
|
[15:58:08] [140,000/500,000] Checkpoint saved
|
||||||
|
[15:58:08] Regenerating road (seed=15985)...
|
||||||
|
[15:58:11] Road ready.
|
||||||
|
[15:58:30] Eval (seed=15985): 386.8r/1260s ❌@1260
|
||||||
|
[16:04:29] [150,000/500,000] Checkpoint saved
|
||||||
|
[16:04:29] Regenerating road (seed=78623)...
|
||||||
|
[16:04:32] Road ready.
|
||||||
|
[16:04:37] Eval (seed=78623): 50.5r/193s ❌@193
|
||||||
|
[16:11:04] [160,000/500,000] Checkpoint saved
|
||||||
|
[16:11:04] Regenerating road (seed=68780)...
|
||||||
|
[16:11:07] Road ready.
|
||||||
|
[16:11:19] Eval (seed=68780): 194.3r/753s ❌@753
|
||||||
|
[16:17:48] [170,000/500,000] Checkpoint saved
|
||||||
|
[16:17:48] Regenerating road (seed=27669)...
|
||||||
|
[16:17:51] Road ready.
|
||||||
|
[16:18:11] Eval (seed=27669): 375.2r/1371s ❌@1371
|
||||||
|
[16:24:47] [180,000/500,000] Checkpoint saved
|
||||||
|
[16:24:47] Regenerating road (seed=32153)...
|
||||||
|
[16:24:50] Road ready.
|
||||||
|
[16:24:55] Eval (seed=32153): 45.6r/188s ❌@188
|
||||||
|
[16:31:56] [190,000/500,000] Checkpoint saved
|
||||||
|
[16:31:56] Regenerating road (seed=23522)...
|
||||||
|
[16:31:59] Road ready.
|
||||||
|
[16:32:22] Eval (seed=23522): 444.2r/1652s ❌@1652
|
||||||
|
[16:32:22] NEW BEST: steps=1652 reward=444.2
|
||||||
|
[16:38:46] [200,000/500,000] Checkpoint saved
|
||||||
|
[16:38:46] Regenerating road (seed=35712)...
|
||||||
|
[16:38:49] Road ready.
|
||||||
|
[16:38:59] Eval (seed=35712): 200.8r/657s ❌@657
|
||||||
|
[16:45:01] [210,000/500,000] Checkpoint saved
|
||||||
|
[16:45:01] Regenerating road (seed=84828)...
|
||||||
|
[16:45:04] Road ready.
|
||||||
|
[16:45:09] Eval (seed=84828): 53.5r/219s ❌@219
|
||||||
|
[16:51:16] [220,000/500,000] Checkpoint saved
|
||||||
|
[16:51:16] Regenerating road (seed=66225)...
|
||||||
|
[16:51:19] Road ready.
|
||||||
|
[16:51:42] Eval (seed=66225): 425.7r/1612s ❌@1612
|
||||||
|
[16:58:23] [230,000/500,000] Checkpoint saved
|
||||||
|
[16:58:23] Regenerating road (seed=41094)...
|
||||||
|
[16:58:26] Road ready.
|
||||||
|
[16:58:36] Eval (seed=41094): 162.1r/581s ❌@581
|
||||||
|
[17:05:03] [240,000/500,000] Checkpoint saved
|
||||||
|
[17:05:03] Regenerating road (seed=51566)...
|
||||||
|
[17:05:06] Road ready.
|
||||||
|
[17:05:29] Eval (seed=51566): 438.2r/1613s ❌@1613
|
||||||
|
[17:12:04] [250,000/500,000] Checkpoint saved
|
||||||
|
[17:12:04] Regenerating road (seed=18319)...
|
||||||
|
[17:12:07] Road ready.
|
||||||
|
[17:12:10] Eval (seed=18319): 19.8r/116s ❌@116
|
||||||
|
[17:18:52] [260,000/500,000] Checkpoint saved
|
||||||
|
[17:18:52] Regenerating road (seed=99555)...
|
||||||
|
[17:18:55] Road ready.
|
||||||
|
[17:19:05] Eval (seed=99555): 182.6r/603s ❌@603
|
||||||
|
[17:25:37] [270,000/500,000] Checkpoint saved
|
||||||
|
[17:25:37] Regenerating road (seed=59896)...
|
||||||
|
[17:25:40] Road ready.
|
||||||
|
[17:25:45] Eval (seed=59896): 59.4r/228s ❌@228
|
||||||
|
[17:32:04] [280,000/500,000] Checkpoint saved
|
||||||
|
[17:32:04] Regenerating road (seed=50173)...
|
||||||
|
[17:32:07] Road ready.
|
||||||
|
[17:32:28] Eval (seed=50173): 409.5r/1498s ❌@1498
|
||||||
|
[17:39:17] [290,000/500,000] Checkpoint saved
|
||||||
|
[17:39:17] Regenerating road (seed=2910)...
|
||||||
|
[17:39:20] Road ready.
|
||||||
|
[17:39:30] Eval (seed=2910): 132.7r/558s ❌@558
|
||||||
|
[17:47:37] [300,000/500,000] Checkpoint saved
|
||||||
|
[17:47:37] Regenerating road (seed=46529)...
|
||||||
|
[17:47:40] Road ready.
|
||||||
|
[17:47:48] Eval (seed=46529): 120.3r/465s ❌@465
|
||||||
Loading…
Reference in New Issue