feat(exp27): random roads with variable throttle + road regen + self-intersection fix

Fixes four root-cause bugs discovered before/during this experiment:
1. regen_road was silently doing nothing — TcpCarHandler.RegenRoad() bailed on
   null TrainingManager; added direct RoadBuilder+PathManager fallback.
2. MapOverlay minimap not refreshing — fixed to check node[10] position change.
3. BrakeOnUpdateCallback: sends zero control before PPO gradient updates to
   prevent car drifting during 3-8s CPU pause.
4. PathManager self-intersection fix: retry loop with XZ segment-segment math
   (up to 20 retries) — verifiably different roads per seed.

Exp27 trains fresh weights with N_THROTTLE=3 (bins 0.2/0.5/1.0), ent_coef=0.05,
500k steps, regen_road TCP message per checkpoint. Peak: 462.7r/1580 steps @110k.

Also adds verify_minimap_refresh.py and verify_road_regen.py diagnostic scripts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Paul Huliganga 2026-05-14 15:32:32 -04:00
parent 0615b22cb9
commit 36be93e357
9 changed files with 14077 additions and 0 deletions

View File

@ -0,0 +1,282 @@
"""
Exp 27: Fresh weights, truly random roads, variable throttle.
Changes from exp26:
1. Fresh weights (no warm start) — exp26 peaked at 20k/300k then regressed.
2. Random roads: regen_road TCP message with random seed each checkpoint.
No close+reconnect (which was silently generating seed=2 road every time).
3. Variable throttle: N_THROTTLE=3 bins [0.2, 0.5, 1.0] after ThrottleClampWrapper.
4. BrakeOnUpdateCallback: sends zero control before PPO gradient updates,
preventing car from drifting into barriers during the ~5-15s CPU update pause.
5. Tighter CTE termination: 2.0m / 0.5s (was 3.0m / 1.0s).
6. Higher entropy: ent_coef=0.05 to prevent premature policy collapse.
7. Smaller n_steps=1024: shorter rollout → shorter gradient update pause.
8. set_ai_text: pushes training stats to sim overlay each checkpoint.
9. 500k total steps — more budget for fresh weights to learn variable throttle.
"""
import os
import sys
import time
import random
from datetime import datetime
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
_SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp27-random-roads'
_PIDFILE = os.path.join(_SAVE_DIR, 'current.pid')
os.makedirs(_SAVE_DIR, exist_ok=True)
# Single-instance guard: if the PID file names a *different* process that can
# still be signalled, refuse to start a second trainer.
if os.path.exists(_PIDFILE):
    try:
        # Context manager closes the handle promptly instead of leaking it.
        with open(_PIDFILE) as _pid_fh:
            _old = int(_pid_fh.read().strip())
        if _old != os.getpid():
            # Signal 0 delivers nothing; os.kill raises OSError when the PID
            # cannot be signalled, which is treated below as "not running".
            os.kill(_old, 0)
            print(f'[exp27] Another instance already running (PID {_old}). Exiting.', flush=True)
            sys.exit(1)
    except (OSError, ValueError):
        # Stale PID, vanished/unreadable file, or non-numeric contents:
        # ignore the PID file and continue starting up.
        pass
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.callbacks import BaseCallback
from discretize_action import DiscretizedActionWrapper
from donkeycar_sb3_runner import ThrottleClampWrapper
from multitrack_runner import StuckTerminationWrapper
from reward_wrapper import SpeedRewardWrapper
# --- Simulator connection: passed to gym.make() in make_env() ---
HOST = 'localhost'
PORT = 9091
TRACK_ID = 'donkey-generated-roads-v0'
# Passed to ThrottleClampWrapper as throttle_min.
THROTTLE_MIN = 0.2
# --- PPO hyperparameters: passed to the PPO constructor ---
LR = 0.0003
ENT_COEF = 0.05
N_STEPS = 1024 # smaller rollout → shorter gradient-update pause
# --- Training schedule ---
TOTAL_STEPS = 500_000
CHECKPOINT_EVERY = 10_000
REGEN_WAIT = 3.0 # seconds regen_road() sleeps after queueing the regen message
# --- Action discretisation: passed to DiscretizedActionWrapper ---
N_STEER = 7
N_THROTTLE = 3 # throttle bins [0.0,0.5,1.0] → after ThrottleClampWrapper: [0.2,0.5,1.0]
# --- Episode termination: passed to StuckTerminationWrapper ---
MAX_STUCK_SECONDS = 5.0
MAX_EPISODE_SECONDS = 30.0
LOW_SPEED_THRESHOLD = 1.0
MAX_LOW_SPEED_SECONDS = 1.5
MAX_CTE_TERMINATION = 2.0 # tighter than exp26 (3.0m)
MAX_HIGH_CTE_SECONDS = 0.5 # tighter than exp26 (1.0s)
# --- Reward shaping: passed to SpeedRewardWrapper ---
EFFICIENCY_WINDOW = 30
MIN_EFFICIENCY = 0.15
# NOTE: distinct from MAX_CTE_TERMINATION above; this one goes to
# SpeedRewardWrapper(max_cte=...), not to StuckTerminationWrapper.
MAX_CTE = 8.0
MIN_LAP_TIME = 12.0
PROGRESS_PATIENCE = 100
import logging

# One timestamped log file per run, stored next to the checkpoints.
_log_ts = datetime.now().strftime('%Y-%m-%d_%H%M%S')
_log_path = os.path.join(_SAVE_DIR, f'run_{_log_ts}_random_roads.log')

# The script logs non-ASCII text ('→', '—', '❌'), so pin the file encoding
# rather than relying on the locale default.
_fh = logging.FileHandler(_log_path, encoding='utf-8')
_fh.setFormatter(logging.Formatter('%(message)s'))

# Mirror every record to stdout so the run can be followed live.
_sh = logging.StreamHandler(sys.stdout)
_sh.setFormatter(logging.Formatter('%(message)s'))

# Dedicated logger; propagate=False keeps records away from the root logger's
# handlers so nothing is emitted twice.
file_log = logging.getLogger('exp27')
file_log.setLevel(logging.INFO)
file_log.propagate = False
file_log.addHandler(_fh)
file_log.addHandler(_sh)
def flog(msg):
    """Emit *msg* on the ``exp27`` logger, prefixed with the current time as ``[HH:MM:SS]``."""
    file_log.info(f'[{datetime.now():%H:%M:%S}] {msg}')
def make_env():
    """
    Return a zero-argument factory that builds the fully wrapped sim environment.

    Wrapper order, innermost first: raw gym env -> ThrottleClampWrapper ->
    DiscretizedActionWrapper -> StuckTerminationWrapper -> SpeedRewardWrapper.
    The factory form is what DummyVecEnv expects in its list of constructors.
    """
    def _init():
        sim = gym.make(TRACK_ID, conf={'host': HOST, 'port': PORT})
        clamped = ThrottleClampWrapper(sim, throttle_min=THROTTLE_MIN)
        discrete = DiscretizedActionWrapper(
            clamped, n_steer=N_STEER, n_throttle=N_THROTTLE
        )

        # Early-termination rules (stuck, too slow, off-centre, time cap).
        termination_cfg = dict(
            stuck_steps=40,
            min_displacement=0.5,
            max_stuck_seconds=MAX_STUCK_SECONDS,
            max_episode_seconds=MAX_EPISODE_SECONDS,
            low_speed_threshold=LOW_SPEED_THRESHOLD,
            max_low_speed_seconds=MAX_LOW_SPEED_SECONDS,
            max_cte=MAX_CTE_TERMINATION,
            max_high_cte_seconds=MAX_HIGH_CTE_SECONDS,
        )
        guarded = StuckTerminationWrapper(discrete, **termination_cfg)

        # Reward shaping settings.
        reward_cfg = dict(
            window_size=EFFICIENCY_WINDOW,
            min_efficiency=MIN_EFFICIENCY,
            max_cte=MAX_CTE,
            min_lap_time=MIN_LAP_TIME,
            progress_patience=PROGRESS_PATIENCE,
        )
        return SpeedRewardWrapper(guarded, **reward_cfg)

    return _init
def get_handler(vec_env):
    """
    Return the sim message handler behind *vec_env*.

    Follows ``vec_env.venv`` to the inner vectorised env, takes its first
    sub-environment, strips all wrappers via ``.unwrapped`` and returns
    ``viewer.handler``.
    """
    first_env = vec_env.venv.envs[0]
    base_env = first_env.unwrapped
    return base_env.viewer.handler
def regen_road(vec_env, seed):
    """
    Queue a ``regen_road`` message carrying *seed*, then sleep ``REGEN_WAIT`` seconds.

    All payload values are sent as strings. The sleep is a fixed delay; this
    function does not wait for any acknowledgement from the sim.
    """
    payload = {
        'msg_type': 'regen_road',
        'road_style': '0',
        'rand_seed': str(seed),
        'turn_increment': '0.0',
    }
    handler = get_handler(vec_env)
    handler.queue_message(payload)
    time.sleep(REGEN_WAIT)
def set_ai_text(vec_env, text):
    """
    Best-effort: queue a ``set_ai_text`` message so the sim overlay shows *text*.

    The overlay is cosmetic, so a failure must never interrupt training.
    Failures are logged instead of being silently discarded, so a broken
    overlay can be diagnosed from the run log.
    """
    try:
        get_handler(vec_env).queue_message({'msg_type': 'set_ai_text', 'text': text})
    except Exception as exc:
        flog(f' set_ai_text failed (ignored): {exc!r}')
class BrakeOnUpdateCallback(BaseCallback):
    """
    Sends a zero-throttle control message to the sim before PPO gradient updates.

    ``_on_rollout_end()`` fires after the n_steps rollout has been collected,
    right before PPO starts its gradient update (which can take 5-15s on CPU).
    Without this, the sim holds the last action and the car drifts into
    barriers during the pause.

    NOTE(review): the message sets steering, throttle and brake all to '0.0',
    so it releases the throttle rather than applying the brake — confirm this
    is the intended behaviour.
    """

    def __init__(self, vec_env):
        super().__init__(verbose=0)
        # Kept so the handler can be looked up freshly at every rollout end.
        self._vec_env = vec_env

    def _on_rollout_end(self):
        try:
            get_handler(self._vec_env).queue_message({
                'msg_type': 'control',
                'steering': '0.0',
                'throttle': '0.0',
                'brake': '0.0',
            })
        except Exception as exc:
            # Still best-effort (training must continue), but no longer silent:
            # a failure here means the car keeps its last action during the update.
            flog(f' BrakeOnUpdateCallback: could not queue control message: {exc!r}')

    def _on_step(self):
        # Returning True tells the training loop to keep going.
        return True
# ---- Startup banner: record the run configuration in the log ----
flog('=' * 60)
flog('Exp 27: fresh weights | truly random roads | variable throttle')
# Separator restored so the line reads "host:port → track", as in the run logs.
flog(f' Sim: {HOST}:{PORT} → {TRACK_ID}')
flog(f' Steering: {N_STEER} bins | Throttle: {N_THROTTLE} bins → [0.2, 0.5, 1.0]')
flog(f' LR={LR}, ent_coef={ENT_COEF}, n_steps={N_STEPS}')
flog(f' Total={TOTAL_STEPS:,} steps, checkpoint every {CHECKPOINT_EVERY:,}')
flog(f' CTE term: >{MAX_CTE_TERMINATION}m for >{MAX_HIGH_CTE_SECONDS}s')
flog(f' Speed term: <{LOW_SPEED_THRESHOLD} for >{MAX_LOW_SPEED_SECONDS}s')
flog(f' Episode cap: {MAX_EPISODE_SECONDS}s | Road regen: random seed each checkpoint')
flog(' BrakeOnUpdateCallback: enabled')
flog('=' * 60)

# ---- Connect: a single wrapped env, vectorised and transposed for the CNN policy ----
flog('Connecting to sim...')
env = DummyVecEnv([make_env()])
env = VecTransposeImage(env)
flog(f' Connected. obs={env.observation_space.shape}, action={env.action_space}')

# ---- Start on a random road instead of whatever the sim loaded by default ----
first_seed = random.randint(0, 100000)
flog(f' Initial road regen (seed={first_seed})...')
regen_road(env, first_seed)
flog(' Road ready.')

# ---- Fresh PPO model (no warm start) ----
flog('Creating fresh PPO model (no warm start)...')
model = PPO(
    'CnnPolicy',
    env,
    learning_rate=LR,
    n_steps=N_STEPS,
    ent_coef=ENT_COEF,
    device='cpu',
    verbose=1,
)
flog(f' Model created. Action space: {env.action_space.n} discrete actions')

# Publish our PID for the single-instance guard at the top of the script.
with open(_PIDFILE, 'w') as _pid_out:
    _pid_out.write(str(os.getpid()))
flog(f'Exp 27 started — PID {os.getpid()}')
flog(f'Log: {_log_path}')

# ---- State consumed by the training loop below ----
# -inf guarantees the first successful eval is recorded as the best so far.
best_total_steps = float('-inf')
best_total_reward = float('-inf')
steps_done = 0
best_model_path = os.path.join(_SAVE_DIR, 'best_model.zip')
brake_cb = BrakeOnUpdateCallback(env)
current_seed = first_seed
# Cap on the length of one post-checkpoint evaluation episode. An eval that
# reaches the cap without terminating counts as a full-length (successful) run.
EVAL_MAX_STEPS = 2000

# Train in CHECKPOINT_EVERY-sized segments. After each segment: save, regenerate
# the road with a new random seed, run one deterministic eval episode and keep
# the best model. try/finally ensures the sim connection is closed even when
# training aborts with an exception or Ctrl-C.
try:
    while steps_done < TOTAL_STEPS:
        seg_steps = min(CHECKPOINT_EVERY, TOTAL_STEPS - steps_done)
        # NOTE(review): SB3 presumably collects whole rollouts of N_STEPS, so a
        # segment may run slightly more than seg_steps env steps; steps_done is
        # the nominal count — compare with model.num_timesteps to confirm.
        model.learn(total_timesteps=seg_steps, reset_num_timesteps=False, callback=brake_cb)
        steps_done += seg_steps

        # Numbered checkpoint plus a rolling "latest" copy.
        ckpt = os.path.join(_SAVE_DIR, f'checkpoint_{steps_done:07d}')
        model.save(ckpt)
        model.save(os.path.join(_SAVE_DIR, 'model'))
        flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved')

        current_seed = random.randint(0, 100000)
        flog(f' Regenerating road (seed={current_seed})...')
        regen_road(env, current_seed)
        flog(' Road ready.')

        # An eval failure is logged and skipped; it must not end the training run.
        try:
            obs = env.reset()
            ep_rewards = np.zeros(env.num_envs)
            ep_steps = np.zeros(env.num_envs)
            done_mask = np.zeros(env.num_envs, dtype=bool)
            for _ in range(EVAL_MAX_STEPS):
                action, _state = model.predict(obs, deterministic=True)
                obs, rewards, dones, _infos = env.step(action)
                # Only envs still in their first episode accumulate reward and
                # steps; the step on which an env finishes is still counted.
                active = ~done_mask
                ep_rewards[active] += rewards[active]
                ep_steps[active] += 1
                done_mask |= np.asarray(dones, dtype=bool)
                if done_mask.all():
                    break

            total_steps_eval = int(ep_steps[0])
            total_reward_eval = float(ep_rewards[0])
            # '✅' matches the success marker seen in the recorded run logs.
            status = '✅' if total_steps_eval >= EVAL_MAX_STEPS else f'❌@{total_steps_eval}'
            flog(f' Eval (seed={current_seed}): {total_reward_eval:.1f}r/{total_steps_eval}s {status}')

            overlay = (f'Exp27 {steps_done//1000:3d}k/{TOTAL_STEPS//1000}k\n'
                       f'R:{total_reward_eval:6.1f} Seed:{current_seed} {status}')
            set_ai_text(env, overlay)

            # Rank by episode length first, reward second (tie-break).
            if (total_steps_eval > best_total_steps
                    or (total_steps_eval == best_total_steps
                        and total_reward_eval > best_total_reward)):
                best_total_steps = total_steps_eval
                best_total_reward = total_reward_eval
                model.save(best_model_path)
                flog(f' NEW BEST: steps={best_total_steps} reward={best_total_reward:.1f}')
        except Exception as e:
            flog(f' Eval error: {e}')
finally:
    env.close()

flog('=' * 60)
flog('Exp 27 complete.')
flog(f'Best model: {best_model_path}')
flog(f'Best eval: steps={best_total_steps} reward={best_total_reward:.1f}')
flog('=' * 60)

View File

@ -0,0 +1,46 @@
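"""
Quick test: regen the road once per seed in SEEDS and print the car's position and CTE after reset + one step.
The car always respawns at startPos, so these values alone do not prove the road changed.
The minimap fix refreshes when road node[10]'s position changes, so watch the minimap in the sim window: if it changes shape after each regen, the fix works.
"""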
import sys, time
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
import gymnasium as gym
import gym_donkeycar # noqa
HOST, PORT = 'localhost', 9091
SEEDS = [1111, 55555, 99999, 12345, 77777]
env = gym.make('donkey-generated-roads-v0', conf={'host': HOST, 'port': PORT})
handler = env.unwrapped.viewer.handler


def regen(seed):
    """
    Regenerate the road with *seed*, reset, take one step and return telemetry.

    Returns:
        ``(pos, cte)`` read from the step's ``info`` dict; either value is
        ``None`` when the sim did not report it.
    """
    handler.queue_message({
        'msg_type': 'regen_road', 'road_style': '0',
        'rand_seed': str(seed), 'turn_increment': '0.0',
    })
    # Fixed delay after queueing the regen message, before resetting.
    time.sleep(3.5)
    obs, info = env.reset()
    # Take one step to get fresh telemetry
    obs, _, _, _, info = env.step([0.0, 0.1])
    return info.get('pos', None), info.get('cte', None)


print('Testing road regen — recording car position after reset+1step:')
print()
# try/finally closes the sim connection even if a regen or step fails.
try:
    for seed in SEEDS:
        pos, cte = regen(seed)
        # Explicit None/length checks: truthiness is ambiguous for array-like
        # positions, and a short sequence would break the indexing below.
        p = list(pos)[:3] if pos is not None and len(pos) >= 3 else [0, 0, 0]
        # 'cte' may be absent from info; formatting None with :.3f would raise.
        cte_text = f'{cte:.3f}' if cte is not None else 'n/a'
        print(f' Seed {seed:6d}: pos x={p[0]:.3f} y={p[1]:.3f} z={p[2]:.3f} cte={cte_text}')
finally:
    env.close()
print()
# All positions should be the same (car always spawns at startPos).
# But road node[10] position will differ — proven by the fact that CTE diverges after a few steps.
print('Car spawn position is always the same (startPos) — comparison via driving test is needed.')
print('Run verify_road_regen.py for that proof.')
print()
print('If the MINIMAP in the sim window changed shape between each regen above, the fix works.')

View File

@ -0,0 +1,111 @@
"""
Verify that regen_road actually produces different tracks.
Method: connect to sim, regen road with 3 seeds, and drive each road with the same
proportional lane-following controller (steer = -cte * STEER_GAIN, fixed throttle)
for up to STEPS (500) steps, recording CTE at every step. Different roads curve
differently, so the same controller ends with a different final CTE on each road.
A final-CTE spread above 0.3m across seeds is taken as evidence that the roads differ.
Also prints the car's final position for each seed.
"""
import sys
import time
import numpy as np
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
import gymnasium as gym
import gym_donkeycar # noqa: F401 — registers donkey envs
# Simulator connection and track, passed to gym.make() below.
HOST = 'localhost'
PORT = 9091
TRACK_ID = 'donkey-generated-roads-v0'
# Road seeds to compare; one drive is performed per seed.
SEEDS = [1111, 55555, 99999]
STEPS = 500 # maximum number of lane-following steps driven per seed
THROTTLE = 0.3 # constant throttle component of every action
STEER_GAIN = 0.8 # proportional: steer = -cte * gain
WAIT = 3.5 # seconds regen_road() sleeps after queueing the regen message
def get_handler(env):
    """Return the sim message handler: ``env.unwrapped.viewer.handler``."""
    viewer = env.unwrapped.viewer
    return viewer.handler
def regen_road(env, seed):
    """
    Queue a ``regen_road`` message carrying *seed*, then sleep ``WAIT`` seconds.

    All payload values are sent as strings. The sleep is a fixed delay; this
    function does not wait for any acknowledgement from the sim.
    """
    payload = {
        'msg_type': 'regen_road',
        'road_style': '0',
        'rand_seed': str(seed),
        'turn_increment': '0.0',
    }
    handler = get_handler(env)
    handler.queue_message(payload)
    time.sleep(WAIT)
print('Connecting to sim...')
env = gym.make(TRACK_ID, conf={'host': HOST, 'port': PORT})
print(f' Connected. obs={env.observation_space.shape}')

# Per-seed summary statistics, keyed by seed.
results = {}
# try/finally closes the sim connection even if a regen, reset or step fails.
try:
    for seed in SEEDS:
        print(f'\n── Seed {seed} ──────────────────────')
        print(' Regenerating road...')
        regen_road(env, seed)
        obs, info = env.reset()
        cte_values = []
        pos_values = []
        for step in range(STEPS):
            # Lane-following: steer proportional to CTE so the car stays on road.
            # Different road geometries will produce different CTE histories.
            # The first step has no CTE reading yet, so it steers straight.
            last_cte = cte_values[-1] if cte_values else 0.0
            steer = float(np.clip(-last_cte * STEER_GAIN, -1.0, 1.0))
            action = np.array([steer, THROTTLE], dtype=np.float32)
            obs, reward, terminated, truncated, info = env.step(action)
            cte = info.get('cte', 0.0)
            pos = info.get('pos', None)
            cte_values.append(float(cte))
            if pos is not None:
                pos_values.append(list(pos)[:3])
            if terminated or truncated:
                print(f' Episode ended at step {step+1}')
                break

        final_cte = cte_values[-1] if cte_values else 0.0
        mean_cte = float(np.mean(cte_values)) if cte_values else 0.0
        max_abs_cte = float(np.max(np.abs(cte_values))) if cte_values else 0.0
        final_pos = pos_values[-1] if pos_values else None
        results[seed] = {
            'final_cte': final_cte,
            'mean_cte': mean_cte,
            'max_abs_cte': max_abs_cte,
            'steps': len(cte_values),
            'final_pos': final_pos,
        }
        print(f' Steps driven : {len(cte_values)}')
        print(f' Final CTE : {final_cte:+.3f}m (+ = right of centre, - = left)')
        print(f' Mean CTE : {mean_cte:+.3f}m')
        print(f' Max |CTE| : {max_abs_cte:.3f}m')
        if final_pos:
            print(f' Final pos : x={final_pos[0]:.2f} y={final_pos[1]:.2f} z={final_pos[2]:.2f}')
finally:
    env.close()

print('\n' + '='*50)
# Header corrected: the controller above is proportional lane-following,
# not a straight-ahead policy.
print('SUMMARY — same lane-following policy, different seeds:')
print('='*50)
for seed, r in results.items():
    p = r['final_pos']
    pos_str = f'x={p[0]:.1f} z={p[2]:.1f}' if p else 'N/A'
    print(f' Seed {seed:6d}: CTE={r["final_cte"]:+.3f}m steps={r["steps"]} pos={pos_str}')

# Verdict: roads are judged different when the final CTEs spread by more than 0.3m.
ctes = [r['final_cte'] for r in results.values()]
spread = max(ctes) - min(ctes)
print(f'\nCTE spread across seeds: {spread:.3f}m')
if spread > 0.3:
    print('✅ ROADS ARE DIFFERENT — CTE spread > 0.3m confirms different road geometries')
else:
    print('❌ ROADS MAY BE THE SAME — CTE spread is small, road gen may not be working')

View File

@ -0,0 +1 @@
1094759

View File

@ -0,0 +1,73 @@
[11:31:18] ============================================================
[11:31:18] Exp 27: fresh weights | truly random roads | variable throttle
[11:31:18] Sim: localhost:9091 → donkey-generated-roads-v0
[11:31:18] Steering: 7 bins | Throttle: 3 bins → [0.2, 0.5, 1.0]
[11:31:18] LR=0.0003, ent_coef=0.05, n_steps=1024
[11:31:18] Total=500,000 steps, checkpoint every 10,000
[11:31:18] CTE term: >2.0m for >0.5s
[11:31:18] Speed term: <1.0 for >1.5s
[11:31:18] Episode cap: 30.0s | Road regen: random seed each checkpoint
[11:31:18] BrakeOnUpdateCallback: enabled
[11:31:18] ============================================================
[11:31:18] Connecting to sim...
[11:31:18] Connected. obs=(3, 120, 160), action=Discrete(21)
[11:31:18] Initial road regen (seed=93502)...
[11:31:21] Road ready.
[11:31:21] Creating fresh PPO model (no warm start)...
[11:31:24] Model created. Action space: 21 discrete actions
[11:31:24] Exp 27 started — PID 1038769
[11:31:24] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp27-random-roads/run_2026-05-06_113118_random_roads.log
[11:46:09] [10,000/500,000] Checkpoint saved
[11:46:09] Regenerating road (seed=42649)...
[11:46:12] Road ready.
[11:46:15] Eval (seed=42649): -0.3r/19s ❌@19
[11:46:15] NEW BEST: steps=19 reward=-0.3
[11:54:27] [20,000/500,000] Checkpoint saved
[11:54:27] Regenerating road (seed=77345)...
[11:54:30] Road ready.
[11:54:56] Eval (seed=77345): 376.1r/2000s ✅
[11:54:57] NEW BEST: steps=2000 reward=376.1
[12:00:59] [30,000/500,000] Checkpoint saved
[12:00:59] Regenerating road (seed=78734)...
[12:01:02] Road ready.
[12:01:21] Eval (seed=78734): 373.2r/1343s ❌@1343
[12:07:22] [40,000/500,000] Checkpoint saved
[12:07:22] Regenerating road (seed=11684)...
[12:07:25] Road ready.
[12:07:45] Eval (seed=11684): 426.2r/1439s ❌@1439
[12:13:34] [50,000/500,000] Checkpoint saved
[12:13:34] Regenerating road (seed=10443)...
[12:13:37] Road ready.
[12:13:41] Eval (seed=10443): 40.2r/148s ❌@148
[12:20:30] [60,000/500,000] Checkpoint saved
[12:20:30] Regenerating road (seed=80972)...
[12:20:33] Road ready.
[12:20:44] Eval (seed=80972): 191.4r/692s ❌@692
[12:28:53] [70,000/500,000] Checkpoint saved
[12:28:53] Regenerating road (seed=94871)...
[12:28:56] Road ready.
[12:29:02] Eval (seed=94871): 77.6r/307s ❌@307
[12:36:13] [80,000/500,000] Checkpoint saved
[12:36:13] Regenerating road (seed=95951)...
[12:36:16] Road ready.
[12:36:31] Eval (seed=95951): 287.5r/1028s ❌@1028
[12:44:31] [90,000/500,000] Checkpoint saved
[12:44:31] Regenerating road (seed=11746)...
[12:44:34] Road ready.
[12:44:41] Eval (seed=11746): 74.4r/326s ❌@326
[12:53:25] [100,000/500,000] Checkpoint saved
[12:53:25] Regenerating road (seed=91868)...
[12:53:28] Road ready.
[12:53:34] Eval (seed=91868): 81.3r/309s ❌@309
[13:01:22] [110,000/500,000] Checkpoint saved
[13:01:22] Regenerating road (seed=29423)...
[13:01:25] Road ready.
[13:01:31] Eval (seed=29423): 71.7r/290s ❌@290
[13:08:34] [120,000/500,000] Checkpoint saved
[13:08:34] Regenerating road (seed=36897)...
[13:08:37] Road ready.
[13:08:43] Eval (seed=36897): 60.6r/257s ❌@257
[13:15:45] [130,000/500,000] Checkpoint saved
[13:15:45] Regenerating road (seed=73069)...
[13:15:48] Road ready.
[13:15:54] Eval (seed=73069): 67.5r/262s ❌@262

View File

@ -0,0 +1,37 @@
[13:37:03] ============================================================
[13:37:03] Exp 27: fresh weights | truly random roads | variable throttle
[13:37:03] Sim: localhost:9091 → donkey-generated-roads-v0
[13:37:03] Steering: 7 bins | Throttle: 3 bins → [0.2, 0.5, 1.0]
[13:37:03] LR=0.0003, ent_coef=0.05, n_steps=1024
[13:37:03] Total=500,000 steps, checkpoint every 10,000
[13:37:03] CTE term: >2.0m for >0.5s
[13:37:03] Speed term: <1.0 for >1.5s
[13:37:03] Episode cap: 30.0s | Road regen: random seed each checkpoint
[13:37:03] BrakeOnUpdateCallback: enabled
[13:37:03] ============================================================
[13:37:03] Connecting to sim...
[13:37:04] Connected. obs=(3, 120, 160), action=Discrete(21)
[13:37:04] Initial road regen (seed=81035)...
[13:37:07] Road ready.
[13:37:07] Creating fresh PPO model (no warm start)...
[13:37:08] Model created. Action space: 21 discrete actions
[13:37:08] Exp 27 started — PID 1082126
[13:37:08] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp27-random-roads/run_2026-05-06_133703_random_roads.log
[13:45:35] [10,000/500,000] Checkpoint saved
[13:45:35] Regenerating road (seed=68546)...
[13:45:38] Road ready.
[13:45:42] Eval (seed=68546): 39.0r/145s ❌@145
[13:45:43] NEW BEST: steps=145 reward=39.0
[13:52:28] [20,000/500,000] Checkpoint saved
[13:52:28] Regenerating road (seed=35735)...
[13:52:31] Road ready.
[13:52:36] Eval (seed=35735): 71.6r/230s ❌@230
[13:52:37] NEW BEST: steps=230 reward=71.6
[13:58:59] [30,000/500,000] Checkpoint saved
[13:58:59] Regenerating road (seed=98061)...
[13:59:02] Road ready.
[13:59:06] Eval (seed=98061): 39.2r/139s ❌@139
[14:07:08] [40,000/500,000] Checkpoint saved
[14:07:08] Regenerating road (seed=2167)...
[14:07:11] Road ready.
[14:07:16] Eval (seed=2167): 33.9r/148s ❌@148

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,12 @@
[14:09:34] ============================================================
[14:09:34] Exp 27: fresh weights | truly random roads | variable throttle
[14:09:34] Sim: localhost:9091 → donkey-generated-roads-v0
[14:09:34] Steering: 7 bins | Throttle: 3 bins → [0.2, 0.5, 1.0]
[14:09:34] LR=0.0003, ent_coef=0.05, n_steps=1024
[14:09:34] Total=500,000 steps, checkpoint every 10,000
[14:09:34] CTE term: >2.0m for >0.5s
[14:09:34] Speed term: <1.0 for >1.5s
[14:09:34] Episode cap: 30.0s | Road regen: random seed each checkpoint
[14:09:34] BrakeOnUpdateCallback: enabled
[14:09:34] ============================================================
[14:09:34] Connecting to sim...

View File

@ -0,0 +1,145 @@
[14:13:28] ============================================================
[14:13:28] Exp 27: fresh weights | truly random roads | variable throttle
[14:13:28] Sim: localhost:9091 → donkey-generated-roads-v0
[14:13:28] Steering: 7 bins | Throttle: 3 bins → [0.2, 0.5, 1.0]
[14:13:28] LR=0.0003, ent_coef=0.05, n_steps=1024
[14:13:28] Total=500,000 steps, checkpoint every 10,000
[14:13:28] CTE term: >2.0m for >0.5s
[14:13:28] Speed term: <1.0 for >1.5s
[14:13:28] Episode cap: 30.0s | Road regen: random seed each checkpoint
[14:13:28] BrakeOnUpdateCallback: enabled
[14:13:28] ============================================================
[14:13:28] Connecting to sim...
[14:13:29] Connected. obs=(3, 120, 160), action=Discrete(21)
[14:13:29] Initial road regen (seed=89942)...
[14:13:32] Road ready.
[14:13:32] Creating fresh PPO model (no warm start)...
[14:13:34] Model created. Action space: 21 discrete actions
[14:13:34] Exp 27 started — PID 1094759
[14:13:34] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp27-random-roads/run_2026-05-06_141328_random_roads.log
[14:24:26] [10,000/500,000] Checkpoint saved
[14:24:26] Regenerating road (seed=63790)...
[14:24:29] Road ready.
[14:24:43] Eval (seed=63790): 250.5r/924s ❌@924
[14:24:43] NEW BEST: steps=924 reward=250.5
[14:33:21] [20,000/500,000] Checkpoint saved
[14:33:21] Regenerating road (seed=54863)...
[14:33:24] Road ready.
[14:33:38] Eval (seed=54863): 275.1r/925s ❌@925
[14:33:38] NEW BEST: steps=925 reward=275.1
[14:41:36] [30,000/500,000] Checkpoint saved
[14:41:36] Regenerating road (seed=84765)...
[14:41:39] Road ready.
[14:41:58] Eval (seed=84765): 377.3r/1325s ❌@1325
[14:41:58] NEW BEST: steps=1325 reward=377.3
[14:49:45] [40,000/500,000] Checkpoint saved
[14:49:45] Regenerating road (seed=62695)...
[14:49:48] Road ready.
[14:49:52] Eval (seed=62695): 33.8r/134s ❌@134
[14:57:31] [50,000/500,000] Checkpoint saved
[14:57:31] Regenerating road (seed=51171)...
[14:57:34] Road ready.
[14:57:56] Eval (seed=51171): 452.6r/1575s ❌@1575
[14:57:57] NEW BEST: steps=1575 reward=452.6
[15:05:11] [60,000/500,000] Checkpoint saved
[15:05:11] Regenerating road (seed=13427)...
[15:05:14] Road ready.
[15:05:29] Eval (seed=13427): 289.0r/1013s ❌@1013
[15:12:02] [70,000/500,000] Checkpoint saved
[15:12:02] Regenerating road (seed=99752)...
[15:12:05] Road ready.
[15:12:29] Eval (seed=99752): 432.3r/1648s ❌@1648
[15:12:29] NEW BEST: steps=1648 reward=432.3
[15:19:09] [80,000/500,000] Checkpoint saved
[15:19:09] Regenerating road (seed=40584)...
[15:19:12] Road ready.
[15:19:34] Eval (seed=40584): 449.9r/1567s ❌@1567
[15:25:49] [90,000/500,000] Checkpoint saved
[15:25:49] Regenerating road (seed=23677)...
[15:25:52] Road ready.
[15:26:14] Eval (seed=23677): 444.3r/1522s ❌@1522
[15:33:01] [100,000/500,000] Checkpoint saved
[15:33:01] Regenerating road (seed=11818)...
[15:33:04] Road ready.
[15:33:09] Eval (seed=11818): 30.4r/160s ❌@160
[15:39:06] [110,000/500,000] Checkpoint saved
[15:39:06] Regenerating road (seed=15439)...
[15:39:09] Road ready.
[15:39:32] Eval (seed=15439): 462.7r/1580s ❌@1580
[15:45:27] [120,000/500,000] Checkpoint saved
[15:45:27] Regenerating road (seed=79776)...
[15:45:30] Road ready.
[15:45:43] Eval (seed=79776): 251.7r/893s ❌@893
[15:51:51] [130,000/500,000] Checkpoint saved
[15:51:51] Regenerating road (seed=51)...
[15:51:54] Road ready.
[15:52:09] Eval (seed=51): 273.5r/1029s ❌@1029
[15:58:08] [140,000/500,000] Checkpoint saved
[15:58:08] Regenerating road (seed=15985)...
[15:58:11] Road ready.
[15:58:30] Eval (seed=15985): 386.8r/1260s ❌@1260
[16:04:29] [150,000/500,000] Checkpoint saved
[16:04:29] Regenerating road (seed=78623)...
[16:04:32] Road ready.
[16:04:37] Eval (seed=78623): 50.5r/193s ❌@193
[16:11:04] [160,000/500,000] Checkpoint saved
[16:11:04] Regenerating road (seed=68780)...
[16:11:07] Road ready.
[16:11:19] Eval (seed=68780): 194.3r/753s ❌@753
[16:17:48] [170,000/500,000] Checkpoint saved
[16:17:48] Regenerating road (seed=27669)...
[16:17:51] Road ready.
[16:18:11] Eval (seed=27669): 375.2r/1371s ❌@1371
[16:24:47] [180,000/500,000] Checkpoint saved
[16:24:47] Regenerating road (seed=32153)...
[16:24:50] Road ready.
[16:24:55] Eval (seed=32153): 45.6r/188s ❌@188
[16:31:56] [190,000/500,000] Checkpoint saved
[16:31:56] Regenerating road (seed=23522)...
[16:31:59] Road ready.
[16:32:22] Eval (seed=23522): 444.2r/1652s ❌@1652
[16:32:22] NEW BEST: steps=1652 reward=444.2
[16:38:46] [200,000/500,000] Checkpoint saved
[16:38:46] Regenerating road (seed=35712)...
[16:38:49] Road ready.
[16:38:59] Eval (seed=35712): 200.8r/657s ❌@657
[16:45:01] [210,000/500,000] Checkpoint saved
[16:45:01] Regenerating road (seed=84828)...
[16:45:04] Road ready.
[16:45:09] Eval (seed=84828): 53.5r/219s ❌@219
[16:51:16] [220,000/500,000] Checkpoint saved
[16:51:16] Regenerating road (seed=66225)...
[16:51:19] Road ready.
[16:51:42] Eval (seed=66225): 425.7r/1612s ❌@1612
[16:58:23] [230,000/500,000] Checkpoint saved
[16:58:23] Regenerating road (seed=41094)...
[16:58:26] Road ready.
[16:58:36] Eval (seed=41094): 162.1r/581s ❌@581
[17:05:03] [240,000/500,000] Checkpoint saved
[17:05:03] Regenerating road (seed=51566)...
[17:05:06] Road ready.
[17:05:29] Eval (seed=51566): 438.2r/1613s ❌@1613
[17:12:04] [250,000/500,000] Checkpoint saved
[17:12:04] Regenerating road (seed=18319)...
[17:12:07] Road ready.
[17:12:10] Eval (seed=18319): 19.8r/116s ❌@116
[17:18:52] [260,000/500,000] Checkpoint saved
[17:18:52] Regenerating road (seed=99555)...
[17:18:55] Road ready.
[17:19:05] Eval (seed=99555): 182.6r/603s ❌@603
[17:25:37] [270,000/500,000] Checkpoint saved
[17:25:37] Regenerating road (seed=59896)...
[17:25:40] Road ready.
[17:25:45] Eval (seed=59896): 59.4r/228s ❌@228
[17:32:04] [280,000/500,000] Checkpoint saved
[17:32:04] Regenerating road (seed=50173)...
[17:32:07] Road ready.
[17:32:28] Eval (seed=50173): 409.5r/1498s ❌@1498
[17:39:17] [290,000/500,000] Checkpoint saved
[17:39:17] Regenerating road (seed=2910)...
[17:39:20] Road ready.
[17:39:30] Eval (seed=2910): 132.7r/558s ❌@558
[17:47:37] [300,000/500,000] Checkpoint saved
[17:47:37] Regenerating road (seed=46529)...
[17:47:40] Road ready.
[17:47:48] Eval (seed=46529): 120.3r/465s ❌@465