feat(exp29): fine-tune wave4-trial-0009 on generated track (continuous actions)

Warm-starts from wave4-trial-0009/model.zip (best mini-monaco model, completed
laps). Fine-tunes on generated track with continuous Box action space preserved
(no DiscretizedActionWrapper) at LR=0.00005. 50k steps, checkpoint every 5k,
zero-shot mini-monaco eval at end.

Tests whether additional generated-track exposure improves corner handling on
mini-monaco without catastrophic forgetting of driving skill.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Paul Huliganga 2026-05-14 15:32:43 -04:00
parent ee91b8f9a3
commit 1d53bf613f
4 changed files with 1237 additions and 0 deletions

View File

@ -0,0 +1,275 @@
"""
Exp 29: Fine-tune wave4-trial-0009 on generated track.
wave4-trial-0009 is our best mini-monaco model (completed laps in eval).
It was trained on gentrack+mountain with continuous actions and LR 0.00073.
What this experiment does:
- Warm-start from wave4-trial-0009/model.zip (continuous Box action space)
- Fine-tune on generated track only, very low LR to preserve driving skill
- Same wrapper stack as wave4: ThrottleClampWrapper -> StuckTerminationWrapper -> SpeedRewardWrapper
- NO DiscretizedActionWrapper: continuous actions throughout
- 50K steps, checkpoint every 5K
- Zero-shot eval on mini-monaco at the end
Goal: does additional generated-track exposure improve mini-monaco corner handling?
"""
import os
import sys
import time
from datetime import datetime
# Put the project's agent directory first on the import path; presumably this
# is where donkeycar_sb3_runner, multitrack_runner and reward_wrapper live.
sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')

# Output directory for checkpoints/logs, the single-instance PID file inside
# it, and the checkpoint this experiment warm-starts from.
_SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune'
_PIDFILE = os.path.join(_SAVE_DIR, 'current.pid')
_WARM_MODEL = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0009/model.zip'

os.makedirs(_SAVE_DIR, exist_ok=True)

# Single-instance guard: if the PID file names a *different* process that is
# still alive, refuse to start a second run.
if os.path.exists(_PIDFILE):
    try:
        # Read through a context manager so the handle is closed immediately
        # instead of being left for the garbage collector.
        with open(_PIDFILE) as _pid_fh:
            _old = int(_pid_fh.read().strip())
        if _old != os.getpid():
            # Signal 0 sends nothing; os.kill() only checks that the PID can
            # be signalled and raises OSError when it cannot.
            os.kill(_old, 0)
            print(f'[exp29] Another instance already running (PID {_old}). Exiting.', flush=True)
            sys.exit(1)
    except (OSError, ValueError):
        # Deliberate best effort: an unreadable or garbled PID file, or a PID
        # that can no longer be signalled, is treated as stale and ignored.
        pass
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.utils import get_schedule_fn
from donkeycar_sb3_runner import ThrottleClampWrapper
from multitrack_runner import StuckTerminationWrapper
from reward_wrapper import SpeedRewardWrapper
# --- Simulator connection (passed to gym.make via conf=...) ------------------
HOST = 'localhost'
PORT = 9091

# --- Fine-tuning schedule ----------------------------------------------------
THROTTLE_MIN = 0.2            # throttle_min for ThrottleClampWrapper
LR = 0.00005                  # learning rate forced onto the warm-started model
TOTAL_STEPS = 50_000          # nominal training budget
CHECKPOINT_EVERY = 5_000      # nominal steps per learn() segment / checkpoint
SCENE_RELOAD_WAIT = 5.0       # seconds slept before reconnecting to the sim

# --- Gym environment ids -----------------------------------------------------
TRAIN_TRACK = 'donkey-generated-track-v0'
EVAL_TRACK = 'donkey-minimonaco-track-v0'

# --- StuckTerminationWrapper keyword arguments -------------------------------
STUCK_STEPS = 40
MIN_DISPLACEMENT = 0.5
MAX_STUCK_SECONDS = 12.0
MAX_EPISODE_SECONDS = 30.0
LOW_SPEED_THRESHOLD = 0.5
MAX_LOW_SPEED_SECONDS = 3.0
MAX_CTE = 5.0
MAX_HIGH_CTE_SECONDS = 1.0

# --- SpeedRewardWrapper keyword arguments ------------------------------------
EFFICIENCY_WINDOW = 30        # passed as window_size
MIN_EFFICIENCY = 0.15
REWARD_MAX_CTE = 8.0          # passed as max_cte (distinct from MAX_CTE above)
MIN_LAP_TIME = 12.0
PROGRESS_PATIENCE = 100
def log(msg):
    """Print *msg* to stdout, prefixed with the current local time as [HH:MM:SS].

    Output is flushed on every call so lines appear promptly when stdout is
    redirected to a file.
    """
    stamp = datetime.now().strftime("%H:%M:%S")
    print(f'[{stamp}] {msg}', flush=True)
def make_env(track_id, port):
    """Return a zero-argument factory that builds one wrapped simulator env.

    The factory is what DummyVecEnv expects: calling it creates the gym env
    for *track_id* (connecting to HOST on *port*) and layers the wrappers in
    this order: ThrottleClampWrapper, StuckTerminationWrapper,
    SpeedRewardWrapper. No action-discretizing wrapper is applied.
    """
    def _init():
        # Wrapper settings are read from the module constants at call time.
        stuck_cfg = dict(
            stuck_steps=STUCK_STEPS,
            min_displacement=MIN_DISPLACEMENT,
            max_stuck_seconds=MAX_STUCK_SECONDS,
            max_episode_seconds=MAX_EPISODE_SECONDS,
            low_speed_threshold=LOW_SPEED_THRESHOLD,
            max_low_speed_seconds=MAX_LOW_SPEED_SECONDS,
            max_cte=MAX_CTE,
            max_high_cte_seconds=MAX_HIGH_CTE_SECONDS,
        )
        reward_cfg = dict(
            window_size=EFFICIENCY_WINDOW,
            min_efficiency=MIN_EFFICIENCY,
            max_cte=REWARD_MAX_CTE,
            min_lap_time=MIN_LAP_TIME,
            progress_patience=PROGRESS_PATIENCE,
        )

        wrapped = gym.make(track_id, conf={'host': HOST, 'port': port})
        wrapped = ThrottleClampWrapper(wrapped, throttle_min=THROTTLE_MIN)
        wrapped = StuckTerminationWrapper(wrapped, **stuck_cfg)
        wrapped = SpeedRewardWrapper(wrapped, **reward_cfg)
        return wrapped

    return _init
def connect_env(track_id=TRAIN_TRACK):
    """Create a single-env vectorized environment for *track_id* on PORT.

    The env built by make_env() is placed in a DummyVecEnv and wrapped in
    VecTransposeImage; the resulting vec env is returned.
    """
    env_factories = [make_env(track_id, PORT)]
    return VecTransposeImage(DummyVecEnv(env_factories))
def reconnect_env(old_env, track_id=TRAIN_TRACK):
    """Close *old_env*, wait SCENE_RELOAD_WAIT seconds, and connect afresh.

    A failure while closing the old env is logged as a warning and otherwise
    ignored, so a broken connection does not stop the reconnect. Returns the
    new vec env produced by connect_env(track_id).
    """
    try:
        old_env.close()
    except Exception as close_err:
        log(f' env.close() warning: {close_err}')

    # Pause before reconnecting (presumably to let the simulator reload the scene).
    time.sleep(SCENE_RELOAD_WAIT)
    fresh_env = connect_env(track_id)
    return fresh_env
# Local import, in keeping with this script's other mid-file imports; needed
# only for the PID-file cleanup registered at the end of this section.
import atexit

# Startup banner on stdout describing the run configuration.
log('=' * 60)
log('Exp 29: wave4-trial-0009 fine-tune on generated track')
log(f' Sim: {HOST}:{PORT} -> {TRAIN_TRACK}')
log(f' Warm model: {_WARM_MODEL}')
log(' Action space: continuous Box (no discretization)')
log(f' LR={LR}, total={TOTAL_STEPS:,}, checkpoint every {CHECKPOINT_EVERY:,}')
log(f' After training: zero-shot eval on {EVAL_TRACK}')
log('=' * 60)

log('Connecting to sim...')
env = connect_env()
log(f' obs={env.observation_space.shape}, action={env.action_space}')

log('Loading warm-start model from wave4-trial-0009...')
model = PPO.load(_WARM_MODEL, env=env, device='cpu')
# Must update lr_schedule — PPO.load restores the optimizer and schedule from
# the checkpoint. model.learning_rate = LR alone doesn't update the optimizer,
# so the attribute, the schedule and every optimizer param group are all set.
model.learning_rate = LR
model.lr_schedule = get_schedule_fn(LR)
for pg in model.policy.optimizer.param_groups:
    pg['lr'] = LR
log(f' Warm model loaded. action={model.action_space} LR={LR}')

# Record this process as the running instance for the single-instance guard.
with open(_PIDFILE, 'w') as f:
    f.write(str(os.getpid()))


def _remove_own_pidfile():
    """Delete the PID file at interpreter exit if it still records this process.

    Without this, every finished run leaves a stale PID file behind; if that
    PID is later reused by an unrelated process, the start-up guard would
    wrongly refuse to launch. The ownership check avoids deleting a file that
    some other instance has since rewritten.
    """
    try:
        with open(_PIDFILE) as fh:
            recorded = fh.read().strip()
        if recorded == str(os.getpid()):
            os.remove(_PIDFILE)
    except OSError:
        # Best effort only: the file may already be gone or be unreadable.
        pass


atexit.register(_remove_own_pidfile)
# Best per-checkpoint evaluation seen so far (steps first, reward as the
# tie-breaker) and the nominal count of training steps completed.
best_total_steps = float('-inf')
best_total_reward = float('-inf')
steps_done = 0

# Per-run log file name and the location of the best model found so far.
run_tag = datetime.now().strftime('%Y-%m-%d_%H%M%S') + '_wave4_finetune'
log_path = os.path.join(_SAVE_DIR, f'run_{run_tag}.log')
best_model_path = os.path.join(_SAVE_DIR, 'best_model.zip')

import logging

# Messages written through this logger contain non-ASCII characters (the em
# dash and the check/cross marks), so the file encoding is pinned to UTF-8
# rather than left to the locale default.
_fh = logging.FileHandler(log_path, encoding='utf-8')
_fh.setFormatter(logging.Formatter('%(message)s'))
_sh = logging.StreamHandler(sys.stdout)
_sh.setFormatter(logging.Formatter('%(message)s'))

# Dedicated logger that writes every message to both the run log file and
# stdout; propagation is disabled so root-logger handlers do not repeat it.
file_log = logging.getLogger('exp29')
file_log.setLevel(logging.INFO)
file_log.propagate = False
file_log.addHandler(_fh)
file_log.addHandler(_sh)
def flog(msg):
    """Emit *msg* through the 'exp29' logger with an [HH:MM:SS] time prefix."""
    stamp = datetime.now().strftime("%H:%M:%S")
    file_log.info(f'[{stamp}] {msg}')
# Opening banner for the run log: PID, log location and key settings,
# framed by two rule lines.
_rule = '=' * 60
for _banner_line in (
    _rule,
    f'Exp 29 started — PID {os.getpid()}',
    f'Log: {log_path}',
    f'Warm start: wave4-trial-0009 | LR={LR}',
    f'Track: {TRAIN_TRACK} | continuous actions',
    _rule,
):
    flog(_banner_line)
# ── Training loop ─────────────────────────────────────────────────────────────
# Alternate between a training segment, a checkpoint, a simulator reconnect
# and a single deterministic evaluation episode on the training track. The
# checkpoint that survives longest in evaluation (reward breaks ties) is kept
# as best_model.zip for the zero-shot evaluation that follows.

# Step cap for the per-checkpoint evaluation episode; reaching it counts as a
# successful run.
EVAL_MAX_STEPS = 2000

while steps_done < TOTAL_STEPS:
    seg_steps = min(CHECKPOINT_EVERY, TOTAL_STEPS - steps_done)
    # NOTE: steps_done counts *nominal* steps. The exp29 run log shows
    # total_timesteps advancing by 3 x 2048 = 6,144 per 5,000-step segment
    # (learn() appears to collect whole rollouts), so the real number of
    # environment steps is higher than the checkpoint names suggest.
    model.learn(total_timesteps=seg_steps, reset_num_timesteps=False)
    steps_done += seg_steps

    # Keep a numbered checkpoint and also overwrite the rolling 'model' file.
    ckpt = os.path.join(_SAVE_DIR, f'checkpoint_{steps_done:07d}')
    model.save(ckpt)
    model.save(os.path.join(_SAVE_DIR, 'model'))
    flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved: {ckpt}.zip')

    # Reconnecting presumably makes the simulator produce a new generated
    # track layout; the model must then be pointed at the new env object.
    flog(' Reconnecting for fresh track layout...')
    env = reconnect_env(env)
    model.set_env(env)

    try:
        obs = env.reset()
        n_envs = env.num_envs
        ep_rewards = np.zeros(n_envs)
        ep_steps = np.zeros(n_envs)
        done_mask = np.zeros(n_envs, dtype=bool)

        for _step in range(EVAL_MAX_STEPS):
            action, _state = model.predict(obs, deterministic=True)
            obs, rewards, dones, infos = env.step(action)
            # Only envs whose first episode is still running accumulate; the
            # step that ends the episode is included in the totals.
            active = ~done_mask
            ep_rewards[active] += np.asarray(rewards)[active]
            ep_steps[active] += 1
            done_mask |= np.asarray(dones, dtype=bool)
            if done_mask.all():
                break

        total_steps_eval = int(ep_steps.sum())
        total_reward_eval = float(ep_rewards.sum())
        # Success marker restored: the run log prints a check mark for a full
        # EVAL_MAX_STEPS episode, matching the mini-monaco evaluation output.
        status = '✅' if ep_steps[0] >= EVAL_MAX_STEPS else f'❌@{int(ep_steps[0])}'
        flog(f' Eval: gentrack={total_reward_eval:.1f}r/{int(ep_steps[0])}s {status}')

        # Rank by survival length first, then by reward on equal length.
        if (total_steps_eval > best_total_steps
                or (total_steps_eval == best_total_steps
                    and total_reward_eval > best_total_reward)):
            best_total_steps = total_steps_eval
            best_total_reward = total_reward_eval
            model.save(best_model_path)
            flog(f' NEW BEST: steps={best_total_steps} reward={best_total_reward:.1f}')
    except Exception as e:
        # An evaluation failure is logged and must not abort the training run.
        flog(f' Eval error: {e}')

env.close()
flog('Training complete.')
# ── Zero-shot eval on mini-monaco ─────────────────────────────────────────────
# Load best_model.zip and drive MINI_EPISODES deterministic episodes on the
# evaluation track, which this script never trains on. Each episode ends on
# the env's done flag or after MINI_MAX_STEPS steps, whichever comes first.
flog('')
flog('=' * 60)
flog(f'ZERO-SHOT EVAL: best_model on {EVAL_TRACK}')
flog('=' * 60)

MINI_EPISODES = 5
MINI_MAX_STEPS = 3000

time.sleep(SCENE_RELOAD_WAIT)
eval_env = connect_env(track_id=EVAL_TRACK)
try:
    eval_model = PPO.load(best_model_path, env=eval_env, device='cpu')
    rewards_mini = []
    steps_mini = []

    for ep in range(1, MINI_EPISODES + 1):
        obs = eval_env.reset()
        ep_return = 0.0
        ep_len = 0
        finished = False
        while not (finished or ep_len >= MINI_MAX_STEPS):
            action, _state = eval_model.predict(obs, deterministic=True)
            obs, step_rewards, step_dones, info = eval_env.step(action)
            ep_return += float(step_rewards[0])
            ep_len += 1
            finished = bool(step_dones[0])

        # Read the 'hit' entry from the last step's info, when there is one.
        last_info = info[0] if isinstance(info, (list, tuple)) else info
        if isinstance(last_info, dict):
            hit = last_info.get('hit', '?')
        else:
            hit = '?'

        # Reaching the step cap counts as success; any earlier end is a failure.
        if ep_len >= MINI_MAX_STEPS:
            status = '✅ timeout'
        else:
            status = f'❌ hit={hit}@{ep_len}'
        flog(f' ep{ep}: {ep_return:.1f}r / {ep_len}s {status}')

        rewards_mini.append(ep_return)
        steps_mini.append(ep_len)
        time.sleep(0.3)

    mean_steps = np.mean(steps_mini)
    mean_reward = np.mean(rewards_mini)
    flog(f' Mean: {mean_steps:.0f} steps / {mean_reward:.1f} reward')

    # The run is labelled as generalizing when the mean episode exceeds 500 steps.
    if mean_steps > 500:
        verdict = "✅ GENERALIZES"
    else:
        verdict = "❌ DOES NOT GENERALIZE"
    flog(f' {verdict}')
except Exception as e:
    flog(f' Mini-monaco eval error: {e}')
finally:
    eval_env.close()

flog('')
flog('Exp 29 complete.')
flog(f'Log: {log_path}')

View File

@ -0,0 +1 @@
1263940

View File

@ -0,0 +1,55 @@
[22:55:59] ============================================================
[22:55:59] Exp 29 started — PID 1263940
[22:55:59] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log
[22:55:59] Warm start: wave4-trial-0009 | LR=5e-05
[22:55:59] Track: donkey-generated-track-v0 | continuous actions
[22:55:59] ============================================================
[23:00:17] [5,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0005000.zip
[23:00:17] Reconnecting for fresh track layout...
[23:00:28] Eval: gentrack=34.4r/279s ❌@279
[23:00:28] NEW BEST: steps=279 reward=34.4
[23:04:32] [10,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0010000.zip
[23:04:32] Reconnecting for fresh track layout...
[23:04:45] Eval: gentrack=63.4r/463s ❌@463
[23:04:45] NEW BEST: steps=463 reward=63.4
[23:08:46] [15,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0015000.zip
[23:08:46] Reconnecting for fresh track layout...
[23:08:57] Eval: gentrack=33.1r/268s ❌@268
[23:12:32] [20,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0020000.zip
[23:12:32] Reconnecting for fresh track layout...
[23:12:45] Eval: gentrack=62.4r/450s ❌@450
[23:16:11] [25,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0025000.zip
[23:16:11] Reconnecting for fresh track layout...
[23:16:35] Eval: gentrack=203.3r/1317s ❌@1317
[23:16:35] NEW BEST: steps=1317 reward=203.3
[23:19:47] [30,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0030000.zip
[23:19:47] Reconnecting for fresh track layout...
[23:20:00] Eval: gentrack=64.0r/440s ❌@440
[23:23:34] [35,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0035000.zip
[23:23:34] Reconnecting for fresh track layout...
[23:24:06] Eval: gentrack=323.8r/2000s ✅
[23:24:06] NEW BEST: steps=2000 reward=323.8
[23:27:59] [40,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0040000.zip
[23:27:59] Reconnecting for fresh track layout...
[23:28:12] Eval: gentrack=64.0r/450s ❌@450
[23:32:20] [45,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0045000.zip
[23:32:20] Reconnecting for fresh track layout...
[23:32:40] Eval: gentrack=159.5r/1024s ❌@1024
[23:36:23] [50,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0050000.zip
[23:36:23] Reconnecting for fresh track layout...
[23:36:37] Eval: gentrack=64.5r/440s ❌@440
[23:36:37] Training complete.
[23:36:37]
[23:36:37] ============================================================
[23:36:37] ZERO-SHOT EVAL: best_model on donkey-minimonaco-track-v0
[23:36:37] ============================================================
[23:36:58] ep1: 158.8r / 999s ❌ hit=none@999
[23:37:05] ep2: 46.7r / 338s ❌ hit=right_barrier_seg0@338
[23:37:23] ep3: 206.2r / 1286s ❌ hit=none@1286
[23:37:33] ep4: 63.8r / 444s ❌ hit=right_barrier_seg56@444
[23:37:46] ep5: 108.0r / 700s ❌ hit=right_barrier_seg49@700
[23:37:46] Mean: 753 steps / 116.7 reward
[23:37:46] ✅ GENERALIZES
[23:37:46]
[23:37:46] Exp 29 complete.
[23:37:46] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log

View File

@ -0,0 +1,906 @@
/home/paulh/.local/lib/python3.10/site-packages/matplotlib/projections/__init__.py:63: UserWarning: Unable to import Axes3D. This may be due to multiple versions of Matplotlib being installed (e.g. as a system package and as a pip package). As a result, the 3D projection is not available.
warnings.warn("Unable to import Axes3D. This may be due to multiple versions of "
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
[22:55:56] ============================================================
[22:55:56] Exp 29: wave4-trial-0009 fine-tune on generated track
[22:55:56] Sim: localhost:9091 -> donkey-generated-track-v0
[22:55:56] Warm model: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0009/model.zip
[22:55:56] Action space: continuous Box (no discretization)
[22:55:56] LR=5e-05, total=50,000, checkpoint every 5,000
[22:55:56] After training: zero-shot eval on donkey-minimonaco-track-v0
[22:55:56] ============================================================
[22:55:56] Connecting to sim...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
/home/paulh/.local/lib/python3.10/site-packages/gymnasium/spaces/box.py:236: UserWarning: WARN: Box low's precision lowered by casting to float32, current low.dtype=float64
gym.logger.warn(
/home/paulh/.local/lib/python3.10/site-packages/gymnasium/spaces/box.py:306: UserWarning: WARN: Box high's precision lowered by casting to float32, current high.dtype=float64
gym.logger.warn(
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[22:55:56] obs=(3, 120, 160), action=Box([-1. 0.2], 1.0, (2,), float32)
[22:55:56] Loading warm-start model from wave4-trial-0009...
/home/paulh/.local/lib/python3.10/site-packages/stable_baselines3/common/utils.py:166: UserWarning: get_schedule_fn() is deprecated, please use FloatSchedule() instead
warnings.warn("get_schedule_fn() is deprecated, please use FloatSchedule() instead")
/home/paulh/.local/lib/python3.10/site-packages/stable_baselines3/common/utils.py:212: UserWarning: constant_fn() is deprecated, please use ConstantSchedule() instead
warnings.warn("constant_fn() is deprecated, please use ConstantSchedule() instead")
[22:55:59] Warm model loaded. action=Box([-1. 0.2], 1.0, (2,), float32) LR=5e-05
[22:55:59] ============================================================
[22:55:59] Exp 29 started — PID 1263940
[22:55:59] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log
[22:55:59] Warm start: wave4-trial-0009 | LR=5e-05
[22:55:59] Track: donkey-generated-track-v0 | continuous actions
[22:55:59] ============================================================
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 41 |
| iterations | 1 |
| time_elapsed | 49 |
| total_timesteps | 110592 |
---------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 30 |
| iterations | 2 |
| time_elapsed | 134 |
| total_timesteps | 112640 |
| train/ | |
| approx_kl | 0.15397084 |
| clip_fraction | 0.504 |
| clip_range | 0.2 |
| entropy_loss | -2.27 |
| explained_variance | 0.364 |
| learning_rate | 5e-05 |
| loss | 8.65 |
| n_updates | 540 |
| policy_gradient_loss | 0.0492 |
| std | 0.754 |
| value_loss | 40.2 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 28 |
| iterations | 3 |
| time_elapsed | 215 |
| total_timesteps | 114688 |
| train/ | |
| approx_kl | 0.03846732 |
| clip_fraction | 0.307 |
| clip_range | 0.2 |
| entropy_loss | -2.27 |
| explained_variance | 0.593 |
| learning_rate | 5e-05 |
| loss | 4.59 |
| n_updates | 550 |
| policy_gradient_loss | 0.00024 |
| std | 0.753 |
| value_loss | 18.2 |
----------------------------------------
[23:00:17] [5,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0005000.zip
[23:00:17] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
/home/paulh/.local/lib/python3.10/site-packages/gymnasium/spaces/box.py:236: UserWarning: WARN: Box low's precision lowered by casting to float32, current low.dtype=float64
gym.logger.warn(
/home/paulh/.local/lib/python3.10/site-packages/gymnasium/spaces/box.py:306: UserWarning: WARN: Box high's precision lowered by casting to float32, current high.dtype=float64
gym.logger.warn(
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:00:28] Eval: gentrack=34.4r/279s ❌@279
[23:00:28] NEW BEST: steps=279 reward=34.4
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 54 |
| iterations | 1 |
| time_elapsed | 37 |
| total_timesteps | 116736 |
---------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 34 |
| iterations | 2 |
| time_elapsed | 117 |
| total_timesteps | 118784 |
| train/ | |
| approx_kl | 0.026151573 |
| clip_fraction | 0.286 |
| clip_range | 0.2 |
| entropy_loss | -2.27 |
| explained_variance | 0.352 |
| learning_rate | 5e-05 |
| loss | 4.15 |
| n_updates | 570 |
| policy_gradient_loss | -0.0105 |
| std | 0.752 |
| value_loss | 8.84 |
-----------------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.23 seconds
----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 31 |
| iterations | 3 |
| time_elapsed | 195 |
| total_timesteps | 120832 |
| train/ | |
| approx_kl | 0.02167065 |
| clip_fraction | 0.199 |
| clip_range | 0.2 |
| entropy_loss | -2.27 |
| explained_variance | 0.586 |
| learning_rate | 5e-05 |
| loss | 1.44 |
| n_updates | 580 |
| policy_gradient_loss | -0.0274 |
| std | 0.751 |
| value_loss | 4.3 |
----------------------------------------
[23:04:32] [10,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0010000.zip
[23:04:32] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.51 seconds
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:04:45] Eval: gentrack=63.4r/463s ❌@463
[23:04:45] NEW BEST: steps=463 reward=63.4
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 55 |
| iterations | 1 |
| time_elapsed | 36 |
| total_timesteps | 122880 |
---------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.93 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 34 |
| iterations | 2 |
| time_elapsed | 119 |
| total_timesteps | 124928 |
| train/ | |
| approx_kl | 0.016491085 |
| clip_fraction | 0.151 |
| clip_range | 0.2 |
| entropy_loss | -2.25 |
| explained_variance | 0.741 |
| learning_rate | 5e-05 |
| loss | 1.06 |
| n_updates | 600 |
| policy_gradient_loss | -0.0361 |
| std | 0.746 |
| value_loss | 2.46 |
-----------------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.21 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.93 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 31 |
| iterations | 3 |
| time_elapsed | 196 |
| total_timesteps | 126976 |
| train/ | |
| approx_kl | 0.021446671 |
| clip_fraction | 0.204 |
| clip_range | 0.2 |
| entropy_loss | -2.25 |
| explained_variance | 0.697 |
| learning_rate | 5e-05 |
| loss | 0.57 |
| n_updates | 610 |
| policy_gradient_loss | -0.036 |
| std | 0.742 |
| value_loss | 2.37 |
-----------------------------------------
[23:08:46] [15,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0015000.zip
[23:08:46] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:08:57] Eval: gentrack=33.1r/268s ❌@268
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.87 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.5 seconds
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 59 |
| iterations | 1 |
| time_elapsed | 34 |
| total_timesteps | 129024 |
---------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.47 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.76 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.22 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 38 |
| iterations | 2 |
| time_elapsed | 105 |
| total_timesteps | 131072 |
| train/ | |
| approx_kl | 0.015673645 |
| clip_fraction | 0.143 |
| clip_range | 0.2 |
| entropy_loss | -2.23 |
| explained_variance | 0.581 |
| learning_rate | 5e-05 |
| loss | 0.689 |
| n_updates | 630 |
| policy_gradient_loss | -0.0369 |
| std | 0.737 |
| value_loss | 2.17 |
-----------------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.5 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.47 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.07 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 34 |
| iterations | 3 |
| time_elapsed | 176 |
| total_timesteps | 133120 |
| train/ | |
| approx_kl | 0.016333869 |
| clip_fraction | 0.163 |
| clip_range | 0.2 |
| entropy_loss | -2.23 |
| explained_variance | 0.526 |
| learning_rate | 5e-05 |
| loss | 0.683 |
| n_updates | 640 |
| policy_gradient_loss | -0.0348 |
| std | 0.735 |
| value_loss | 2.01 |
-----------------------------------------
[23:12:32] [20,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0020000.zip
[23:12:32] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.91 seconds
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:12:45] Eval: gentrack=62.4r/450s ❌@450
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.96 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.58 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.28 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.38 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.18 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.32 seconds
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 72 |
| iterations | 1 |
| time_elapsed | 28 |
| total_timesteps | 135168 |
---------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.71 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.67 seconds
----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 40 |
| iterations | 2 |
| time_elapsed | 100 |
| total_timesteps | 137216 |
| train/ | |
| approx_kl | 0.02011627 |
| clip_fraction | 0.181 |
| clip_range | 0.2 |
| entropy_loss | -2.21 |
| explained_variance | 0.205 |
| learning_rate | 5e-05 |
| loss | 0.231 |
| n_updates | 660 |
| policy_gradient_loss | -0.0454 |
| std | 0.728 |
| value_loss | 0.893 |
----------------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.8 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.21 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 35 |
| iterations | 3 |
| time_elapsed | 171 |
| total_timesteps | 139264 |
| train/ | |
| approx_kl | 0.019535454 |
| clip_fraction | 0.203 |
| clip_range | 0.2 |
| entropy_loss | -2.2 |
| explained_variance | 0.506 |
| learning_rate | 5e-05 |
| loss | 0.497 |
| n_updates | 670 |
| policy_gradient_loss | -0.0364 |
| std | 0.726 |
| value_loss | 1.54 |
-----------------------------------------
[23:16:11] [25,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0025000.zip
[23:16:11] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.99 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.13 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.56 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.6 seconds
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:16:35] Eval: gentrack=203.3r/1317s ❌@1317
[23:16:35] NEW BEST: steps=1317 reward=203.3
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.74 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.22 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.52 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.2 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.18 seconds
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 67 |
| iterations | 1 |
| time_elapsed | 30 |
| total_timesteps | 141312 |
---------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.53 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.34 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 42 |
| iterations | 2 |
| time_elapsed | 97 |
| total_timesteps | 143360 |
| train/ | |
| approx_kl | 0.020319626 |
| clip_fraction | 0.189 |
| clip_range | 0.2 |
| entropy_loss | -2.19 |
| explained_variance | 0.2 |
| learning_rate | 5e-05 |
| loss | 0.163 |
| n_updates | 690 |
| policy_gradient_loss | -0.0378 |
| std | 0.723 |
| value_loss | 1.83 |
-----------------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.92 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.24 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.49 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.94 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.18 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 38 |
| iterations | 3 |
| time_elapsed | 160 |
| total_timesteps | 145408 |
| train/ | |
| approx_kl | 0.027640326 |
| clip_fraction | 0.24 |
| clip_range | 0.2 |
| entropy_loss | -2.19 |
| explained_variance | 0.468 |
| learning_rate | 5e-05 |
| loss | 0.271 |
| n_updates | 700 |
| policy_gradient_loss | -0.0384 |
| std | 0.722 |
| value_loss | 1.49 |
-----------------------------------------
[23:19:47] [30,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0030000.zip
[23:19:47] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.39 seconds
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:20:00] Eval: gentrack=64.0r/440s ❌@440
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.8 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.74 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.0 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.81 seconds
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 64 |
| iterations | 1 |
| time_elapsed | 31 |
| total_timesteps | 147456 |
---------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.62 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.41 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.12 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 18.01 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.88 seconds
----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 42 |
| iterations | 2 |
| time_elapsed | 96 |
| total_timesteps | 149504 |
| train/ | |
| approx_kl | 0.01823772 |
| clip_fraction | 0.159 |
| clip_range | 0.2 |
| entropy_loss | -2.18 |
| explained_variance | 0.202 |
| learning_rate | 5e-05 |
| loss | 1.07 |
| n_updates | 720 |
| policy_gradient_loss | -0.0387 |
| std | 0.719 |
| value_loss | 2.17 |
----------------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.73 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.67 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.58 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.21 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.58 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.51 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 36 |
| iterations | 3 |
| time_elapsed | 166 |
| total_timesteps | 151552 |
| train/ | |
| approx_kl | 0.023359878 |
| clip_fraction | 0.185 |
| clip_range | 0.2 |
| entropy_loss | -2.18 |
| explained_variance | 0.23 |
| learning_rate | 5e-05 |
| loss | 1.14 |
| n_updates | 730 |
| policy_gradient_loss | -0.0453 |
| std | 0.717 |
| value_loss | 1.55 |
-----------------------------------------
[23:23:34] [35,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0035000.zip
[23:23:34] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.39 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.46 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.84 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.55 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.19 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.01 seconds
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:24:06] Eval: gentrack=323.8r/2000s ✅
[23:24:06] NEW BEST: steps=2000 reward=323.8
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.43 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.17 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.72 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.03 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.67 seconds
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 64 |
| iterations | 1 |
| time_elapsed | 31 |
| total_timesteps | 153600 |
---------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.74 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.52 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.76 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.74 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.48 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 38 |
| iterations | 2 |
| time_elapsed | 107 |
| total_timesteps | 155648 |
| train/ | |
| approx_kl | 0.021391733 |
| clip_fraction | 0.181 |
| clip_range | 0.2 |
| entropy_loss | -2.16 |
| explained_variance | 0.202 |
| learning_rate | 5e-05 |
| loss | 0.583 |
| n_updates | 750 |
| policy_gradient_loss | -0.0397 |
| std | 0.711 |
| value_loss | 2.25 |
-----------------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.87 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.07 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.1 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.72 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.87 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 32 |
| iterations | 3 |
| time_elapsed | 190 |
| total_timesteps | 157696 |
| train/ | |
| approx_kl | 0.023227856 |
| clip_fraction | 0.2 |
| clip_range | 0.2 |
| entropy_loss | -2.15 |
| explained_variance | 0.266 |
| learning_rate | 5e-05 |
| loss | 0.503 |
| n_updates | 760 |
| policy_gradient_loss | -0.0421 |
| std | 0.708 |
| value_loss | 1.82 |
-----------------------------------------
[23:27:59] [40,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0040000.zip
[23:27:59] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.9 seconds
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:28:12] Eval: gentrack=64.0r/450s ❌@450
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.63 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.51 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.73 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.39 seconds
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 64 |
| iterations | 1 |
| time_elapsed | 31 |
| total_timesteps | 159744 |
---------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.34 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.42 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.69 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 12.78 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.86 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 37 |
| iterations | 2 |
| time_elapsed | 108 |
| total_timesteps | 161792 |
| train/ | |
| approx_kl | 0.023146037 |
| clip_fraction | 0.197 |
| clip_range | 0.2 |
| entropy_loss | -2.14 |
| explained_variance | 0.262 |
| learning_rate | 5e-05 |
| loss | 0.563 |
| n_updates | 780 |
| policy_gradient_loss | -0.0403 |
| std | 0.706 |
| value_loss | 2.03 |
-----------------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.77 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.23 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.69 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.61 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.74 seconds
----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 30 |
| iterations | 3 |
| time_elapsed | 202 |
| total_timesteps | 163840 |
| train/ | |
| approx_kl | 0.03328496 |
| clip_fraction | 0.232 |
| clip_range | 0.2 |
| entropy_loss | -2.14 |
| explained_variance | 0.233 |
| learning_rate | 5e-05 |
| loss | 0.465 |
| n_updates | 790 |
| policy_gradient_loss | -0.0427 |
| std | 0.703 |
| value_loss | 2.17 |
----------------------------------------
[23:32:20] [45,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0045000.zip
[23:32:20] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.65 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.87 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.04 seconds
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:32:40] Eval: gentrack=159.5r/1024s ❌@1024
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.11 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.01 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.25 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.55 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.94 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.19 seconds
---------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 66 |
| iterations | 1 |
| time_elapsed | 30 |
| total_timesteps | 165888 |
---------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.73 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.72 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.16 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.74 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.18 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.08 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 40 |
| iterations | 2 |
| time_elapsed | 101 |
| total_timesteps | 167936 |
| train/ | |
| approx_kl | 0.025306746 |
| clip_fraction | 0.23 |
| clip_range | 0.2 |
| entropy_loss | -2.13 |
| explained_variance | 0.199 |
| learning_rate | 5e-05 |
| loss | 0.368 |
| n_updates | 810 |
| policy_gradient_loss | -0.0487 |
| std | 0.7 |
| value_loss | 2.04 |
-----------------------------------------
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.59 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.44 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 12.94 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.89 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.51 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.5 seconds
-----------------------------------------
| rollout/ | |
| ep_len_mean | 216 |
| ep_rew_mean | 158 |
| time/ | |
| fps | 35 |
| iterations | 3 |
| time_elapsed | 173 |
| total_timesteps | 169984 |
| train/ | |
| approx_kl | 0.030002665 |
| clip_fraction | 0.239 |
| clip_range | 0.2 |
| entropy_loss | -2.12 |
| explained_variance | -0.0123 |
| learning_rate | 5e-05 |
| loss | 0.488 |
| n_updates | 820 |
| policy_gradient_loss | -0.0548 |
| std | 0.696 |
| value_loss | 1.09 |
-----------------------------------------
[23:36:23] [50,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0050000.zip
[23:36:23] Reconnecting for fresh track layout...
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.51 seconds
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:36:37] Eval: gentrack=64.5r/440s ❌@440
[23:36:37] Training complete.
[23:36:37]
[23:36:37] ============================================================
[23:36:37] ZERO-SHOT EVAL: best_model on donkey-minimonaco-track-v0
[23:36:37] ============================================================
INFO:gym_donkeycar.core.client:connecting to localhost:9091
INFO:gym_donkeycar.envs.donkey_sim:on need car config
INFO:gym_donkeycar.envs.donkey_sim:sending car config.
INFO:gym_donkeycar.envs.donkey_sim:sim started!
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.38 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.61 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.37 seconds
starting DonkeyGym env
Setting default: start_delay 5.0
Setting default: max_cte 8.0
Setting default: frame_skip 1
Setting default: cam_resolution (120, 160, 3)
Setting default: log_level 20
Setting default: steer_limit 1.0
Setting default: throttle_min 0.0
Setting default: throttle_max 1.0
[23:36:58] ep1: 158.8r / 999s ❌ hit=none@999
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.27 seconds
[23:37:05] ep2: 46.7r / 338s ❌ hit=right_barrier_seg0@338
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.09 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.25 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.11 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.43 seconds
[23:37:23] ep3: 206.2r / 1286s ❌ hit=none@1286
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.74 seconds
[23:37:33] ep4: 63.8r / 444s ❌ hit=right_barrier_seg56@444
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.9 seconds
INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.1 seconds
[23:37:46] ep5: 108.0r / 700s ❌ hit=right_barrier_seg49@700
[23:37:46] Mean: 753 steps / 116.7 reward
[23:37:46] ✅ GENERALIZES
[23:37:46]
[23:37:46] Exp 29 complete.
[23:37:46] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log