From 1d53bf613fc126566bf43f1a3a65c0a70912481a Mon Sep 17 00:00:00 2001
From: Paul Huliganga
Date: Thu, 14 May 2026 15:32:43 -0400
Subject: [PATCH] feat(exp29): fine-tune wave4-trial-0009 on generated track (continuous actions)

Warm-starts from wave4-trial-0009/model.zip (best mini-monaco model,
completed laps). Fine-tunes on generated track with continuous Box action
space preserved (no DiscretizedActionWrapper) at LR=0.00005. 50k steps,
checkpoint every 5k, zero-shot mini-monaco eval at end.

Tests whether additional generated-track exposure improves corner handling
on mini-monaco without catastrophic forgetting of driving skill.

Co-Authored-By: Claude Sonnet 4.6
---
Review note: the exp29 script hunk below was revised by hand (PID-file
handling, best-model fallback, docstrings), so the blob id on its index
line is stale. Regenerate the patch with git format-patch before relying
on that id (e.g. for a 3-way apply).

 .../exp29_wave4_gentrack_finetune.py | 306 ++++++
 agent/models/exp29-wave4-finetune/current.pid | 1 +
 .../run_2026-05-06_225559_wave4_finetune.log | 55 ++
 agent/models/exp29-wave4-finetune/stdout.log | 906 ++++++++++++++++++
 4 files changed, 1268 insertions(+)
 create mode 100644 agent/experiments/exp29_wave4_gentrack_finetune.py
 create mode 100644 agent/models/exp29-wave4-finetune/current.pid
 create mode 100644 agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log
 create mode 100644 agent/models/exp29-wave4-finetune/stdout.log

diff --git a/agent/experiments/exp29_wave4_gentrack_finetune.py b/agent/experiments/exp29_wave4_gentrack_finetune.py
new file mode 100644
index 0000000..269686f
--- /dev/null
+++ b/agent/experiments/exp29_wave4_gentrack_finetune.py
@@ -0,0 +1,306 @@
+"""
+Exp 29: Fine-tune wave4-trial-0009 on generated track.
+
+wave4-trial-0009 is our best mini-monaco model (completed laps in eval).
+It was trained on gentrack+mountain with continuous actions and LR≈0.00073.
+
+What this experiment does:
+  - Warm-start from wave4-trial-0009/model.zip (continuous Box action space)
+  - Fine-tune on generated track only, very low LR to preserve driving skill
+  - Same wrapper stack as wave4: ThrottleClampWrapper → StuckTermination → SpeedReward
+  - NO DiscretizedActionWrapper — continuous actions throughout
+  - 50K steps, checkpoint every 5K
+  - Zero-shot eval on mini-monaco at the end
+
+Goal: does additional generated-track exposure improve mini-monaco corner handling?
+"""
+import atexit
+import os
+import sys
+import time
+from datetime import datetime
+
+sys.path.insert(0, '/home/paulh/projects/donkeycar-rl-autoresearch/agent')
+
+_SAVE_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune'
+_PIDFILE = os.path.join(_SAVE_DIR, 'current.pid')
+_WARM_MODEL = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0009/model.zip'
+os.makedirs(_SAVE_DIR, exist_ok=True)
+
+# Single-instance guard, evaluated before the third-party imports below.
+# A PID file that is unreadable, or whose PID cannot be signalled, is treated
+# as stale and ignored.
+if os.path.exists(_PIDFILE):
+    try:
+        with open(_PIDFILE) as _pf:
+            _old = int(_pf.read().strip())
+        if _old != os.getpid():
+            os.kill(_old, 0)  # signal 0 delivers nothing; raises OSError if the probe fails
+            print(f'[exp29] Another instance already running (PID {_old}). Exiting.', flush=True)
+            sys.exit(1)
+    except (OSError, ValueError):
+        pass
+
+import gymnasium as gym
+import numpy as np
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
+from stable_baselines3.common.utils import get_schedule_fn
+
+from donkeycar_sb3_runner import ThrottleClampWrapper
+from multitrack_runner import StuckTerminationWrapper
+from reward_wrapper import SpeedRewardWrapper
+
+
+HOST = 'localhost'
+PORT = 9091
+THROTTLE_MIN = 0.2
+LR = 0.00005
+TOTAL_STEPS = 50_000
+CHECKPOINT_EVERY = 5_000
+SCENE_RELOAD_WAIT = 5.0
+
+TRAIN_TRACK = 'donkey-generated-track-v0'
+EVAL_TRACK = 'donkey-minimonaco-track-v0'
+
+STUCK_STEPS = 40
+MIN_DISPLACEMENT = 0.5
+MAX_STUCK_SECONDS = 12.0
+MAX_EPISODE_SECONDS = 30.0
+LOW_SPEED_THRESHOLD = 0.5
+MAX_LOW_SPEED_SECONDS = 3.0
+MAX_CTE = 5.0
+MAX_HIGH_CTE_SECONDS = 1.0
+
+EFFICIENCY_WINDOW = 30
+MIN_EFFICIENCY = 0.15
+REWARD_MAX_CTE = 8.0
+MIN_LAP_TIME = 12.0
+PROGRESS_PATIENCE = 100
+
+
+def log(msg):
+    """Print msg to stdout with an HH:MM:SS prefix, flushing immediately."""
+    print(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}', flush=True)
+
+
+def _remove_pidfile():
+    """Delete the PID file at exit if it still records this process's PID."""
+    try:
+        with open(_PIDFILE) as pf:
+            owner = int(pf.read().strip())
+        if owner == os.getpid():
+            os.remove(_PIDFILE)
+    except (OSError, ValueError):
+        pass
+
+
+def make_env(track_id, port):
+    """Return a zero-argument factory that builds the wrapped env for track_id."""
+    def _init():
+        raw = gym.make(track_id, conf={'host': HOST, 'port': port})
+        env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
+        env = StuckTerminationWrapper(
+            env,
+            stuck_steps=STUCK_STEPS,
+            min_displacement=MIN_DISPLACEMENT,
+            max_stuck_seconds=MAX_STUCK_SECONDS,
+            max_episode_seconds=MAX_EPISODE_SECONDS,
+            low_speed_threshold=LOW_SPEED_THRESHOLD,
+            max_low_speed_seconds=MAX_LOW_SPEED_SECONDS,
+            max_cte=MAX_CTE,
+            max_high_cte_seconds=MAX_HIGH_CTE_SECONDS,
+        )
+        env = SpeedRewardWrapper(
+            env,
+            window_size=EFFICIENCY_WINDOW,
+            min_efficiency=MIN_EFFICIENCY,
+            max_cte=REWARD_MAX_CTE,
+            min_lap_time=MIN_LAP_TIME,
+            progress_patience=PROGRESS_PATIENCE,
+        )
+        return env
+    return _init
+
+
+def connect_env(track_id=TRAIN_TRACK):
+    """Build a single-env DummyVecEnv on PORT wrapped in VecTransposeImage."""
+    vec = DummyVecEnv([make_env(track_id, PORT)])
+    return VecTransposeImage(vec)
+
+
+def reconnect_env(old_env, track_id=TRAIN_TRACK):
+    """Close old_env (best effort), wait SCENE_RELOAD_WAIT, then connect afresh."""
+    try:
+        old_env.close()
+    except Exception as e:
+        log(f' env.close() warning: {e}')
+    time.sleep(SCENE_RELOAD_WAIT)
+    return connect_env(track_id)
+
+
+log('=' * 60)
+log('Exp 29: wave4-trial-0009 fine-tune on generated track')
+log(f' Sim: {HOST}:{PORT} -> {TRAIN_TRACK}')
+log(f' Warm model: {_WARM_MODEL}')
+log(f' Action space: continuous Box (no discretization)')
+log(f' LR={LR}, total={TOTAL_STEPS:,}, checkpoint every {CHECKPOINT_EVERY:,}')
+log(f' After training: zero-shot eval on {EVAL_TRACK}')
+log('=' * 60)
+
+log('Connecting to sim...')
+env = connect_env()
+log(f' obs={env.observation_space.shape}, action={env.action_space}')
+
+log('Loading warm-start model from wave4-trial-0009...')
+model = PPO.load(_WARM_MODEL, env=env, device='cpu')
+# Must update lr_schedule — PPO.load restores the optimizer and schedule from
+# the checkpoint. model.learning_rate = LR alone doesn't update the optimizer.
+model.learning_rate = LR
+model.lr_schedule = get_schedule_fn(LR)
+for pg in model.policy.optimizer.param_groups:
+    pg['lr'] = LR
+log(f' Warm model loaded. action={model.action_space} LR={LR}')
+
+with open(_PIDFILE, 'w') as f:
+    f.write(str(os.getpid()))
+# Without this the PID file outlives the run and lingers as a stale marker.
+atexit.register(_remove_pidfile)
+
+best_total_steps = float('-inf')
+best_total_reward = float('-inf')
+steps_done = 0
+run_tag = datetime.now().strftime('%Y-%m-%d_%H%M%S') + '_wave4_finetune'
+log_path = os.path.join(_SAVE_DIR, f'run_{run_tag}.log')
+best_model_path = os.path.join(_SAVE_DIR, 'best_model.zip')
+
+import logging
+_fh = logging.FileHandler(log_path)
+_fh.setFormatter(logging.Formatter('%(message)s'))
+_sh = logging.StreamHandler(sys.stdout)
+_sh.setFormatter(logging.Formatter('%(message)s'))
+file_log = logging.getLogger('exp29')
+file_log.setLevel(logging.INFO)
+file_log.propagate = False
+file_log.addHandler(_fh)
+file_log.addHandler(_sh)
+
+
+def flog(msg):
+    """Log msg with an HH:MM:SS prefix to both the run log file and stdout."""
+    file_log.info(f'[{datetime.now().strftime("%H:%M:%S")}] {msg}')
+
+
+flog('=' * 60)
+flog(f'Exp 29 started — PID {os.getpid()}')
+flog(f'Log: {log_path}')
+flog(f'Warm start: wave4-trial-0009 | LR={LR}')
+flog(f'Track: {TRAIN_TRACK} | continuous actions')
+flog('=' * 60)
+
+# ── Training loop ─────────────────────────────────────────────────────────────
+# NOTE(review): steps_done counts requested steps. stdout.log from this run shows
+# total_timesteps advancing by 6,144 per 5,000-step segment, so the checkpoint
+# labels understate the real step count — confirm before comparing runs.
+while steps_done < TOTAL_STEPS:
+    seg_steps = min(CHECKPOINT_EVERY, TOTAL_STEPS - steps_done)
+    model.learn(total_timesteps=seg_steps, reset_num_timesteps=False)
+    steps_done += seg_steps
+
+    ckpt = os.path.join(_SAVE_DIR, f'checkpoint_{steps_done:07d}')
+    model.save(ckpt)
+    model.save(os.path.join(_SAVE_DIR, 'model'))
+    flog(f'[{steps_done:,}/{TOTAL_STEPS:,}] Checkpoint saved: {ckpt}.zip')
+
+    flog(' Reconnecting for fresh track layout...')
+    env = reconnect_env(env)
+    model.set_env(env)
+
+    try:
+        obs = env.reset()
+        ep_rewards = np.zeros(env.num_envs)
+        ep_steps = np.zeros(env.num_envs)
+        done_mask = np.zeros(env.num_envs, dtype=bool)
+
+        for _ in range(2000):
+            action, _ = model.predict(obs, deterministic=True)
+            obs, rewards, dones, infos = env.step(action)
+            for i in range(env.num_envs):
+                if not done_mask[i]:
+                    ep_rewards[i] += rewards[i]
+                    ep_steps[i] += 1
+                    if dones[i]:
+                        done_mask[i] = True
+            if done_mask.all():
+                break
+
+        total_steps_eval = int(ep_steps.sum())
+        total_reward_eval = float(ep_rewards.sum())
+        status = '✅' if ep_steps[0] >= 2000 else f'❌@{int(ep_steps[0])}'
+        flog(f' Eval: gentrack={total_reward_eval:.1f}r/{int(ep_steps[0])}s {status}')
+
+        if (total_steps_eval > best_total_steps
+                or (total_steps_eval == best_total_steps
+                    and total_reward_eval > best_total_reward)):
+            best_total_steps = total_steps_eval
+            best_total_reward = total_reward_eval
+            model.save(best_model_path)
+            flog(f' NEW BEST: steps={best_total_steps} reward={best_total_reward:.1f}')
+
+    except Exception as e:
+        flog(f' Eval error: {e}')
+
+env.close()
+flog('Training complete.')
+
+# If every in-training eval failed, best_model.zip was never written by this
+# run; save the final weights so the zero-shot eval has a model to load.
+if best_total_steps == float('-inf'):
+    model.save(best_model_path)
+    flog('No successful gentrack eval; using final weights as best_model.')
+
+# ── Zero-shot eval on mini-monaco ─────────────────────────────────────────────
+flog('')
+flog('=' * 60)
+flog(f'ZERO-SHOT EVAL: best_model on {EVAL_TRACK}')
+flog('=' * 60)
+
+MINI_EPISODES = 5
+MINI_MAX_STEPS = 3000
+
+time.sleep(SCENE_RELOAD_WAIT)
+eval_env = connect_env(track_id=EVAL_TRACK)
+
+try:
+    eval_model = PPO.load(best_model_path, env=eval_env, device='cpu')
+    rewards_mini, steps_mini = [], []
+
+    for ep in range(1, MINI_EPISODES + 1):
+        obs = eval_env.reset()
+        total_r, steps, done = 0.0, 0, False
+        while not done and steps < MINI_MAX_STEPS:
+            action, _ = eval_model.predict(obs, deterministic=True)
+            obs, r, d, info = eval_env.step(action)
+            total_r += float(r[0])
+            steps += 1
+            done = bool(d[0])
+
+        raw_info = info[0] if isinstance(info, (list, tuple)) else info
+        hit = raw_info.get('hit', '?') if isinstance(raw_info, dict) else '?'
+        status = '✅ timeout' if steps >= MINI_MAX_STEPS else f'❌ hit={hit}@{steps}'
+        flog(f' ep{ep}: {total_r:.1f}r / {steps}s {status}')
+        rewards_mini.append(total_r)
+        steps_mini.append(steps)
+        time.sleep(0.3)
+
+    flog(f' Mean: {np.mean(steps_mini):.0f} steps / {np.mean(rewards_mini):.1f} reward')
+    flog(f' {"✅ GENERALIZES" if np.mean(steps_mini) > 500 else "❌ DOES NOT GENERALIZE"}')
+
+except Exception as e:
+    flog(f' Mini-monaco eval error: {e}')
+finally:
+    eval_env.close()
+
+flog('')
+flog('Exp 29 complete.')
+flog(f'Log: {log_path}')
diff --git a/agent/models/exp29-wave4-finetune/current.pid b/agent/models/exp29-wave4-finetune/current.pid
new file mode 100644
index 0000000..e057bce
--- /dev/null
+++ b/agent/models/exp29-wave4-finetune/current.pid
@@ -0,0 +1 @@
+1263940
\ No newline at end of file
diff --git a/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log b/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log
new file mode 100644
index 0000000..bde316d
--- /dev/null
+++ b/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log
@@ -0,0 +1,55 @@
+[22:55:59] ============================================================
+[22:55:59] Exp 29 started — PID 1263940
+[22:55:59] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log
+[22:55:59] Warm start: wave4-trial-0009 | LR=5e-05
+[22:55:59] Track: donkey-generated-track-v0 | continuous actions
+[22:55:59] ============================================================
+[23:00:17] [5,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0005000.zip
+[23:00:17] Reconnecting for fresh track layout...
+[23:00:28] Eval: gentrack=34.4r/279s ❌@279 +[23:00:28] NEW BEST: steps=279 reward=34.4 +[23:04:32] [10,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0010000.zip +[23:04:32] Reconnecting for fresh track layout... +[23:04:45] Eval: gentrack=63.4r/463s ❌@463 +[23:04:45] NEW BEST: steps=463 reward=63.4 +[23:08:46] [15,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0015000.zip +[23:08:46] Reconnecting for fresh track layout... +[23:08:57] Eval: gentrack=33.1r/268s ❌@268 +[23:12:32] [20,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0020000.zip +[23:12:32] Reconnecting for fresh track layout... +[23:12:45] Eval: gentrack=62.4r/450s ❌@450 +[23:16:11] [25,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0025000.zip +[23:16:11] Reconnecting for fresh track layout... +[23:16:35] Eval: gentrack=203.3r/1317s ❌@1317 +[23:16:35] NEW BEST: steps=1317 reward=203.3 +[23:19:47] [30,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0030000.zip +[23:19:47] Reconnecting for fresh track layout... +[23:20:00] Eval: gentrack=64.0r/440s ❌@440 +[23:23:34] [35,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0035000.zip +[23:23:34] Reconnecting for fresh track layout... +[23:24:06] Eval: gentrack=323.8r/2000s ✅ +[23:24:06] NEW BEST: steps=2000 reward=323.8 +[23:27:59] [40,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0040000.zip +[23:27:59] Reconnecting for fresh track layout... 
+[23:28:12] Eval: gentrack=64.0r/450s ❌@450 +[23:32:20] [45,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0045000.zip +[23:32:20] Reconnecting for fresh track layout... +[23:32:40] Eval: gentrack=159.5r/1024s ❌@1024 +[23:36:23] [50,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0050000.zip +[23:36:23] Reconnecting for fresh track layout... +[23:36:37] Eval: gentrack=64.5r/440s ❌@440 +[23:36:37] Training complete. +[23:36:37] +[23:36:37] ============================================================ +[23:36:37] ZERO-SHOT EVAL: best_model on donkey-minimonaco-track-v0 +[23:36:37] ============================================================ +[23:36:58] ep1: 158.8r / 999s ❌ hit=none@999 +[23:37:05] ep2: 46.7r / 338s ❌ hit=right_barrier_seg0@338 +[23:37:23] ep3: 206.2r / 1286s ❌ hit=none@1286 +[23:37:33] ep4: 63.8r / 444s ❌ hit=right_barrier_seg56@444 +[23:37:46] ep5: 108.0r / 700s ❌ hit=right_barrier_seg49@700 +[23:37:46] Mean: 753 steps / 116.7 reward +[23:37:46] ✅ GENERALIZES +[23:37:46] +[23:37:46] Exp 29 complete. +[23:37:46] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log diff --git a/agent/models/exp29-wave4-finetune/stdout.log b/agent/models/exp29-wave4-finetune/stdout.log new file mode 100644 index 0000000..80f265b --- /dev/null +++ b/agent/models/exp29-wave4-finetune/stdout.log @@ -0,0 +1,906 @@ +/home/paulh/.local/lib/python3.10/site-packages/matplotlib/projections/__init__.py:63: UserWarning: Unable to import Axes3D. This may be due to multiple versions of Matplotlib being installed (e.g. as a system package and as a pip package). As a result, the 3D projection is not available. + warnings.warn("Unable to import Axes3D. 
This may be due to multiple versions of " +Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality. +Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade. +Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases. +See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information. +[22:55:56] ============================================================ +[22:55:56] Exp 29: wave4-trial-0009 fine-tune on generated track +[22:55:56] Sim: localhost:9091 -> donkey-generated-track-v0 +[22:55:56] Warm model: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0009/model.zip +[22:55:56] Action space: continuous Box (no discretization) +[22:55:56] LR=5e-05, total=50,000, checkpoint every 5,000 +[22:55:56] After training: zero-shot eval on donkey-minimonaco-track-v0 +[22:55:56] ============================================================ +[22:55:56] Connecting to sim... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +/home/paulh/.local/lib/python3.10/site-packages/gymnasium/spaces/box.py:236: UserWarning: WARN: Box low's precision lowered by casting to float32, current low.dtype=float64 + gym.logger.warn( +/home/paulh/.local/lib/python3.10/site-packages/gymnasium/spaces/box.py:306: UserWarning: WARN: Box high's precision lowered by casting to float32, current high.dtype=float64 + gym.logger.warn( +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! 
+starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[22:55:56] obs=(3, 120, 160), action=Box([-1. 0.2], 1.0, (2,), float32) +[22:55:56] Loading warm-start model from wave4-trial-0009... +/home/paulh/.local/lib/python3.10/site-packages/stable_baselines3/common/utils.py:166: UserWarning: get_schedule_fn() is deprecated, please use FloatSchedule() instead + warnings.warn("get_schedule_fn() is deprecated, please use FloatSchedule() instead") +/home/paulh/.local/lib/python3.10/site-packages/stable_baselines3/common/utils.py:212: UserWarning: constant_fn() is deprecated, please use ConstantSchedule() instead + warnings.warn("constant_fn() is deprecated, please use ConstantSchedule() instead") +[22:55:59] Warm model loaded. action=Box([-1. 0.2], 1.0, (2,), float32) LR=5e-05 +[22:55:59] ============================================================ +[22:55:59] Exp 29 started — PID 1263940 +[22:55:59] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log +[22:55:59] Warm start: wave4-trial-0009 | LR=5e-05 +[22:55:59] Track: donkey-generated-track-v0 | continuous actions +[22:55:59] ============================================================ +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 41 | +| iterations | 1 | +| time_elapsed | 49 | +| total_timesteps | 110592 | +--------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 30 | +| iterations | 2 | +| time_elapsed | 134 | +| total_timesteps | 112640 | +| train/ | | +| approx_kl | 0.15397084 | +| clip_fraction | 0.504 | +| clip_range | 0.2 | 
+| entropy_loss | -2.27 | +| explained_variance | 0.364 | +| learning_rate | 5e-05 | +| loss | 8.65 | +| n_updates | 540 | +| policy_gradient_loss | 0.0492 | +| std | 0.754 | +| value_loss | 40.2 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 28 | +| iterations | 3 | +| time_elapsed | 215 | +| total_timesteps | 114688 | +| train/ | | +| approx_kl | 0.03846732 | +| clip_fraction | 0.307 | +| clip_range | 0.2 | +| entropy_loss | -2.27 | +| explained_variance | 0.593 | +| learning_rate | 5e-05 | +| loss | 4.59 | +| n_updates | 550 | +| policy_gradient_loss | 0.00024 | +| std | 0.753 | +| value_loss | 18.2 | +---------------------------------------- +[23:00:17] [5,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0005000.zip +[23:00:17] Reconnecting for fresh track layout... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +/home/paulh/.local/lib/python3.10/site-packages/gymnasium/spaces/box.py:236: UserWarning: WARN: Box low's precision lowered by casting to float32, current low.dtype=float64 + gym.logger.warn( +/home/paulh/.local/lib/python3.10/site-packages/gymnasium/spaces/box.py:306: UserWarning: WARN: Box high's precision lowered by casting to float32, current high.dtype=float64 + gym.logger.warn( +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! 
+starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:00:28] Eval: gentrack=34.4r/279s ❌@279 +[23:00:28] NEW BEST: steps=279 reward=34.4 +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 54 | +| iterations | 1 | +| time_elapsed | 37 | +| total_timesteps | 116736 | +--------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 34 | +| iterations | 2 | +| time_elapsed | 117 | +| total_timesteps | 118784 | +| train/ | | +| approx_kl | 0.026151573 | +| clip_fraction | 0.286 | +| clip_range | 0.2 | +| entropy_loss | -2.27 | +| explained_variance | 0.352 | +| learning_rate | 5e-05 | +| loss | 4.15 | +| n_updates | 570 | +| policy_gradient_loss | -0.0105 | +| std | 0.752 | +| value_loss | 8.84 | +----------------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.23 seconds +---------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 31 | +| iterations | 3 | +| time_elapsed | 195 | +| total_timesteps | 120832 | +| train/ | | +| approx_kl | 0.02167065 | +| clip_fraction | 0.199 | +| clip_range | 0.2 | +| entropy_loss | -2.27 | +| explained_variance | 0.586 | +| learning_rate | 5e-05 | +| loss | 1.44 | +| n_updates | 580 | +| policy_gradient_loss | -0.0274 | +| std | 0.751 | +| value_loss | 4.3 | +---------------------------------------- +[23:04:32] [10,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0010000.zip +[23:04:32] Reconnecting for fresh track layout... 
+INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.51 seconds +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:04:45] Eval: gentrack=63.4r/463s ❌@463 +[23:04:45] NEW BEST: steps=463 reward=63.4 +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 55 | +| iterations | 1 | +| time_elapsed | 36 | +| total_timesteps | 122880 | +--------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.93 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 34 | +| iterations | 2 | +| time_elapsed | 119 | +| total_timesteps | 124928 | +| train/ | | +| approx_kl | 0.016491085 | +| clip_fraction | 0.151 | +| clip_range | 0.2 | +| entropy_loss | -2.25 | +| explained_variance | 0.741 | +| learning_rate | 5e-05 | +| loss | 1.06 | +| n_updates | 600 | +| policy_gradient_loss | -0.0361 | +| std | 0.746 | +| value_loss | 2.46 | +----------------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.21 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.93 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 31 | +| iterations | 3 | +| time_elapsed | 196 | +| total_timesteps | 126976 | +| train/ | | +| approx_kl | 0.021446671 | +| clip_fraction | 0.204 | +| clip_range | 0.2 | +| entropy_loss | -2.25 | +| explained_variance | 0.697 | +| 
learning_rate | 5e-05 | +| loss | 0.57 | +| n_updates | 610 | +| policy_gradient_loss | -0.036 | +| std | 0.742 | +| value_loss | 2.37 | +----------------------------------------- +[23:08:46] [15,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0015000.zip +[23:08:46] Reconnecting for fresh track layout... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:08:57] Eval: gentrack=33.1r/268s ❌@268 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.87 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.5 seconds +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 59 | +| iterations | 1 | +| time_elapsed | 34 | +| total_timesteps | 129024 | +--------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.47 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.76 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.22 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 38 | +| iterations | 2 | +| time_elapsed | 105 | +| total_timesteps | 131072 | +| train/ | | +| approx_kl | 0.015673645 | +| clip_fraction | 0.143 | +| clip_range | 0.2 | +| entropy_loss | -2.23 | +| explained_variance | 0.581 | +| learning_rate | 5e-05 | +| loss | 0.689 | +| n_updates | 630 | +| policy_gradient_loss | -0.0369 | +| std | 0.737 | +| value_loss | 2.17 | 
+----------------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.5 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.47 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.07 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 34 | +| iterations | 3 | +| time_elapsed | 176 | +| total_timesteps | 133120 | +| train/ | | +| approx_kl | 0.016333869 | +| clip_fraction | 0.163 | +| clip_range | 0.2 | +| entropy_loss | -2.23 | +| explained_variance | 0.526 | +| learning_rate | 5e-05 | +| loss | 0.683 | +| n_updates | 640 | +| policy_gradient_loss | -0.0348 | +| std | 0.735 | +| value_loss | 2.01 | +----------------------------------------- +[23:12:32] [20,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0020000.zip +[23:12:32] Reconnecting for fresh track layout... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! 
+INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.91 seconds +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:12:45] Eval: gentrack=62.4r/450s ❌@450 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.96 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.58 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.28 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.38 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.18 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.32 seconds +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 72 | +| iterations | 1 | +| time_elapsed | 28 | +| total_timesteps | 135168 | +--------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.71 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.67 seconds +---------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 40 | +| iterations | 2 | +| time_elapsed | 100 | +| total_timesteps | 137216 | +| train/ | | +| approx_kl | 0.02011627 | +| clip_fraction | 0.181 | +| clip_range | 0.2 | +| entropy_loss | -2.21 | +| explained_variance | 0.205 | +| learning_rate | 5e-05 | +| loss | 0.231 | +| n_updates | 660 | +| policy_gradient_loss | -0.0454 | +| std | 0.728 | +| value_loss | 0.893 | +---------------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.8 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.21 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 35 | +| iterations | 3 | +| time_elapsed | 171 | +| 
total_timesteps | 139264 | +| train/ | | +| approx_kl | 0.019535454 | +| clip_fraction | 0.203 | +| clip_range | 0.2 | +| entropy_loss | -2.2 | +| explained_variance | 0.506 | +| learning_rate | 5e-05 | +| loss | 0.497 | +| n_updates | 670 | +| policy_gradient_loss | -0.0364 | +| std | 0.726 | +| value_loss | 1.54 | +----------------------------------------- +[23:16:11] [25,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0025000.zip +[23:16:11] Reconnecting for fresh track layout... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.99 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.13 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.56 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.6 seconds +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:16:35] Eval: gentrack=203.3r/1317s ❌@1317 +[23:16:35] NEW BEST: steps=1317 reward=203.3 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.74 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.22 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.52 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.2 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.18 seconds +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 67 | +| iterations | 1 | +| time_elapsed | 30 | +| total_timesteps | 141312 | +--------------------------------- 
+INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.53 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.34 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 42 | +| iterations | 2 | +| time_elapsed | 97 | +| total_timesteps | 143360 | +| train/ | | +| approx_kl | 0.020319626 | +| clip_fraction | 0.189 | +| clip_range | 0.2 | +| entropy_loss | -2.19 | +| explained_variance | 0.2 | +| learning_rate | 5e-05 | +| loss | 0.163 | +| n_updates | 690 | +| policy_gradient_loss | -0.0378 | +| std | 0.723 | +| value_loss | 1.83 | +----------------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.92 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.24 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.49 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.94 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.18 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 38 | +| iterations | 3 | +| time_elapsed | 160 | +| total_timesteps | 145408 | +| train/ | | +| approx_kl | 0.027640326 | +| clip_fraction | 0.24 | +| clip_range | 0.2 | +| entropy_loss | -2.19 | +| explained_variance | 0.468 | +| learning_rate | 5e-05 | +| loss | 0.271 | +| n_updates | 700 | +| policy_gradient_loss | -0.0384 | +| std | 0.722 | +| value_loss | 1.49 | +----------------------------------------- +[23:19:47] [30,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0030000.zip +[23:19:47] Reconnecting for fresh track layout... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! 
+INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.39 seconds +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:20:00] Eval: gentrack=64.0r/440s ❌@440 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.8 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.74 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.0 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.81 seconds +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 64 | +| iterations | 1 | +| time_elapsed | 31 | +| total_timesteps | 147456 | +--------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.62 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.41 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.12 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 18.01 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.88 seconds +---------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 42 | +| iterations | 2 | +| time_elapsed | 96 | +| total_timesteps | 149504 | +| train/ | | +| approx_kl | 0.01823772 | +| clip_fraction | 0.159 | +| clip_range | 0.2 | +| entropy_loss | -2.18 | +| explained_variance | 0.202 | +| learning_rate | 5e-05 | +| loss | 1.07 | +| n_updates | 720 | +| policy_gradient_loss | -0.0387 | +| std | 0.719 | +| value_loss | 2.17 | +---------------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.73 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.67 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.58 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.21 
seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.58 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.51 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 36 | +| iterations | 3 | +| time_elapsed | 166 | +| total_timesteps | 151552 | +| train/ | | +| approx_kl | 0.023359878 | +| clip_fraction | 0.185 | +| clip_range | 0.2 | +| entropy_loss | -2.18 | +| explained_variance | 0.23 | +| learning_rate | 5e-05 | +| loss | 1.14 | +| n_updates | 730 | +| policy_gradient_loss | -0.0453 | +| std | 0.717 | +| value_loss | 1.55 | +----------------------------------------- +[23:23:34] [35,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0035000.zip +[23:23:34] Reconnecting for fresh track layout... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! 
+INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.39 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.46 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.84 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.55 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.19 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.01 seconds +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:24:06] Eval: gentrack=323.8r/2000s ✅ +[23:24:06] NEW BEST: steps=2000 reward=323.8 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.43 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.17 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.72 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.03 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.67 seconds +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 64 | +| iterations | 1 | +| time_elapsed | 31 | +| total_timesteps | 153600 | +--------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.74 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.52 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.76 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.74 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.48 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 38 | +| iterations | 2 | +| time_elapsed | 107 | +| total_timesteps | 155648 | +| train/ | | +| approx_kl | 0.021391733 | +| clip_fraction | 0.181 | +| clip_range | 0.2 | +| entropy_loss | -2.16 | +| explained_variance | 0.202 
| +| learning_rate | 5e-05 | +| loss | 0.583 | +| n_updates | 750 | +| policy_gradient_loss | -0.0397 | +| std | 0.711 | +| value_loss | 2.25 | +----------------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.87 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.07 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.1 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.72 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.87 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 32 | +| iterations | 3 | +| time_elapsed | 190 | +| total_timesteps | 157696 | +| train/ | | +| approx_kl | 0.023227856 | +| clip_fraction | 0.2 | +| clip_range | 0.2 | +| entropy_loss | -2.15 | +| explained_variance | 0.266 | +| learning_rate | 5e-05 | +| loss | 0.503 | +| n_updates | 760 | +| policy_gradient_loss | -0.0421 | +| std | 0.708 | +| value_loss | 1.82 | +----------------------------------------- +[23:27:59] [40,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0040000.zip +[23:27:59] Reconnecting for fresh track layout... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! 
+INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.9 seconds +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:28:12] Eval: gentrack=64.0r/450s ❌@450 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.63 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.51 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.73 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.39 seconds +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 64 | +| iterations | 1 | +| time_elapsed | 31 | +| total_timesteps | 159744 | +--------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.34 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.42 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.69 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 12.78 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.86 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 37 | +| iterations | 2 | +| time_elapsed | 108 | +| total_timesteps | 161792 | +| train/ | | +| approx_kl | 0.023146037 | +| clip_fraction | 0.197 | +| clip_range | 0.2 | +| entropy_loss | -2.14 | +| explained_variance | 0.262 | +| learning_rate | 5e-05 | +| loss | 0.563 | +| n_updates | 780 | +| policy_gradient_loss | -0.0403 | +| std | 0.706 | +| value_loss | 2.03 | +----------------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.77 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 17.23 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.69 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 
17.61 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.74 seconds +---------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 30 | +| iterations | 3 | +| time_elapsed | 202 | +| total_timesteps | 163840 | +| train/ | | +| approx_kl | 0.03328496 | +| clip_fraction | 0.232 | +| clip_range | 0.2 | +| entropy_loss | -2.14 | +| explained_variance | 0.233 | +| learning_rate | 5e-05 | +| loss | 0.465 | +| n_updates | 790 | +| policy_gradient_loss | -0.0427 | +| std | 0.703 | +| value_loss | 2.17 | +---------------------------------------- +[23:32:20] [45,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0045000.zip +[23:32:20] Reconnecting for fresh track layout... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! 
+INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.65 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.87 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.04 seconds +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:32:40] Eval: gentrack=159.5r/1024s ❌@1024 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.11 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.01 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.25 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.55 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.94 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.19 seconds +--------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 66 | +| iterations | 1 | +| time_elapsed | 30 | +| total_timesteps | 165888 | +--------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.73 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.72 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.16 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.74 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.18 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.08 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 40 | +| iterations | 2 | +| time_elapsed | 101 | +| total_timesteps | 167936 | +| train/ | | +| approx_kl | 0.025306746 | +| clip_fraction | 0.23 | +| clip_range | 0.2 | +| entropy_loss | -2.13 | +| explained_variance | 0.199 | +| learning_rate | 5e-05 | +| loss | 0.368 | +| n_updates | 810 | +| policy_gradient_loss | -0.0487 | +| 
std | 0.7 | +| value_loss | 2.04 | +----------------------------------------- +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.59 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.44 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 12.94 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.89 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 15.51 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.5 seconds +----------------------------------------- +| rollout/ | | +| ep_len_mean | 216 | +| ep_rew_mean | 158 | +| time/ | | +| fps | 35 | +| iterations | 3 | +| time_elapsed | 173 | +| total_timesteps | 169984 | +| train/ | | +| approx_kl | 0.030002665 | +| clip_fraction | 0.239 | +| clip_range | 0.2 | +| entropy_loss | -2.12 | +| explained_variance | -0.0123 | +| learning_rate | 5e-05 | +| loss | 0.488 | +| n_updates | 820 | +| policy_gradient_loss | -0.0548 | +| std | 0.696 | +| value_loss | 1.09 | +----------------------------------------- +[23:36:23] [50,000/50,000] Checkpoint saved: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/checkpoint_0050000.zip +[23:36:23] Reconnecting for fresh track layout... +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.51 seconds +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:36:37] Eval: gentrack=64.5r/440s ❌@440 +[23:36:37] Training complete. 
+[23:36:37] +[23:36:37] ============================================================ +[23:36:37] ZERO-SHOT EVAL: best_model on donkey-minimonaco-track-v0 +[23:36:37] ============================================================ +INFO:gym_donkeycar.core.client:connecting to localhost:9091 +INFO:gym_donkeycar.envs.donkey_sim:on need car config +INFO:gym_donkeycar.envs.donkey_sim:sending car config. +INFO:gym_donkeycar.envs.donkey_sim:sim started! +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.38 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 13.61 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.37 seconds +starting DonkeyGym env +Setting default: start_delay 5.0 +Setting default: max_cte 8.0 +Setting default: frame_skip 1 +Setting default: cam_resolution (120, 160, 3) +Setting default: log_level 20 +Setting default: steer_limit 1.0 +Setting default: throttle_min 0.0 +Setting default: throttle_max 1.0 +[23:36:58] ep1: 158.8r / 999s ❌ hit=none@999 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.27 seconds +[23:37:05] ep2: 46.7r / 338s ❌ hit=right_barrier_seg0@338 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.09 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.25 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.11 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.43 seconds +[23:37:23] ep3: 206.2r / 1286s ❌ hit=none@1286 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.74 seconds +[23:37:33] ep4: 63.8r / 444s ❌ hit=right_barrier_seg56@444 +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 16.9 seconds +INFO:gym_donkeycar.envs.donkey_sim:New lap time: 14.1 seconds +[23:37:46] ep5: 108.0r / 700s ❌ hit=right_barrier_seg49@700 +[23:37:46] Mean: 753 steps / 116.7 reward +[23:37:46] ✅ GENERALIZES +[23:37:46] +[23:37:46] Exp 29 complete. +[23:37:46] Log: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/exp29-wave4-finetune/run_2026-05-06_225559_wave4_finetune.log