feat: shuttle-exploit detection in mini_monaco eval
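Samples the car's position every 100 steps during evaluation and computes macro efficiency = net_displacement / total_sampled_path, where net_displacement is the straight-line distance between the first and last samples and total_sampled_path is the sum of the distances between consecutive samples. If the efficiency is below 0.3 for an episode of at least 500 steps (with at least 3 position samples), the per-episode log line is suffixed with "⚠️ SHUTTLE EXPLOIT? macro_efficiency=<value>". The log line also reports reward/step for each episode, so anomalously high-scoring long episodes can be diagnosed immediately. This should indicate whether Trials 9 and 14 (scores 1435 and 1573, 2000 steps each) were genuine driving or back-and-forth shuttling on a mini_monaco straight; note that a completed lap on a closed circuit also returns the car near its starting point and lowers this ratio, so a warning is a prompt for inspection rather than proof. Agent: pi | Tests: 102 passed | Tests-Added: 0 | TypeScript: N/A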
This commit is contained in:
parent
96c49dd057
commit
a5577fb3e7
|
|
@ -432,6 +432,7 @@ def evaluate_test_tracks(model, current_env, eval_episodes):
|
|||
total_reward = 0.0
|
||||
steps = 0
|
||||
done = False
|
||||
pos_samples = [] # sample position every 100 steps to detect shuttling
|
||||
|
||||
while not done and steps < 2000:
|
||||
action, _ = model.predict(obs, deterministic=True)
|
||||
|
|
@ -443,10 +444,32 @@ def evaluate_test_tracks(model, current_env, eval_episodes):
|
|||
obs, reward, done, info = result
|
||||
total_reward += reward
|
||||
steps += 1
|
||||
# Sample position every 100 steps for shuttle-exploit detection
|
||||
if steps % 100 == 0:
|
||||
raw_info = info[0] if isinstance(info, (list, tuple)) else info
|
||||
pos = raw_info.get('pos', None) if isinstance(raw_info, dict) else None
|
||||
speed = raw_info.get('speed', 0) if isinstance(raw_info, dict) else 0
|
||||
if pos is not None:
|
||||
pos_samples.append(np.array(list(pos)[:3], dtype=np.float64))
|
||||
|
||||
# Detect shuttle: check if position oscillates rather than progresses
|
||||
shuttle_warning = ''
|
||||
if len(pos_samples) >= 3:
|
||||
# Compute net progress: total displacement from start to end
|
||||
net_dist = float(np.linalg.norm(pos_samples[-1] - pos_samples[0]))
|
||||
# Compute total path between samples
|
||||
total_sampled = sum(
|
||||
float(np.linalg.norm(pos_samples[i+1] - pos_samples[i]))
|
||||
for i in range(len(pos_samples) - 1)
|
||||
)
|
||||
macro_eff = net_dist / total_sampled if total_sampled > 0.1 else 1.0
|
||||
if macro_eff < 0.3 and steps >= 500:
|
||||
shuttle_warning = f' ⚠️ SHUTTLE EXPLOIT? macro_efficiency={macro_eff:.2f}'
|
||||
|
||||
all_rewards.append(total_reward)
|
||||
all_steps.append(steps)
|
||||
log(f'[W3 Runner] {track_name} ep{ep+1}: reward={total_reward:.1f} steps={steps}')
|
||||
log(f'[W3 Runner] {track_name} ep{ep+1}: reward={total_reward:.1f} steps={steps}'
|
||||
f' ({total_reward/max(steps,1):.2f}/step){shuttle_warning}')
|
||||
time.sleep(0.5)
|
||||
|
||||
mean_reward = float(np.mean(all_rewards))
|
||||
|
|
|
|||
|
|
@ -710,3 +710,16 @@
|
|||
[2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||
[2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||
[2026-04-15 22:47:03] [AutoResearch] Only 1 results — using random proposal.
|
||||
[2026-04-16 17:28:47] [AutoResearch] GP UCB top-5 candidates:
|
||||
[2026-04-16 17:28:47] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
|
||||
[2026-04-16 17:28:47] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
|
||||
[2026-04-16 17:28:47] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
|
||||
[2026-04-16 17:28:47] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
|
||||
[2026-04-16 17:28:47] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
|
||||
[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||
[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||
[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||
[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||
[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||
[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||
[2026-04-16 17:28:47] [AutoResearch] Only 1 results — using random proposal.
|
||||
|
|
|
|||
|
|
@ -385,3 +385,8 @@
|
|||
[2026-04-15 22:47:16] [Wave3] Only 0 results — using random proposal.
|
||||
[2026-04-15 22:47:16] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||
[2026-04-15 22:47:16] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
||||
[2026-04-16 17:29:20] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||
[2026-04-16 17:29:20] [Wave3] Seed trial 2/2: using hardcoded params.
|
||||
[2026-04-16 17:29:20] [Wave3] Only 0 results — using random proposal.
|
||||
[2026-04-16 17:29:20] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||
[2026-04-16 17:29:20] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
||||
|
|
|
|||
|
|
@ -710,3 +710,59 @@
|
|||
[2026-04-16 14:10:06] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
|
||||
[2026-04-16 14:10:06] score=230.98 params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
|
||||
[2026-04-16 14:10:06] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
|
||||
[2026-04-16 14:10:08] [Wave4] ✅ Git push complete after trial 20
|
||||
[2026-04-16 14:10:10]
|
||||
[Wave4] ========== Trial 21/25 ==========
|
||||
[2026-04-16 14:10:10] [Wave4] GP UCB top-5 proposals:
|
||||
[2026-04-16 14:10:10] UCB=2.6140 mu=2.2621 σ=0.1760 params={'learning_rate': 0.0006240263317913531, 'steps_per_switch': 6248, 'total_timesteps': 81430}
|
||||
[2026-04-16 14:10:10] UCB=2.4442 mu=2.0635 σ=0.1903 params={'learning_rate': 0.0008059149094586216, 'steps_per_switch': 5668, 'total_timesteps': 84976}
|
||||
[2026-04-16 14:10:10] UCB=2.4147 mu=2.0379 σ=0.1884 params={'learning_rate': 0.0005106809671527766, 'steps_per_switch': 7044, 'total_timesteps': 86259}
|
||||
[2026-04-16 14:10:10] UCB=2.2869 mu=1.1164 σ=0.5852 params={'learning_rate': 0.001932234805776913, 'steps_per_switch': 3270, 'total_timesteps': 81124}
|
||||
[2026-04-16 14:10:10] UCB=2.1881 mu=1.7486 σ=0.2198 params={'learning_rate': 0.0006026876192105071, 'steps_per_switch': 14736, 'total_timesteps': 86970}
|
||||
[2026-04-16 14:10:10] [Wave4] Proposed params: {'learning_rate': 0.0006240263317913531, 'steps_per_switch': 6248, 'total_timesteps': 81430}
|
||||
[2026-04-16 14:10:12] [Wave4] Launching trial 21: {'learning_rate': 0.0006240263317913531, 'steps_per_switch': 6248, 'total_timesteps': 81430}
|
||||
[2026-04-16 14:10:12] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 81430 --steps-per-switch 6248 --learning-rate 0.0006240263317913531 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0021
|
||||
[2026-04-16 15:41:17] [Wave4] Trial 21 finished in 5465.6s, rc=0
|
||||
[2026-04-16 15:41:17] [Wave4] Parsed: combined=86.1012 mini_monaco=86.1012
|
||||
[2026-04-16 15:41:17] [Wave4] ===== Trial 21 Summary =====
|
||||
[2026-04-16 15:41:17] GP data points : 21
|
||||
[2026-04-16 15:41:17] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
|
||||
[2026-04-16 15:41:17] Top 5:
|
||||
[2026-04-16 15:41:17] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000}
|
||||
[2026-04-16 15:41:17] score=1573.07 params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359}
|
||||
[2026-04-16 15:41:17] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
|
||||
[2026-04-16 15:41:17] score=230.98 params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
|
||||
[2026-04-16 15:41:17] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
|
||||
[2026-04-16 15:41:19]
|
||||
[Wave4] ========== Trial 22/25 ==========
|
||||
[2026-04-16 15:41:19] [Wave4] GP UCB top-5 proposals:
|
||||
[2026-04-16 15:41:19] UCB=2.9276 mu=1.9840 σ=0.4718 params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
|
||||
[2026-04-16 15:41:19] UCB=2.8979 mu=1.8081 σ=0.5449 params={'learning_rate': 0.0014475574559088072, 'steps_per_switch': 3057, 'total_timesteps': 77220}
|
||||
[2026-04-16 15:41:19] UCB=2.7394 mu=1.7758 σ=0.4818 params={'learning_rate': 0.0010706637693188694, 'steps_per_switch': 3430, 'total_timesteps': 79924}
|
||||
[2026-04-16 15:41:19] UCB=2.7361 mu=1.9188 σ=0.4086 params={'learning_rate': 0.0003137950754813831, 'steps_per_switch': 7355, 'total_timesteps': 63965}
|
||||
[2026-04-16 15:41:19] UCB=2.6672 mu=1.6715 σ=0.4978 params={'learning_rate': 0.0013764156246110833, 'steps_per_switch': 3205, 'total_timesteps': 83531}
|
||||
[2026-04-16 15:41:19] [Wave4] Proposed params: {'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
|
||||
[2026-04-16 15:41:21] [Wave4] Launching trial 22: {'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
|
||||
[2026-04-16 15:41:21] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 81912 --steps-per-switch 3269 --learning-rate 0.0010680884757571496 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0022
|
||||
[2026-04-16 17:13:22] [Wave4] Trial 22 finished in 5520.7s, rc=0
|
||||
[2026-04-16 17:13:22] [Wave4] Parsed: combined=193.2341 mini_monaco=193.2341
|
||||
[2026-04-16 17:13:22] [Wave4] ===== Trial 22 Summary =====
|
||||
[2026-04-16 17:13:22] GP data points : 22
|
||||
[2026-04-16 17:13:22] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
|
||||
[2026-04-16 17:13:22] Top 5:
|
||||
[2026-04-16 17:13:22] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000}
|
||||
[2026-04-16 17:13:22] score=1573.07 params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359}
|
||||
[2026-04-16 17:13:22] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
|
||||
[2026-04-16 17:13:22] score=230.98 params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
|
||||
[2026-04-16 17:13:22] score=193.23 params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
|
||||
[2026-04-16 17:13:24]
|
||||
[Wave4] ========== Trial 23/25 ==========
|
||||
[2026-04-16 17:13:24] [Wave4] GP UCB top-5 proposals:
|
||||
[2026-04-16 17:13:24] UCB=2.5284 mu=2.2838 σ=0.1223 params={'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
|
||||
[2026-04-16 17:13:24] UCB=2.4192 mu=1.9135 σ=0.2529 params={'learning_rate': 0.0005202514734965685, 'steps_per_switch': 6358, 'total_timesteps': 64937}
|
||||
[2026-04-16 17:13:24] UCB=2.3057 mu=2.0781 σ=0.1138 params={'learning_rate': 0.00028748663178259926, 'steps_per_switch': 4629, 'total_timesteps': 64690}
|
||||
[2026-04-16 17:13:24] UCB=2.2310 mu=1.9780 σ=0.1265 params={'learning_rate': 0.0005977134566763185, 'steps_per_switch': 7805, 'total_timesteps': 89342}
|
||||
[2026-04-16 17:13:24] UCB=2.1094 mu=1.5506 σ=0.2794 params={'learning_rate': 0.0001403862307566014, 'steps_per_switch': 6141, 'total_timesteps': 64341}
|
||||
[2026-04-16 17:13:24] [Wave4] Proposed params: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
|
||||
[2026-04-16 17:13:26] [Wave4] Launching trial 23: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
|
||||
[2026-04-16 17:13:26] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 66965 --steps-per-switch 4507 --learning-rate 0.0003194840316570061 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0023
|
||||
|
|
|
|||
|
|
@ -18,3 +18,5 @@
|
|||
{"trial": 18, "timestamp": "2026-04-16T11:58:07.842111", "params": {"learning_rate": 0.00014739138140971148, "steps_per_switch": 5692, "total_timesteps": 73312}, "combined_test_score": 115.6774, "mini_monaco_reward": 115.6774, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0018/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3806.9049928188324}
|
||||
{"trial": 19, "timestamp": "2026-04-16T13:10:23.540177", "params": {"learning_rate": 0.0006672844816013197, "steps_per_switch": 4747, "total_timesteps": 64179}, "combined_test_score": 230.9827, "mini_monaco_reward": 230.9827, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0019/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4331.6394629478455}
|
||||
{"trial": 20, "timestamp": "2026-04-16T14:10:06.493399", "params": {"learning_rate": 0.0007104769870597334, "steps_per_switch": 5822, "total_timesteps": 73160}, "combined_test_score": 20.9587, "mini_monaco_reward": 20.9587, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0020/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3578.8967142105103}
|
||||
{"trial": 21, "timestamp": "2026-04-16T15:41:17.902221", "params": {"learning_rate": 0.0006240263317913531, "steps_per_switch": 6248, "total_timesteps": 81430}, "combined_test_score": 86.1012, "mini_monaco_reward": 86.1012, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0021/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5465.586006164551}
|
||||
{"trial": 22, "timestamp": "2026-04-16T17:13:22.634461", "params": {"learning_rate": 0.0010680884757571496, "steps_per_switch": 3269, "total_timesteps": 81912}, "combined_test_score": 193.2341, "mini_monaco_reward": 193.2341, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0022/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5520.664301395416}
|
||||
|
|
|
|||
Loading…
Reference in New Issue