feat: shuttle-exploit detection in mini_monaco eval

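Samples the car position every 100 steps during eval and computes macro efficiency = net_displacement / total_sampled_path, where net_displacement is the distance between the first and last samples and total_sampled_path is the sum of distances between consecutive samples. If at least 3 samples were collected, the efficiency is below 0.3, and the episode ran for at least 500 steps, the per-episode log line is suffixed with "⚠️  SHUTTLE EXPLOIT? macro_efficiency=<value>". Also logs reward/step per episode so that anomalously high-scoring long episodes can be diagnosed immediately. This should tell us whether Trials 9 and 14 (scores 1435 and 1573, 2000 steps each) were genuine driving or back-and-forth shuttling on a mini_monaco straight. Note that a completed lap on a closed circuit also ends near its starting point, so a low efficiency value is a prompt for inspection rather than proof of an exploit.

Agent: pi
Tests: 102 passed
Tests-Added: 0
TypeScript: N/A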
2026-04-16 17:29:30 -04:00 · 2026-04-16 17:29:30 -04:00 · a5577fb3e7
parent 96c49dd057
commit a5577fb3e7
5 changed files with 100 additions and 1 deletions
--- a/agent/multitrack_runner.py
+++ b/agent/multitrack_runner.py
@ -432,6 +432,7 @@ def evaluate_test_tracks(model, current_env, eval_episodes):
            total_reward = 0.0
            steps = 0
            done = False
+            pos_samples = []   # sample position every 100 steps to detect shuttling

            while not done and steps < 2000:
                action, _ = model.predict(obs, deterministic=True)
@ -443,10 +444,32 @@ def evaluate_test_tracks(model, current_env, eval_episodes):
                    obs, reward, done, info = result
                total_reward += reward
                steps += 1
+                # Sample position every 100 steps for shuttle-exploit detection
+                if steps % 100 == 0:
+                    raw_info = info[0] if isinstance(info, (list, tuple)) else info
+                    pos = raw_info.get('pos', None) if isinstance(raw_info, dict) else None
+                    speed = raw_info.get('speed', 0) if isinstance(raw_info, dict) else 0
+                    if pos is not None:
+                        pos_samples.append(np.array(list(pos)[:3], dtype=np.float64))
+
+            # Detect shuttle: check if position oscillates rather than progresses
+            shuttle_warning = ''
+            if len(pos_samples) >= 3:
+                # Compute net progress: total displacement from start to end
+                net_dist = float(np.linalg.norm(pos_samples[-1] - pos_samples[0]))
+                # Compute total path between samples
+                total_sampled = sum(
+                    float(np.linalg.norm(pos_samples[i+1] - pos_samples[i]))
+                    for i in range(len(pos_samples) - 1)
+                )
+                macro_eff = net_dist / total_sampled if total_sampled > 0.1 else 1.0
+                if macro_eff < 0.3 and steps >= 500:
+                    shuttle_warning = f' ⚠️  SHUTTLE EXPLOIT? macro_efficiency={macro_eff:.2f}'

            all_rewards.append(total_reward)
            all_steps.append(steps)
-            log(f'[W3 Runner]   {track_name} ep{ep+1}: reward={total_reward:.1f} steps={steps}')
+            log(f'[W3 Runner]   {track_name} ep{ep+1}: reward={total_reward:.1f} steps={steps}'
+                f' ({total_reward/max(steps,1):.2f}/step){shuttle_warning}')
            time.sleep(0.5)

        mean_reward = float(np.mean(all_rewards))
--- a/agent/outerloop-results/autoresearch_phase2_log.txt
+++ b/agent/outerloop-results/autoresearch_phase2_log.txt
@ -710,3 +710,16 @@
 [2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
 [2026-04-15 22:47:03] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
 [2026-04-15 22:47:03] [AutoResearch] Only 1 results — using random proposal.
+[2026-04-16 17:28:47] [AutoResearch] GP UCB top-5 candidates:
+[2026-04-16 17:28:47]   UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
+[2026-04-16 17:28:47]   UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
+[2026-04-16 17:28:47]   UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
+[2026-04-16 17:28:47]   UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
+[2026-04-16 17:28:47]   UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
+[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
+[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
+[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
+[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
+[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
+[2026-04-16 17:28:47] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
+[2026-04-16 17:28:47] [AutoResearch] Only 1 results — using random proposal.
--- a/agent/outerloop-results/autoresearch_phase3_log.txt
+++ b/agent/outerloop-results/autoresearch_phase3_log.txt
@ -385,3 +385,8 @@
 [2026-04-15 22:47:16] [Wave3] Only 0 results — using random proposal.
 [2026-04-15 22:47:16] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
 [2026-04-15 22:47:16] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
+[2026-04-16 17:29:20] [Wave3] Seed trial 1/2: using hardcoded params.
+[2026-04-16 17:29:20] [Wave3] Seed trial 2/2: using hardcoded params.
+[2026-04-16 17:29:20] [Wave3] Only 0 results — using random proposal.
+[2026-04-16 17:29:20] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
+[2026-04-16 17:29:20] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
--- a/agent/outerloop-results/autoresearch_phase4_log.txt
+++ b/agent/outerloop-results/autoresearch_phase4_log.txt
@ -710,3 +710,59 @@
 [2026-04-16 14:10:06]     score=1435.04  params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
 [2026-04-16 14:10:06]     score=230.98  params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
 [2026-04-16 14:10:06]     score=176.67  params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
+[2026-04-16 14:10:08] [Wave4] ✅ Git push complete after trial 20
+[2026-04-16 14:10:10] 
+[Wave4] ========== Trial 21/25 ==========
+[2026-04-16 14:10:10] [Wave4] GP UCB top-5 proposals:
+[2026-04-16 14:10:10]   UCB=2.6140 mu=2.2621 σ=0.1760 params={'learning_rate': 0.0006240263317913531, 'steps_per_switch': 6248, 'total_timesteps': 81430}
+[2026-04-16 14:10:10]   UCB=2.4442 mu=2.0635 σ=0.1903 params={'learning_rate': 0.0008059149094586216, 'steps_per_switch': 5668, 'total_timesteps': 84976}
+[2026-04-16 14:10:10]   UCB=2.4147 mu=2.0379 σ=0.1884 params={'learning_rate': 0.0005106809671527766, 'steps_per_switch': 7044, 'total_timesteps': 86259}
+[2026-04-16 14:10:10]   UCB=2.2869 mu=1.1164 σ=0.5852 params={'learning_rate': 0.001932234805776913, 'steps_per_switch': 3270, 'total_timesteps': 81124}
+[2026-04-16 14:10:10]   UCB=2.1881 mu=1.7486 σ=0.2198 params={'learning_rate': 0.0006026876192105071, 'steps_per_switch': 14736, 'total_timesteps': 86970}
+[2026-04-16 14:10:10] [Wave4] Proposed params: {'learning_rate': 0.0006240263317913531, 'steps_per_switch': 6248, 'total_timesteps': 81430}
+[2026-04-16 14:10:12] [Wave4] Launching trial 21: {'learning_rate': 0.0006240263317913531, 'steps_per_switch': 6248, 'total_timesteps': 81430}
+[2026-04-16 14:10:12] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 81430 --steps-per-switch 6248 --learning-rate 0.0006240263317913531 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0021
+[2026-04-16 15:41:17] [Wave4] Trial 21 finished in 5465.6s, rc=0
+[2026-04-16 15:41:17] [Wave4] Parsed: combined=86.1012 mini_monaco=86.1012
+[2026-04-16 15:41:17] [Wave4] ===== Trial 21 Summary =====
+[2026-04-16 15:41:17]   GP data points : 21
+[2026-04-16 15:41:17]   Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
+[2026-04-16 15:41:17]   Top 5:
+[2026-04-16 15:41:17]     score=1943.10  params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000}
+[2026-04-16 15:41:17]     score=1573.07  params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359}
+[2026-04-16 15:41:17]     score=1435.04  params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
+[2026-04-16 15:41:17]     score=230.98  params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
+[2026-04-16 15:41:17]     score=176.67  params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
+[2026-04-16 15:41:19] 
+[Wave4] ========== Trial 22/25 ==========
+[2026-04-16 15:41:19] [Wave4] GP UCB top-5 proposals:
+[2026-04-16 15:41:19]   UCB=2.9276 mu=1.9840 σ=0.4718 params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
+[2026-04-16 15:41:19]   UCB=2.8979 mu=1.8081 σ=0.5449 params={'learning_rate': 0.0014475574559088072, 'steps_per_switch': 3057, 'total_timesteps': 77220}
+[2026-04-16 15:41:19]   UCB=2.7394 mu=1.7758 σ=0.4818 params={'learning_rate': 0.0010706637693188694, 'steps_per_switch': 3430, 'total_timesteps': 79924}
+[2026-04-16 15:41:19]   UCB=2.7361 mu=1.9188 σ=0.4086 params={'learning_rate': 0.0003137950754813831, 'steps_per_switch': 7355, 'total_timesteps': 63965}
+[2026-04-16 15:41:19]   UCB=2.6672 mu=1.6715 σ=0.4978 params={'learning_rate': 0.0013764156246110833, 'steps_per_switch': 3205, 'total_timesteps': 83531}
+[2026-04-16 15:41:19] [Wave4] Proposed params: {'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
+[2026-04-16 15:41:21] [Wave4] Launching trial 22: {'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
+[2026-04-16 15:41:21] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 81912 --steps-per-switch 3269 --learning-rate 0.0010680884757571496 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0022
+[2026-04-16 17:13:22] [Wave4] Trial 22 finished in 5520.7s, rc=0
+[2026-04-16 17:13:22] [Wave4] Parsed: combined=193.2341 mini_monaco=193.2341
+[2026-04-16 17:13:22] [Wave4] ===== Trial 22 Summary =====
+[2026-04-16 17:13:22]   GP data points : 22
+[2026-04-16 17:13:22]   Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
+[2026-04-16 17:13:22]   Top 5:
+[2026-04-16 17:13:22]     score=1943.10  params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000}
+[2026-04-16 17:13:22]     score=1573.07  params={'learning_rate': 0.00033896311195395496, 'steps_per_switch': 5448, 'total_timesteps': 69359}
+[2026-04-16 17:13:22]     score=1435.04  params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893}
+[2026-04-16 17:13:22]     score=230.98  params={'learning_rate': 0.0006672844816013197, 'steps_per_switch': 4747, 'total_timesteps': 64179}
+[2026-04-16 17:13:22]     score=193.23  params={'learning_rate': 0.0010680884757571496, 'steps_per_switch': 3269, 'total_timesteps': 81912}
+[2026-04-16 17:13:24] 
+[Wave4] ========== Trial 23/25 ==========
+[2026-04-16 17:13:24] [Wave4] GP UCB top-5 proposals:
+[2026-04-16 17:13:24]   UCB=2.5284 mu=2.2838 σ=0.1223 params={'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
+[2026-04-16 17:13:24]   UCB=2.4192 mu=1.9135 σ=0.2529 params={'learning_rate': 0.0005202514734965685, 'steps_per_switch': 6358, 'total_timesteps': 64937}
+[2026-04-16 17:13:24]   UCB=2.3057 mu=2.0781 σ=0.1138 params={'learning_rate': 0.00028748663178259926, 'steps_per_switch': 4629, 'total_timesteps': 64690}
+[2026-04-16 17:13:24]   UCB=2.2310 mu=1.9780 σ=0.1265 params={'learning_rate': 0.0005977134566763185, 'steps_per_switch': 7805, 'total_timesteps': 89342}
+[2026-04-16 17:13:24]   UCB=2.1094 mu=1.5506 σ=0.2794 params={'learning_rate': 0.0001403862307566014, 'steps_per_switch': 6141, 'total_timesteps': 64341}
+[2026-04-16 17:13:24] [Wave4] Proposed params: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
+[2026-04-16 17:13:26] [Wave4] Launching trial 23: {'learning_rate': 0.0003194840316570061, 'steps_per_switch': 4507, 'total_timesteps': 66965}
+[2026-04-16 17:13:26] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 66965 --steps-per-switch 4507 --learning-rate 0.0003194840316570061 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0023
--- a/agent/outerloop-results/autoresearch_results_phase4.jsonl
+++ b/agent/outerloop-results/autoresearch_results_phase4.jsonl
@ -18,3 +18,5 @@
 {"trial": 18, "timestamp": "2026-04-16T11:58:07.842111", "params": {"learning_rate": 0.00014739138140971148, "steps_per_switch": 5692, "total_timesteps": 73312}, "combined_test_score": 115.6774, "mini_monaco_reward": 115.6774, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0018/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3806.9049928188324}
 {"trial": 19, "timestamp": "2026-04-16T13:10:23.540177", "params": {"learning_rate": 0.0006672844816013197, "steps_per_switch": 4747, "total_timesteps": 64179}, "combined_test_score": 230.9827, "mini_monaco_reward": 230.9827, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0019/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4331.6394629478455}
 {"trial": 20, "timestamp": "2026-04-16T14:10:06.493399", "params": {"learning_rate": 0.0007104769870597334, "steps_per_switch": 5822, "total_timesteps": 73160}, "combined_test_score": 20.9587, "mini_monaco_reward": 20.9587, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0020/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3578.8967142105103}
+{"trial": 21, "timestamp": "2026-04-16T15:41:17.902221", "params": {"learning_rate": 0.0006240263317913531, "steps_per_switch": 6248, "total_timesteps": 81430}, "combined_test_score": 86.1012, "mini_monaco_reward": 86.1012, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0021/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5465.586006164551}
+{"trial": 22, "timestamp": "2026-04-16T17:13:22.634461", "params": {"learning_rate": 0.0010680884757571496, "steps_per_switch": 3269, "total_timesteps": 81912}, "combined_test_score": 193.2341, "mini_monaco_reward": 193.2341, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0022/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5520.664301395416}