diff --git a/DECISIONS.md b/DECISIONS.md index 4fd0567..a5be01e 100644 --- a/DECISIONS.md +++ b/DECISIONS.md @@ -350,3 +350,26 @@ not the best. **Implementation:** See `train_multitrack()` in multitrack_runner.py — the `best_segment_reward` tracking and `best_model.zip` save logic added 2026-04-17. + +## ADR-018: StuckTerminationWrapper is the correct collision fix — NOT OnCollisionStay + +**Date:** 2026-04-18 +**Status:** Active + +**Decision:** Do NOT add OnCollisionStay to the Unity simulator. +Use StuckTerminationWrapper (displacement < 0.5m over N = stuck_steps steps → terminate). + +**Why OnCollisionStay is wrong:** +The car legitimately rubs against barriers while cornering — this should +be allowed to continue. OnCollisionStay would fire on BOTH rubbing AND +stuck scenarios, terminating valid driving attempts. + +**Why StuckTerminationWrapper is right:** +- Rubbing + still moving forward: displacement ≥ 0.5m in 80 steps → continues ✅ +- Stuck perpendicular, wheels spinning: displacement < 0.5m in 80 steps → terminates ✅ + +The distinction between "rubbing" and "stuck" is made by checking +positional progress, not collision contact. This is the correct signal. + +**Tuning note:** stuck_steps=80 (~5 seconds at 16 steps/sec). Could be +reduced to 40 (~2.5 seconds) if the car is observed to stay stuck too long before termination. 
diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py index b449662..4d840fe 100644 --- a/agent/multitrack_runner.py +++ b/agent/multitrack_runner.py @@ -60,6 +60,7 @@ from stable_baselines3 import PPO from stable_baselines3.common.utils import get_schedule_fn from stable_baselines3.common.evaluation import evaluate_policy from stable_baselines3.common.callbacks import BaseCallback +from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage # ---- Project paths ---- AGENT_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -567,7 +568,7 @@ def main(): env = None try: raw_env = gym.make(first_env_id) - env = wrap_env(raw_env) + env = VecTransposeImage(DummyVecEnv([lambda: wrap_env(gym.make(first_env_id))])) log(f'[W3 Runner] ✅ Connected to {first_env_id}') except Exception as e: log(f'[W3 Runner] ❌ Failed to connect to first training track: {e}') diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt index 06845fb..032ada0 100644 --- a/agent/outerloop-results/autoresearch_phase2_log.txt +++ b/agent/outerloop-results/autoresearch_phase2_log.txt @@ -788,3 +788,16 @@ [2026-04-18 10:41:59] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} [2026-04-18 10:41:59] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} [2026-04-18 10:41:59] [AutoResearch] Only 1 results — using random proposal. 
+[2026-04-18 18:33:04] [AutoResearch] GP UCB top-5 candidates: +[2026-04-18 18:33:04] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-18 18:33:04] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-18 18:33:04] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-18 18:33:04] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-18 18:33:04] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-18 18:33:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-18 18:33:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-18 18:33:04] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-18 18:33:04] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-18 18:33:04] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-18 18:33:04] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-18 18:33:04] [AutoResearch] Only 1 results — using random proposal. diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt index 09e407e..c862781 100644 --- a/agent/outerloop-results/autoresearch_phase3_log.txt +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -415,3 +415,8 @@ [2026-04-18 10:42:10] [Wave3] Only 0 results — using random proposal. [2026-04-18 10:42:10] [Champion] 🏆 NEW BEST! 
Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} [2026-04-18 10:42:10] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} +[2026-04-18 18:33:18] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-18 18:33:18] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-18 18:33:18] [Wave3] Only 0 results — using random proposal. +[2026-04-18 18:33:18] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-18 18:33:18] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}