diff --git a/agent/experiments/exp11d_parallel_v61.py b/agent/experiments/exp11d_parallel_v61.py index dacb2b5..b41d5c1 100644 --- a/agent/experiments/exp11d_parallel_v61.py +++ b/agent/experiments/exp11d_parallel_v61.py @@ -43,7 +43,6 @@ def make_env(track_id, port): env = SpeedRewardWrapper(env, max_cte_terminate=4.0, # terminate if CTE > 4m for 20 steps (grass fix) cte_patience=20, - progress_patience=60, # terminate if no node advance for 60 steps (rollback fix) ) return env return _init @@ -53,9 +52,8 @@ log('Exp 11d: Parallel DummyVecEnv, v6.1 reward, 180k steps') log(f' Sim 1: {HOST}:9091 → generated_track') log(f' Sim 2: {HOST}:9093 → mountain_track') log(f' throttle_min={THROTTLE_MIN}, lr={LR}, total={TOTAL_STEPS:,}') -log(f' Reward v6.1: speed×CTE + efficiency gate + grass/rollback terminators') -log(f' max_cte_terminate=4.0, cte_patience=20 (grass fix)') -log(f' progress_patience=60 (mountain rollback fix)') +log(f' Reward v6.1: speed×CTE + efficiency gate + grass exploit terminator') +log(f' max_cte_terminate=4.0, cte_patience=20 (grass fix only)') log(f' Stuck: 40 steps') log('='*60) @@ -141,8 +139,7 @@ for track_id, track_name in EVAL_TRACKS: raw = gym.make(track_id, conf={'host': HOST, 'port': EVAL_PORT}) ei = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN) ei = StuckTerminationWrapper(ei, stuck_steps=40, min_displacement=0.5) - ei = SpeedRewardWrapper(ei, max_cte_terminate=4.0, cte_patience=20, - progress_patience=60) + ei = SpeedRewardWrapper(ei, max_cte_terminate=4.0, cte_patience=20) ev = VecTransposeImage(DummyVecEnv([lambda e=ei: e])) m = PPO.load(best_model_path, env=ev, device='cpu')