diff --git a/agent/experiments/exp11_parallel_envs.py b/agent/experiments/exp11_parallel_envs.py
index 077413b..d10e7a0 100644
--- a/agent/experiments/exp11_parallel_envs.py
+++ b/agent/experiments/exp11_parallel_envs.py
@@ -36,7 +36,7 @@ def make_env(track_id, port):
     def _init():
         raw = gym.make(track_id, conf={'host': HOST, 'port': port})
         env = ThrottleClampWrapper(raw, throttle_min=THROTTLE_MIN)
-        env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
+        env = StuckTerminationWrapper(env, stuck_steps=40, min_displacement=0.5)
         env = SpeedRewardWrapper(env)
         return env
     return _init
diff --git a/agent/multitrack_runner.py b/agent/multitrack_runner.py
index 4d840fe..a02de6e 100644
--- a/agent/multitrack_runner.py
+++ b/agent/multitrack_runner.py
@@ -177,7 +177,7 @@ class StuckTerminationWrapper(gym.Wrapper):
 def wrap_env(raw_env):
     """Apply standard wrappers: throttle clamp + stuck detection + speed reward."""
     env = ThrottleClampWrapper(raw_env, throttle_min=THROTTLE_MIN)
-    env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
+    env = StuckTerminationWrapper(env, stuck_steps=40, min_displacement=0.5)
     env = SpeedRewardWrapper(env, speed_scale=SPEED_SCALE)
     return env
 
diff --git a/agent/reward_wrapper.py b/agent/reward_wrapper.py
index 566d925..c50e6c7 100644
--- a/agent/reward_wrapper.py
+++ b/agent/reward_wrapper.py
@@ -1,6 +1,6 @@
 """
-Speed + Progress Reward Wrapper for DonkeyCar RL — v4 (Full Bypass)
-====================================================================
+Speed + Progress Reward Wrapper for DonkeyCar RL — v6 (Speed×CTE + Efficiency Gate)
+=====================================================================================
 
 REWARD HACKING HISTORY:
   v1 additive:      speed × (1-cte/max_cte)         → boundary oscillation
@@ -8,9 +8,15 @@ REWARD HACKING HISTORY:
   v3 path efficiency: original × (1+speed×eff×scale) → still circling!
      WHY v3 failed: efficiency killed the SPEED BONUS but not the BASE reward.
      A spinning car at CTE≈0 still earns 1.0/step × thousands of steps.
-
-  v4 (THIS VERSION): Completely bypass sim's reward. Multiply base reward by
-     efficiency so circling yields ZERO reward regardless of CTE.
+  v4: base × eff × (1 + speed_scale × speed)        → zero gradient on hills!
+     WHY v4 failed on hills: speed≈0 AND eff≈0 AND cte_quality varies → all
+     three terms near zero simultaneously → no gradient to push ANY term up.
+  v5: speed × CTE_quality (no efficiency)            → circular driving returns!
+     WHY v5 failed: dropped efficiency entirely. Circular driving at CTE≈0
+     with speed>0 earns positive reward indefinitely. Observed in Exp 11.
+  v6 (THIS VERSION): v5 reward + efficiency GATE.
+     Keeps v5's gradient properties (non-zero gradient on hills) but adds
+     a binary efficiency check that zeros reward when car is circling.
 
 ROOT CAUSE OF CIRCLING:
   The sim's own calc_reward() uses `forward_vel` = dot(car_heading, velocity).
@@ -18,24 +24,35 @@ ROOT CAUSE OF CIRCLING:
   so forward_vel > 0 always, giving positive reward while circling indefinitely.
   We bypass this entirely.
 
-FORMULA (v4):
-    base     = 1.0 - min(abs(cte) / max_cte, 1.0)    # CTE quality [0,1]
-    eff      = net_displacement / total_path_length    # Forward progress [0,1]
-    shaped   = base × eff × (1 + speed_scale × speed) # All three must be high
+FORMULA (v6):
+    cte_quality = 1.0 - min(|cte| / max_cte, 1.0)   # [0,1] centred=1
+    speed_norm  = min(speed / 10.0, 1.0)              # [0,1] normalised
+    efficiency  = net_displacement / total_path        # [0,1] straight=1, circle=0
 
-    On done/crash: shaped = -1.0
+    if efficiency < min_efficiency:
+        reward = 0.0     # GATE: circling → zero reward (but not negative)
+    else:
+        reward = cte_quality × speed_norm    # v5 formula (gradient on hills)
+
+    On done/crash: reward = -1.0
+
+WHY GATE NOT MULTIPLIER:
+    v4 used efficiency as a multiplier: reward = base × eff × speed_bonus.
+    On a hill: speed≈0, eff≈0, base≈0.5 → reward≈0 and ∂reward/∂speed≈0.
+    No gradient to push speed up — car stays stuck.
+
+    v6 gate: efficiency is either PASS or FAIL. When efficiency >= min_efficiency
+    (car making net forward progress), reward = speed × CTE_quality. On a hill:
+    car is stuck but not circling; as long as eff stays >= min_efficiency the gate
+    passes and the reward = speed × CTE_quality. ∂reward/∂speed > 0 → gradient
+    pushes toward more throttle. Circle has eff ≈ 0 → gate fails → reward = 0.
 
 PROPERTIES:
-    - Spinning (eff≈0):           shaped ≈ 0          (no reward)
-    - On track, slow (eff≈1):     shaped ≈ base       (CTE reward only)
-    - On track, fast (eff≈1):     shaped > base       (CTE + speed bonus)
-    - Off track (base≈0):         shaped ≈ 0          (penalty via done)
-    - Cannot be gamed:            ALL THREE terms must be high simultaneously
-
-RESEARCH NOTE (2026-04-13):
-    v3 was insufficient — circling at start gave 1.0/step × 47k steps = 47k reward.
-    v4 makes efficiency a multiplier on the entire reward, not just the speed bonus.
-    See docs/RESEARCH_LOG.md for full hacking history.
+    - Circling (eff<threshold): reward = 0  (no incentive to circle)
+    - On track, stuck (eff≥threshold): reward = speed × CTE (gradient toward unstuck)
+    - On track, fast:           reward = high       (speed + centred)
+    - Off track:                reward ≈ 0          (CTE_quality → 0)
+    - Crash:                    reward = -1.0
 """
 
 import gymnasium as gym
@@ -62,8 +79,8 @@ class SpeedRewardWrapper(gym.Wrapper):
         self,
         env,
         speed_scale: float = 0.1,
-        window_size: int = 60,        # increased from 30 — catches slower circles
-        min_efficiency: float = 0.05,
+        window_size: int = 30,         # captures 2+ full circles at typical circling speed
+        min_efficiency: float = 0.15,  # gate threshold: circles ≈ 0.13, wobbly straight ≈ 0.98
         max_cte: float = 8.0,
         min_lap_time: float = 5.0,    # laps faster than this are penalised as exploits
     ):
@@ -109,26 +126,36 @@ class SpeedRewardWrapper(gym.Wrapper):
 
     def _compute_reward_and_done(self, done: bool, info: dict):
         """
-        v5: speed × CTE-quality reward.
+        v6: speed × CTE-quality + efficiency gate.
 
-        reward = speed × (1 - |cte| / max_cte)
+        reward = speed_norm × cte_quality   (when efficiency >= threshold)
+        reward = 0.0                        (when efficiency < threshold — circling)
+        reward = -1.0                       (on crash/done)
 
-        Simpler than v4.  Directly incentivises going FAST while staying
-        centred.  On a hill: car slows → reward drops → clear gradient
-        signal to apply more throttle.  v4's efficiency term gave zero
-        gradient when the car was stuck (all three terms collapsed to zero
-        simultaneously, so no direction to improve).
+        The efficiency gate removes the reward for circular driving (eff≈0 for
+        circles) without killing gradient on hills (a stuck-but-not-circling car
+        with eff >= min_efficiency passes the gate, so speed×CTE still gives gradient).
 
-        Exploit protection (unchanged):
-        - Short-lap penalty: laps < min_lap_time → large negative reward
-        - StuckTerminationWrapper: done=True after 80 steps of <0.5m movement
+        Exploit protection:
+        - Efficiency gate: circles → reward = 0
+        - Short-lap penalty: laps < min_lap_time → large negative + terminate
+        - StuckTerminationWrapper: done=True after stuck_steps steps of < min_displacement movement
         - Crash: done=True → -1.0
         """
+        # Track position for efficiency calculation
+        try:
+            pos = info.get('pos', (0.0, 0.0, 0.0))
+            pos_x = float(pos[0])
+            pos_z = float(pos[2])  # z is forward in Unity coordinate system
+            self._pos_history.append(np.array([pos_x, pos_z]))
+        except (TypeError, ValueError, IndexError):
+            pass
+
         # Crash / episode over
         if done:
             return -1.0, False
 
-        # --- Short-lap exploit detection (unchanged) ---
+        # --- Short-lap exploit detection ---
         try:
             current_lap_count = int(info.get('lap_count', 0) or 0)
         except (TypeError, ValueError):
@@ -141,13 +168,16 @@ class SpeedRewardWrapper(gym.Wrapper):
             except (TypeError, ValueError):
                 lap_time = 999.0
             if lap_time < self.min_lap_time:
-                # Short-lap exploit: penalty AND terminate episode immediately.
-                # Penalty alone is insufficient — the model stays alive and
-                # keeps accumulating small rewards between laps.
-                # Termination removes that loophole completely.
                 penalty = -10.0 * (self.min_lap_time / max(lap_time, 0.1))
                 return penalty, True   # (reward, force_terminate)
-            # Legitimate lap — fall through to normal reward
+
+        # --- Efficiency gate: detect circular driving ---
+        efficiency = self._compute_efficiency()
+        if efficiency < self.min_efficiency:
+            # Car is circling — zero reward but don't terminate.
+            # Zero (not negative) so there's no perverse incentive to crash
+            # early to avoid accumulating penalties.
+            return 0.0, False
 
         # --- CTE quality: how centred is the car? ---
         try:
@@ -162,10 +192,7 @@ class SpeedRewardWrapper(gym.Wrapper):
         except (TypeError, ValueError):
             speed = 0.0
 
-        # --- v5 reward: speed × CTE quality ---
-        # Fast + centred = high reward.  Slow (hill) = low reward → gradient
-        # pushes policy toward higher throttle.  Off-track = near-zero.
-        # Normalise speed so max reward ≈ 1.0 at reasonable speed (10 m/s).
+        # --- v6 reward: speed × CTE quality (same as v5, but gated) ---
         speed_norm = min(speed / 10.0, 1.0)
         return cte_quality * speed_norm, False
 
diff --git a/agent/run_eval.py b/agent/run_eval.py
index c345665..4279bc3 100644
--- a/agent/run_eval.py
+++ b/agent/run_eval.py
@@ -56,7 +56,7 @@ log(f'Log file: {log_path}')
 def make_env(track_id, throttle_min):
     raw = gym.make(track_id)
     env = ThrottleClampWrapper(raw, throttle_min=throttle_min)
-    env = StuckTerminationWrapper(env, stuck_steps=80, min_displacement=0.5)
+    env = StuckTerminationWrapper(env, stuck_steps=40, min_displacement=0.5)
     env = SpeedRewardWrapper(env)
     return env
 
diff --git a/tests/test_reward_wrapper.py b/tests/test_reward_wrapper.py
index a204558..2408866 100644
--- a/tests/test_reward_wrapper.py
+++ b/tests/test_reward_wrapper.py
@@ -69,20 +69,28 @@ def test_sim_reward_is_completely_ignored():
 
 def test_circling_at_zero_cte_gives_near_zero_reward():
     """
-    v5: circling protection is handled by lap-time penalty + StuckTermination,
-    NOT by the reward formula.  A circling car at CTE=0 with speed CAN earn
-    reward per step.  This test verifies the formula works as designed:
-    reward = speed_norm * cte_quality.  Circling is stopped by other mechanisms.
+    v6: circling (low efficiency) should yield zero reward via the efficiency gate.
+    After enough steps of circular motion, the efficiency drops below threshold
+    and the gate zeros the reward.
     """
     env = MockEnv(speed=3.0, cte=0.0)
-    wrapped = SpeedRewardWrapper(env, speed_scale=0.1, window_size=20)
+    wrapped = SpeedRewardWrapper(env, speed_scale=0.1, window_size=30, min_efficiency=0.15)
     wrapped.reset()
 
-    # At CTE=0 and speed=3, expected reward = (3/10) * 1.0 = 0.3
-    _, r, _, _, _ = wrapped.step(0)
-    expected = (3.0 / 10.0) * 1.0
-    assert abs(r - expected) < 0.05, (
-        f"v5: reward at CTE=0, speed=3 should be ~{expected:.2f}, got {r:.4f}")
+    # Drive in a circle for enough steps to fill the position window
+    rewards = []
+    for i in range(40):
+        angle = 2 * math.pi * i / 12  # completes circle every 12 steps
+        env.set_pos([0.5 * math.cos(angle), 0., 0.5 * math.sin(angle)])
+        _, r, _, _, _ = wrapped.step(0)
+        rewards.append(r)
+
+    # Once the position window (window_size=30) has filled with circular motion,
+    # the efficiency gate should kick in, so the last few rewards should be 0.0.
+    assert rewards[-1] == 0.0, (
+        f"v6: circular driving should yield 0.0 reward via efficiency gate, got {rewards[-1]:.4f}")
+    assert sum(1 for r in rewards[-5:] if r == 0.0) >= 3, (
+        f"v6: most of last 5 rewards during circle should be 0.0, got {rewards[-5:]}")
 
 
 def test_forward_driving_earns_positive_reward():
@@ -97,23 +105,29 @@ def test_forward_driving_earns_positive_reward():
 
 def test_forward_beats_circling_by_large_margin():
     """
-    v5: forward driving at moderate CTE should beat driving with high CTE.
-    The reward directly penalises being off-centre.
+    v6: forward driving earns positive reward; circular driving earns zero.
+    The efficiency gate ensures this gap.
     """
-    # On track (CTE=1m) at speed=5
-    env_on = MockEnv(speed=5.0, cte=1.0)
-    wrapped_on = SpeedRewardWrapper(env_on, speed_scale=0.1)
-    wrapped_on.reset()
-    _, r_on, _, _, _ = wrapped_on.step(0)
+    # Forward driving at CTE=1m, speed=5
+    env_fwd = MockEnv(speed=5.0, cte=1.0)
+    wrapped_fwd = SpeedRewardWrapper(env_fwd, speed_scale=0.1, window_size=30)
+    wrapped_fwd.reset()
+    for i in range(35):
+        env_fwd.set_pos([i * 0.5, 0., 0.])  # straight line
+        _, r_fwd, _, _, _ = wrapped_fwd.step(0)
 
-    # Off track (CTE=7m) at same speed
-    env_off = MockEnv(speed=5.0, cte=7.0)
-    wrapped_off = SpeedRewardWrapper(env_off, speed_scale=0.1)
-    wrapped_off.reset()
-    _, r_off, _, _, _ = wrapped_off.step(0)
+    # Circular driving at CTE=0, speed=5
+    env_circ = MockEnv(speed=5.0, cte=0.0)
+    wrapped_circ = SpeedRewardWrapper(env_circ, speed_scale=0.1, window_size=30)
+    wrapped_circ.reset()
+    for i in range(35):
+        angle = 2 * math.pi * i / 12
+        env_circ.set_pos([0.5 * math.cos(angle), 0., 0.5 * math.sin(angle)])
+        _, r_circ, _, _, _ = wrapped_circ.step(0)
 
-    assert r_on > r_off * 3, (
-        f"On-track ({r_on:.2f}) should beat off-track ({r_off:.2f}) by 3x")
+    assert r_fwd > 0, f"Forward driving should earn positive reward, got {r_fwd}"
+    assert r_circ == 0.0, f"Circular driving should earn 0 reward, got {r_circ}"
+    assert r_fwd > r_circ, f"Forward ({r_fwd:.3f}) must beat circling ({r_circ:.3f})"
 
 
 def test_crash_gives_negative_reward():