diff --git a/agent/experiments/exp14_finetune_v5.py b/agent/experiments/exp14_finetune_v5.py
index d9cb899..6fc6b13 100644
--- a/agent/experiments/exp14_finetune_v5.py
+++ b/agent/experiments/exp14_finetune_v5.py
@@ -91,10 +91,33 @@ class V5RewardWrapper(gym.Wrapper):
         return obs, reward, terminated or force_terminate, info
 
 # env factory
-def make_env(throttle_min):
+def make_env_base(base_throttle=0.2, throttle_floor=None):
+    """Return a factory for an env whose action space uses base_throttle (must match the
+    saved model). If throttle_floor is provided, the throttle action is raised to at least
+    that value at step time without changing action_space (so model loading is compatible).
+    """
     def _init():
         raw = gym.make('donkey-mountain-track-v0', conf={'host': HOST, 'port': PORT})
-        env = ThrottleClampWrapper(raw, throttle_min=throttle_min)
+        env = ThrottleClampWrapper(raw, throttle_min=base_throttle)
+        if throttle_floor is not None:
+            class ThrottleFloorWrapper(gym.Wrapper):
+                """Raise the throttle component of every action to at least `floor`."""
+                def __init__(self, env, floor):
+                    super().__init__(env)
+                    self.floor = floor
+                def step(self, action):
+                    # action is [steer, throttle]. NOTE(review): the floor is applied before
+                    # ThrottleClampWrapper sees it; confirm both use the same throttle units.
+                    act = np.array(action)
+                    try:
+                        act[1] = max(act[1], self.floor)
+                    except (IndexError, TypeError, ValueError) as err:
+                        import warnings  # local import; still pass the action through, but never hide a skipped floor
+                        warnings.warn(f'throttle floor not applied: {err!r}', RuntimeWarning)
+                    return self.env.step(act)
+                def reset(self, **kwargs):
+                    return self.env.reset(**kwargs)
+            env = ThrottleFloorWrapper(env, throttle_floor)
         env = V5RewardWrapper(env)
         return env
     return _init
@@ -134,27 +157,24 @@ def log(s):
 phase_defs = [ (PH1_STEPS, 0.4), (PH2_STEPS, 0.2) ]
 
 # create initial env and model (warm start)
-# Important: load the warm-start model using the SAME action space it was trained with
-# (throttle_min=0.2) so we can then switch envs for phase 1 if needed.
-loaded_env = VecTransposeImage(DummyVecEnv([make_env(0.2)]))
+# Load with the base action space (throttle_min=0.2); phase 1 enforces a runtime throttle FLOOR via a wrapper instead.
+loaded_env = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=None)]))
 if os.path.exists(WARM_PATH):
-    log(f'Loading warm-start model from {WARM_PATH} using throttle_min=0.2 env')
+    log(f'Loading warm-start model from {WARM_PATH} using base throttle_min=0.2 env')
     model = PPO.load(WARM_PATH, env=loaded_env, device='cpu')
     # override lr and schedules
     model.learning_rate = LR
     model.lr_schedule = model.get_schedule_fn(LR) if hasattr(model,'get_schedule_fn') else None
     for pg in getattr(getattr(model.policy,'optimizer',None) or [], 'param_groups', []):
         pg['lr'] = LR
-    # Now create the actual training env with the first throttle setting
-    first_throttle = phase_defs[0][1]
-    env0 = VecTransposeImage(DummyVecEnv([make_env(first_throttle)]))
-    if first_throttle != 0.2:
-        log(f'Switching model to env with throttle_min={first_throttle}')
-        model.set_env(env0)
+    # Train on the same base action space, with the phase-1 throttle floor enforced at runtime.
+    first_throttle_floor = phase_defs[0][1]
+    env0 = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=first_throttle_floor)]))
+    model.set_env(env0)
+    loaded_env.close()  # the model now uses env0; release the load-only env as the else-branch does
 else:
-    log('No warm-start found — creating fresh model with first throttle')
-    first_throttle = phase_defs[0][1]
-    env0 = VecTransposeImage(DummyVecEnv([make_env(first_throttle)]))
+    log('No warm-start found — creating fresh model with base throttle_min=0.2')
+    env0 = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=phase_defs[0][1])]))
     model = PPO('CnnPolicy', env0, learning_rate=LR, verbose=1, device='cpu')
     loaded_env.close()