"""Tests for behavioral_wrappers.py — no simulator required.

Uses a minimal in-memory MockEnv so each wrapper's reward shaping can be
checked deterministically without launching the driving simulator.
"""
import math
import os
import sys

import gymnasium as gym
import numpy as np
import pytest

# Make the sibling `agent` package importable when running from tests/.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'agent'))
from behavioral_wrappers import (
    LanePositionWrapper,
    AntiOscillationWrapper,
    AsymmetricCTEWrapper,
    CombinedBehavioralWrapper,
)


class MockEnv(gym.Env):
    """Stub environment returning a fixed reward/CTE, configurable per test.

    Observation is a dummy 120x160 RGB frame; the action space matches the
    simulator's [steer, throttle] box. `step` reports the configured reward
    and a `cte` entry in info, which the wrappers under test consume.
    """

    metadata = {'render_modes': []}

    def __init__(self, reward=0.8, cte=0.0, done=False):
        super().__init__()
        self.action_space = gym.spaces.Box(
            low=np.array([-1.0, 0.2]), high=np.array([1.0, 1.0]), dtype=np.float32
        )
        self.observation_space = gym.spaces.Box(0, 255, (120, 160, 3), dtype=np.uint8)
        self._reward = reward
        self._cte = cte
        self._done = done

    def set(self, reward=None, cte=None):
        """Update the canned reward and/or CTE mid-test."""
        if reward is not None:
            self._reward = reward
        if cte is not None:
            self._cte = cte

    def reset(self, seed=None, **kwargs):
        return np.zeros((120, 160, 3), dtype=np.uint8), {}

    def step(self, action):
        obs = np.zeros((120, 160, 3), dtype=np.uint8)
        info = {'cte': self._cte, 'speed': 2.0, 'lap_count': 0, 'last_lap_time': 0.0}
        return obs, self._reward, self._done, False, info

    def close(self):
        pass


# ---- LanePositionWrapper Tests ----

def test_lane_position_bonus_at_target():
    """At the target CTE, position bonus is maximized."""
    env = MockEnv(reward=0.8, cte=-0.5)  # Car at CTE=-0.5
    wrapped = LanePositionWrapper(env, target_cte=-0.5, position_weight=0.2)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    # Should get max bonus: reward + 0.2 * 1.0 = 1.0
    assert r == pytest.approx(1.0, abs=0.01)


def test_lane_position_reduces_reward_away_from_target():
    """Away from target CTE, position bonus is smaller."""
    env_near = MockEnv(reward=0.8, cte=-0.5)
    env_far = MockEnv(reward=0.8, cte=2.0)
    wrapped_near = LanePositionWrapper(env_near, target_cte=-0.5, position_weight=0.2)
    wrapped_far = LanePositionWrapper(env_far, target_cte=-0.5, position_weight=0.2)
    wrapped_near.reset()
    wrapped_far.reset()
    _, r_near, _, _, _ = wrapped_near.step(np.array([0.0, 0.5]))
    _, r_far, _, _, _ = wrapped_far.step(np.array([0.0, 0.5]))
    assert r_near > r_far


def test_lane_position_no_bonus_when_off_track():
    """No position bonus when original reward <= 0 (off track)."""
    env = MockEnv(reward=-1.0, cte=0.0)  # Crashed, perfect CTE
    wrapped = LanePositionWrapper(env, target_cte=0.0, position_weight=0.5)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    assert r == -1.0


def test_right_of_centre_target_biases_right():
    """Setting target_cte=-0.5 (right) gives higher reward for right-of-centre."""
    env_right = MockEnv(reward=0.8, cte=-0.5)  # Right of centre
    env_left = MockEnv(reward=0.8, cte=+0.5)   # Left of centre
    wrapped_right = LanePositionWrapper(env_right, target_cte=-0.5)
    wrapped_left = LanePositionWrapper(env_left, target_cte=-0.5)
    wrapped_right.reset()
    wrapped_left.reset()
    _, r_right, _, _, _ = wrapped_right.step(np.array([0.0, 0.5]))
    _, r_left, _, _, _ = wrapped_left.step(np.array([0.0, 0.5]))
    assert r_right > r_left, "Right-of-centre should reward more when target_cte is negative"


# ---- AntiOscillationWrapper Tests ----

def test_no_penalty_on_first_step():
    """No oscillation penalty on the very first step (no previous action)."""
    env = MockEnv(reward=0.8)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.5)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([1.0, 0.5]))  # Large steer — no penalty yet
    assert r == pytest.approx(0.8, abs=0.01)


def test_large_steering_change_penalised():
    """Rapid steering reversal should get a penalty."""
    env = MockEnv(reward=0.8)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.3)
    wrapped.reset()
    wrapped.step(np.array([-1.0, 0.5]))  # Full left
    _, r, _, _, _ = wrapped.step(np.array([+1.0, 0.5]))  # Full right — delta=2.0
    # Penalty = 0.3 * 2.0 = 0.6 → reward = 0.8 - 0.6 = 0.2
    assert r < 0.8, "Large steering change should be penalised"
    assert r == pytest.approx(0.8 - 0.3 * 2.0, abs=0.05)


def test_no_steering_change_no_penalty():
    """Consistent steering should get no penalty."""
    env = MockEnv(reward=0.8)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.3)
    wrapped.reset()
    wrapped.step(np.array([0.3, 0.5]))
    _, r, _, _, _ = wrapped.step(np.array([0.3, 0.5]))  # Same action — delta=0
    assert r == pytest.approx(0.8, abs=0.01)


def test_oscillation_penalty_not_applied_off_track():
    """Off-track (negative reward) should not get oscillation penalty."""
    env = MockEnv(reward=-1.0)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.5)
    wrapped.reset()
    wrapped.step(np.array([-1.0, 0.5]))
    _, r, _, _, _ = wrapped.step(np.array([+1.0, 0.5]))  # Large change, but off-track
    assert r == -1.0, "Off-track reward should stay -1.0"


def test_oscillation_score_zero_for_consistent_driving():
    """Constant steering → oscillation score ≈ 0."""
    env = MockEnv(reward=0.8)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.1)
    wrapped.reset()
    for _ in range(15):
        wrapped.step(np.array([0.2, 0.5]))  # Constant steer
    assert wrapped.current_oscillation_score() == pytest.approx(0.0, abs=0.01)


# ---- AsymmetricCTEWrapper Tests ----

def test_left_of_centre_penalised():
    """Left of centre (positive CTE) should earn less reward than right."""
    env_left = MockEnv(reward=0.8, cte=+1.0)
    env_right = MockEnv(reward=0.8, cte=-1.0)
    wrapped_left = AsymmetricCTEWrapper(env_left)
    wrapped_right = AsymmetricCTEWrapper(env_right)
    wrapped_left.reset()
    wrapped_right.reset()
    _, r_left, _, _, _ = wrapped_left.step(np.array([0.0, 0.5]))
    _, r_right, _, _, _ = wrapped_right.step(np.array([0.0, 0.5]))
    assert r_right > r_left, "Right-of-centre should reward more than left"


def test_crash_unaffected_by_asymmetric():
    """Crash (reward=-1) should not be modified."""
    env = MockEnv(reward=-1.0, cte=+2.0)
    wrapped = AsymmetricCTEWrapper(env, left_penalty=0.9)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    assert r == -1.0


# ---- CombinedBehavioralWrapper Tests ----

def test_combined_wrapper_gives_positive_reward_on_track():
    """Combined wrapper should give positive reward when on track."""
    env = MockEnv(reward=0.8, cte=0.0)
    wrapped = CombinedBehavioralWrapper(env, target_cte=0.0, oscillation_penalty=0.0)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    assert r > 0


def test_combined_wrapper_crash_still_negative():
    """Crash should remain negative through combined wrapper."""
    env = MockEnv(reward=-1.0, cte=0.0)
    wrapped = CombinedBehavioralWrapper(env)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    assert r < 0