"""
|
|
Tests for behavioral_wrappers.py — no simulator required.
|
|
"""
|
|
|
|
import sys, os, math, pytest
|
|
import numpy as np
|
|
import gymnasium as gym
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'agent'))
|
|
from behavioral_wrappers import LanePositionWrapper, AntiOscillationWrapper, AsymmetricCTEWrapper, CombinedBehavioralWrapper
|
|
|
|
|
|
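# To run just this module (assuming it is saved as tests/test_behavioral_wrappers.py,
# next to the agent/ package referenced above):
#   pytest tests/test_behavioral_wrappers.py -v
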
class MockEnv(gym.Env):
    """Minimal stand-in env: fixed reward and CTE, zero-image observations, no simulator."""

    metadata = {'render_modes': []}

    def __init__(self, reward=0.8, cte=0.0, done=False):
        super().__init__()
        self.action_space = gym.spaces.Box(low=np.array([-1.0, 0.2]), high=np.array([1.0, 1.0]), dtype=np.float32)
        self.observation_space = gym.spaces.Box(0, 255, (120, 160, 3), dtype=np.uint8)
        self._reward = reward
        self._cte = cte
        self._done = done

    def set(self, reward=None, cte=None):
        """Change the reward/CTE reported on subsequent steps."""
        if reward is not None:
            self._reward = reward
        if cte is not None:
            self._cte = cte

    def reset(self, seed=None, **kwargs):
        return np.zeros((120, 160, 3), dtype=np.uint8), {}

    def step(self, action):
        obs = np.zeros((120, 160, 3), dtype=np.uint8)
        info = {'cte': self._cte, 'speed': 2.0, 'lap_count': 0, 'last_lap_time': 0.0}
        return obs, self._reward, self._done, False, info

    def close(self):
        pass

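# Added sanity check on the mock itself (not one of the wrapper tests): it only
# exercises MockEnv as defined above, confirming the Gymnasium 5-tuple contract
# and the info keys the wrappers read.
def test_mockenv_step_contract():
    env = MockEnv(reward=0.8, cte=1.5)
    obs, _ = env.reset()
    obs, reward, terminated, truncated, info = env.step(np.array([0.0, 0.5]))
    assert obs.shape == (120, 160, 3)
    assert reward == pytest.approx(0.8)
    assert terminated is False and truncated is False
    assert info['cte'] == pytest.approx(1.5)
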
# ---- LanePositionWrapper Tests ----

def test_lane_position_bonus_at_target():
    """At the target CTE, position bonus is maximized."""
    env = MockEnv(reward=0.8, cte=-0.5)  # Car at CTE=-0.5
    wrapped = LanePositionWrapper(env, target_cte=-0.5, position_weight=0.2)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    # Should get max bonus: reward + 0.2 * 1.0 = 1.0
    assert r == pytest.approx(1.0, abs=0.01)


def test_lane_position_reduces_reward_away_from_target():
    """Away from target CTE, position bonus is smaller."""
    env_near = MockEnv(reward=0.8, cte=-0.5)
    env_far = MockEnv(reward=0.8, cte=2.0)
    wrapped_near = LanePositionWrapper(env_near, target_cte=-0.5, position_weight=0.2)
    wrapped_far = LanePositionWrapper(env_far, target_cte=-0.5, position_weight=0.2)
    wrapped_near.reset()
    wrapped_far.reset()
    _, r_near, _, _, _ = wrapped_near.step(np.array([0.0, 0.5]))
    _, r_far, _, _, _ = wrapped_far.step(np.array([0.0, 0.5]))
    assert r_near > r_far


def test_lane_position_no_bonus_when_off_track():
    """No position bonus when original reward <= 0 (off track)."""
    env = MockEnv(reward=-1.0, cte=0.0)  # Crashed, perfect CTE
    wrapped = LanePositionWrapper(env, target_cte=0.0, position_weight=0.5)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    assert r == -1.0


def test_right_of_centre_target_biases_right():
    """Setting target_cte=-0.5 (right) gives higher reward for right-of-centre."""
    env_right = MockEnv(reward=0.8, cte=-0.5)  # Right of centre
    env_left = MockEnv(reward=0.8, cte=+0.5)   # Left of centre
    wrapped_right = LanePositionWrapper(env_right, target_cte=-0.5)
    wrapped_left = LanePositionWrapper(env_left, target_cte=-0.5)
    wrapped_right.reset()
    wrapped_left.reset()
    _, r_right, _, _, _ = wrapped_right.step(np.array([0.0, 0.5]))
    _, r_left, _, _, _ = wrapped_left.step(np.array([0.0, 0.5]))
    assert r_right > r_left, "Right-of-centre should reward more when target_cte is negative"

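# Added example exercising MockEnv.set(), which the tests above never use: drift the
# car away from the target line mid-episode and expect the shaped reward to drop.
# This assumes the wrapper recomputes the bonus from info['cte'] on every step
# rather than caching it at reset.
def test_lane_position_bonus_tracks_cte_changes():
    env = MockEnv(reward=0.8, cte=-0.5)
    wrapped = LanePositionWrapper(env, target_cte=-0.5, position_weight=0.2)
    wrapped.reset()
    _, r_on_target, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    env.set(cte=2.0)  # drift away from the target line
    _, r_off_target, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    assert r_on_target > r_off_target
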
# ---- AntiOscillationWrapper Tests ----

def test_no_penalty_on_first_step():
    """No oscillation penalty on the very first step (no previous action)."""
    env = MockEnv(reward=0.8)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.5)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([1.0, 0.5]))  # Large steer — no penalty yet
    assert r == pytest.approx(0.8, abs=0.01)


def test_large_steering_change_penalised():
    """Rapid steering reversal should get a penalty."""
    env = MockEnv(reward=0.8)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.3)
    wrapped.reset()
    wrapped.step(np.array([-1.0, 0.5]))  # Full left
    _, r, _, _, _ = wrapped.step(np.array([+1.0, 0.5]))  # Full right — delta=2.0
    # Penalty = 0.3 * 2.0 = 0.6 → reward = 0.8 - 0.6 = 0.2
    assert r < 0.8, "Large steering change should be penalised"
    assert r == pytest.approx(0.8 - 0.3 * 2.0, abs=0.05)


def test_no_steering_change_no_penalty():
    """Consistent steering should get no penalty."""
    env = MockEnv(reward=0.8)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.3)
    wrapped.reset()
    wrapped.step(np.array([0.3, 0.5]))
    _, r, _, _, _ = wrapped.step(np.array([0.3, 0.5]))  # Same action — delta=0
    assert r == pytest.approx(0.8, abs=0.01)


def test_oscillation_penalty_not_applied_off_track():
    """Off-track (negative reward) should not get oscillation penalty."""
    env = MockEnv(reward=-1.0)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.5)
    wrapped.reset()
    wrapped.step(np.array([-1.0, 0.5]))
    _, r, _, _, _ = wrapped.step(np.array([+1.0, 0.5]))  # Large change, but off-track
    assert r == -1.0, "Off-track reward should stay -1.0"


def test_oscillation_score_zero_for_consistent_driving():
    """Constant steering → oscillation score ≈ 0."""
    env = MockEnv(reward=0.8)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.1)
    wrapped.reset()
    for _ in range(15):
        wrapped.step(np.array([0.2, 0.5]))  # Constant steer
    assert wrapped.current_oscillation_score() == pytest.approx(0.0, abs=0.01)

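# Added counterpart to the constant-steering case above: alternating full-lock
# steering should register as oscillation. This assumes current_oscillation_score()
# grows with steering reversals, which the previous test implies but does not pin down.
def test_oscillation_score_positive_for_alternating_steering():
    env = MockEnv(reward=0.8)
    wrapped = AntiOscillationWrapper(env, oscillation_penalty=0.1)
    wrapped.reset()
    for i in range(15):
        steer = 1.0 if i % 2 == 0 else -1.0  # full right/left on alternate steps
        wrapped.step(np.array([steer, 0.5]))
    assert wrapped.current_oscillation_score() > 0.0
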
# ---- AsymmetricCTEWrapper Tests ----

def test_left_of_centre_penalised():
    """Left of centre (positive CTE) should earn less reward than right."""
    env_left = MockEnv(reward=0.8, cte=+1.0)
    env_right = MockEnv(reward=0.8, cte=-1.0)
    wrapped_left = AsymmetricCTEWrapper(env_left)
    wrapped_right = AsymmetricCTEWrapper(env_right)
    wrapped_left.reset()
    wrapped_right.reset()
    _, r_left, _, _, _ = wrapped_left.step(np.array([0.0, 0.5]))
    _, r_right, _, _, _ = wrapped_right.step(np.array([0.0, 0.5]))
    assert r_right > r_left, "Right-of-centre should reward more than left"


def test_crash_unaffected_by_asymmetric():
    """Crash (reward=-1) should not be modified."""
    env = MockEnv(reward=-1.0, cte=+2.0)
    wrapped = AsymmetricCTEWrapper(env, left_penalty=0.9)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    assert r == -1.0

# ---- CombinedBehavioralWrapper Tests ----

def test_combined_wrapper_gives_positive_reward_on_track():
    """Combined wrapper should give positive reward when on track."""
    env = MockEnv(reward=0.8, cte=0.0)
    wrapped = CombinedBehavioralWrapper(env, target_cte=0.0, oscillation_penalty=0.0)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    assert r > 0


def test_combined_wrapper_crash_still_negative():
    """Crash should remain negative through combined wrapper."""
    env = MockEnv(reward=-1.0, cte=0.0)
    wrapped = CombinedBehavioralWrapper(env)
    wrapped.reset()
    _, r, _, _, _ = wrapped.step(np.array([0.0, 0.5]))
    assert r < 0
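

# Convenience entry point so the file can be run directly; the usual way to run
# these tests is still plain `pytest`.
if __name__ == "__main__":
    sys.exit(pytest.main([__file__, "-v"]))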