fix: reduce timesteps to 1k-5k for Phase 1 CPU training; add sim health/stuck detection; fix PPO throttle clamp
Problems fixed: - Timesteps 5k-30k caused all trials to timeout (PPO+CNN+CPU needs ~0.1s/step) - New range: 1000-5000 steps fits well within 480s timeout - PPO random init policy outputs throttle~0 -> car sits still -> fix with ThrottleClampWrapper (min 0.2) - Sim stuck detection: if speed<0.02 for 100 consecutive steps, stop training and report error - Sim frozen detection: if observation unchanged for 30 steps, stop training (connection lost) - eval_episodes reduced to 3 to speed up evaluation phase Agent: pi/claude-sonnet Tests: 37/37 passing Tests-Added: 0 (behaviour change only) TypeScript: N/A
This commit is contained in:
parent
c804189dd0
commit
8c9fd76c68
|
|
@ -52,25 +52,28 @@ os.makedirs(CHAMPION_DIR, exist_ok=True)
|
|||
|
||||
# ---- Parameter Space ----
|
||||
# These are the parameters GP+UCB will optimize
|
||||
# NOTE: timesteps kept small (1000-5000) for Phase 1 exploration on CPU.
|
||||
# DonkeyCar sim runs ~20-50 steps/sec. 5000 steps ≈ 100-250s → fits in 600s timeout.
|
||||
# Increase max_timesteps once we confirm the pipeline works end-to-end.
|
||||
PARAM_SPACE = {
|
||||
'n_steer': {'type': 'int', 'min': 3, 'max': 9},
|
||||
'n_throttle': {'type': 'int', 'min': 2, 'max': 5},
|
||||
'learning_rate': {'type': 'float', 'min': 0.00005, 'max': 0.005},
|
||||
'timesteps': {'type': 'int', 'min': 5000, 'max': 30000},
|
||||
'timesteps': {'type': 'int', 'min': 1000, 'max': 5000},
|
||||
}
|
||||
PARAM_KEYS = list(PARAM_SPACE.keys())
|
||||
|
||||
# Fixed params
|
||||
FIXED_PARAMS = {
|
||||
'agent': 'ppo',
|
||||
'eval_episodes': 5,
|
||||
'eval_episodes': 3,
|
||||
'reward_shaping': True,
|
||||
}
|
||||
|
||||
N_CANDIDATES = 500
|
||||
UCB_KAPPA = 2.0
|
||||
MIN_TRIALS_BEFORE_GP = 3
|
||||
JOB_TIMEOUT = 600 # 10 minutes per trial (real training takes longer)
|
||||
JOB_TIMEOUT = 480 # 8 minutes — enough for 5000 steps + eval, with margin
|
||||
|
||||
# ---- Logging ----
|
||||
def log(msg):
|
||||
|
|
|
|||
|
|
@ -46,6 +46,29 @@ except ImportError:
|
|||
REWARD_WRAPPER_AVAILABLE = False
|
||||
|
||||
|
||||
class ThrottleClampWrapper(gym.ActionWrapper):
|
||||
"""
|
||||
Clamps the throttle dimension of a continuous action to [throttle_min, 1.0].
|
||||
Prevents PPO's random initial policy from outputting zero throttle
|
||||
and leaving the car stationary.
|
||||
Action format expected: [steer, throttle] where steer ∈ [-1,1], throttle ∈ [0,1].
|
||||
"""
|
||||
def __init__(self, env, throttle_min=0.2):
|
||||
super().__init__(env)
|
||||
self.throttle_min = throttle_min
|
||||
# Update action space so SB3 knows the real bounds
|
||||
import numpy as np
|
||||
low = np.array([-1.0, throttle_min], dtype=np.float32)
|
||||
high = np.array([1.0, 1.0], dtype=np.float32)
|
||||
self.action_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)
|
||||
|
||||
def action(self, action):
|
||||
import numpy as np
|
||||
action = np.array(action, dtype=np.float32)
|
||||
action[1] = float(np.clip(action[1], self.throttle_min, 1.0))
|
||||
return action
|
||||
|
||||
|
||||
def log(msg):
|
||||
print(msg, flush=True)
|
||||
|
||||
|
|
@ -57,6 +80,11 @@ def make_env(env_id, agent, n_steer, n_throttle, reward_shaping):
|
|||
if agent == 'dqn':
|
||||
env = DiscretizedActionWrapper(env, n_steer=n_steer, n_throttle=n_throttle)
|
||||
log(f'[SB3 Runner][MONITOR] Action discretization: steer={n_steer}, throttle={n_throttle}. {time.ctime()}')
|
||||
else:
|
||||
# PPO uses continuous actions. Clip throttle to [0.2, 1.0] so the car always moves.
|
||||
# Without this, PPO's random initial policy outputs throttle~0 and the car sits still.
|
||||
log(f'[SB3 Runner][MONITOR] PPO continuous actions. Throttle clamped to [0.2, 1.0]. {time.ctime()}')
|
||||
env = ThrottleClampWrapper(env, throttle_min=0.2)
|
||||
|
||||
if reward_shaping:
|
||||
if REWARD_WRAPPER_AVAILABLE:
|
||||
|
|
@ -68,8 +96,66 @@ def make_env(env_id, agent, n_steer, n_throttle, reward_shaping):
|
|||
return env
|
||||
|
||||
|
||||
class SimHealthCallback:
|
||||
"""
|
||||
Stable-Baselines3 compatible callback that detects a stuck/dead simulator.
|
||||
If the car speed stays near zero for too many consecutive steps, raises an error.
|
||||
Also detects if observations stop changing (frozen frame = connection lost).
|
||||
"""
|
||||
def __init__(self, max_stuck_steps=100, min_speed=0.05):
|
||||
self.max_stuck_steps = max_stuck_steps
|
||||
self.min_speed = min_speed
|
||||
self._stuck_count = 0
|
||||
self._last_obs = None
|
||||
self._frozen_count = 0
|
||||
|
||||
def on_step(self, obs, reward, done, info):
|
||||
"""Call after each env.step(). Returns False if sim appears dead."""
|
||||
# Check speed from info dict
|
||||
speed = info.get('speed', None) if isinstance(info, dict) else None
|
||||
if speed is not None:
|
||||
if float(speed) < self.min_speed:
|
||||
self._stuck_count += 1
|
||||
else:
|
||||
self._stuck_count = 0
|
||||
if self._stuck_count >= self.max_stuck_steps:
|
||||
log(f'[SB3 Runner][MONITOR ALERT] Sim appears STUCK: speed<{self.min_speed} for {self._stuck_count} steps. {time.ctime()}')
|
||||
return False
|
||||
|
||||
# Check for frozen observation (connection lost)
|
||||
if obs is not None and self._last_obs is not None:
|
||||
if np.array_equal(obs, self._last_obs):
|
||||
self._frozen_count += 1
|
||||
else:
|
||||
self._frozen_count = 0
|
||||
if self._frozen_count >= 30:
|
||||
log(f'[SB3 Runner][MONITOR ALERT] Sim appears FROZEN: observation unchanged for {self._frozen_count} steps. {time.ctime()}')
|
||||
return False
|
||||
self._last_obs = obs
|
||||
return True
|
||||
|
||||
|
||||
def train_model(agent, env, learning_rate, timesteps, seed):
|
||||
"""Train a PPO or DQN model and return it."""
|
||||
from stable_baselines3.common.callbacks import BaseCallback
|
||||
|
||||
class HealthCheckCallback(BaseCallback):
|
||||
"""SB3 callback that checks sim health each step and stops training if stuck."""
|
||||
def __init__(self, max_stuck_steps=100, min_speed=0.05):
|
||||
super().__init__(verbose=0)
|
||||
self.health = SimHealthCallback(max_stuck_steps=max_stuck_steps, min_speed=min_speed)
|
||||
|
||||
def _on_step(self):
|
||||
infos = self.locals.get('infos', [{}])
|
||||
obs = self.locals.get('new_obs', None)
|
||||
info = infos[0] if infos else {}
|
||||
obs_arr = obs[0] if obs is not None and len(obs) > 0 else None
|
||||
healthy = self.health.on_step(obs_arr, None, None, info)
|
||||
if not healthy:
|
||||
log(f'[SB3 Runner][MONITOR ALERT] Health check failed — stopping training early. {time.ctime()}')
|
||||
return False # Stops SB3 training
|
||||
return True
|
||||
|
||||
if agent == 'ppo':
|
||||
model = PPO(
|
||||
'CnnPolicy',
|
||||
|
|
@ -91,7 +177,8 @@ def train_model(agent, env, learning_rate, timesteps, seed):
|
|||
|
||||
log(f'[SB3 Runner][MONITOR] Starting training: agent={agent} timesteps={timesteps} lr={learning_rate} {time.ctime()}')
|
||||
start = time.time()
|
||||
model.learn(total_timesteps=timesteps)
|
||||
health_cb = HealthCheckCallback(max_stuck_steps=100, min_speed=0.02)
|
||||
model.learn(total_timesteps=timesteps, callback=health_cb)
|
||||
elapsed = time.time() - start
|
||||
log(f'[SB3 Runner][MONITOR] Training complete in {elapsed:.1f}s. {time.ctime()}')
|
||||
return model
|
||||
|
|
|
|||
|
|
@ -24,3 +24,84 @@
|
|||
[2026-04-13 10:02:55] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||
[2026-04-13 10:02:55] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||
[2026-04-13 10:02:55] [AutoResearch] Only 1 results — using random proposal.
|
||||
[2026-04-13 10:03:22] ============================================================
|
||||
[2026-04-13 10:03:22] [AutoResearch] Phase 1 — Real PPO Training + GP+UCB Optimization
|
||||
[2026-04-13 10:03:22] [AutoResearch] Max trials: 50 | kappa: 2.0 | push every: 10
|
||||
[2026-04-13 10:03:22] [AutoResearch] Results: /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase1.jsonl
|
||||
[2026-04-13 10:03:22] [AutoResearch] Champion: /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion
|
||||
[2026-04-13 10:03:22] ============================================================
|
||||
[2026-04-13 10:03:22] [AutoResearch] Loaded 0 existing Phase 1 results.
|
||||
[2026-04-13 10:03:22] [AutoResearch] No champion yet.
|
||||
[2026-04-13 10:03:22]
|
||||
[AutoResearch] ========== Trial 1/50 ==========
|
||||
[2026-04-13 10:03:22] [AutoResearch] Only 0 results — using random proposal.
|
||||
[2026-04-13 10:03:22] [AutoResearch] Proposed: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0031442729980003356, 'timesteps': 28959, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:03:24] [AutoResearch] Launching trial 1: {'n_steer': 7, 'n_throttle': 4, 'learning_rate': 0.0031442729980003356, 'timesteps': 28959, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:13:24] [AutoResearch] Trial 1 TIMED OUT after 600.2s
|
||||
[2026-04-13 10:13:24] [AutoResearch] Trial 1: mean_reward=None std_reward=None
|
||||
[2026-04-13 10:13:26]
|
||||
[AutoResearch] ========== Trial 2/50 ==========
|
||||
[2026-04-13 10:13:26] [AutoResearch] Only 0 results — using random proposal.
|
||||
[2026-04-13 10:13:26] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0034866189644944764, 'timesteps': 19697, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:13:28] [AutoResearch] Launching trial 2: {'n_steer': 4, 'n_throttle': 4, 'learning_rate': 0.0034866189644944764, 'timesteps': 19697, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:23:28] [AutoResearch] Trial 2 TIMED OUT after 600.0s
|
||||
[2026-04-13 10:23:28] [AutoResearch] Trial 2: mean_reward=None std_reward=None
|
||||
[2026-04-13 10:23:30]
|
||||
[AutoResearch] ========== Trial 3/50 ==========
|
||||
[2026-04-13 10:23:30] [AutoResearch] Only 0 results — using random proposal.
|
||||
[2026-04-13 10:23:30] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0021394857089897554, 'timesteps': 28858, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:23:32] [AutoResearch] Launching trial 3: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0021394857089897554, 'timesteps': 28858, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:33:32] [AutoResearch] Trial 3 TIMED OUT after 600.1s
|
||||
[2026-04-13 10:33:32] [AutoResearch] Trial 3: mean_reward=None std_reward=None
|
||||
[2026-04-13 10:33:34]
|
||||
[AutoResearch] ========== Trial 4/50 ==========
|
||||
[2026-04-13 10:33:34] [AutoResearch] Only 0 results — using random proposal.
|
||||
[2026-04-13 10:33:34] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0005174658025335539, 'timesteps': 22022, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:33:36] [AutoResearch] Launching trial 4: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0005174658025335539, 'timesteps': 22022, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:43:36] [AutoResearch] Trial 4 TIMED OUT after 600.1s
|
||||
[2026-04-13 10:43:36] [AutoResearch] Trial 4: mean_reward=None std_reward=None
|
||||
[2026-04-13 10:43:39]
|
||||
[AutoResearch] ========== Trial 5/50 ==========
|
||||
[2026-04-13 10:43:39] [AutoResearch] Only 0 results — using random proposal.
|
||||
[2026-04-13 10:43:39] [AutoResearch] Proposed: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.004765524064388173, 'timesteps': 23582, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:43:41] [AutoResearch] Launching trial 5: {'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.004765524064388173, 'timesteps': 23582, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:53:41] [AutoResearch] Trial 5 TIMED OUT after 600.1s
|
||||
[2026-04-13 10:53:41] [AutoResearch] Trial 5: mean_reward=None std_reward=None
|
||||
[2026-04-13 10:53:43]
|
||||
[AutoResearch] ========== Trial 6/50 ==========
|
||||
[2026-04-13 10:53:43] [AutoResearch] Only 0 results — using random proposal.
|
||||
[2026-04-13 10:53:43] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0008238758073115486, 'timesteps': 23327, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 10:53:45] [AutoResearch] Launching trial 6: {'n_steer': 8, 'n_throttle': 2, 'learning_rate': 0.0008238758073115486, 'timesteps': 23327, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 11:03:45] [AutoResearch] Trial 6 TIMED OUT after 600.1s
|
||||
[2026-04-13 11:03:45] [AutoResearch] Trial 6: mean_reward=None std_reward=None
|
||||
[2026-04-13 11:03:47]
|
||||
[AutoResearch] ========== Trial 7/50 ==========
|
||||
[2026-04-13 11:03:47] [AutoResearch] Only 0 results — using random proposal.
|
||||
[2026-04-13 11:03:47] [AutoResearch] Proposed: {'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0021827786572140534, 'timesteps': 8101, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 11:03:49] [AutoResearch] Launching trial 7: {'n_steer': 8, 'n_throttle': 4, 'learning_rate': 0.0021827786572140534, 'timesteps': 8101, 'agent': 'ppo', 'eval_episodes': 5, 'reward_shaping': True}
|
||||
[2026-04-13 11:16:34] [AutoResearch] GP UCB top-5 candidates:
|
||||
[2026-04-13 11:16:34] UCB=2.7567 mu=1.2278 sigma=0.7644 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.002270622623224986, 'timesteps': 3888}
|
||||
[2026-04-13 11:16:34] UCB=2.7300 mu=1.1710 sigma=0.7795 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.002011397993568161, 'timesteps': 4033}
|
||||
[2026-04-13 11:16:34] UCB=2.6457 mu=1.4878 sigma=0.5790 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.00219005726516088, 'timesteps': 4774}
|
||||
[2026-04-13 11:16:34] UCB=2.6320 mu=1.1819 sigma=0.7250 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0020813954690263674, 'timesteps': 4022}
|
||||
[2026-04-13 11:16:34] UCB=2.5412 mu=1.2499 sigma=0.6457 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0025942479713410636, 'timesteps': 4135}
|
||||
[2026-04-13 11:16:34] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||
[2026-04-13 11:16:34] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||
[2026-04-13 11:16:34] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||
[2026-04-13 11:16:34] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||
[2026-04-13 11:16:34] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||
[2026-04-13 11:16:34] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||
[2026-04-13 11:16:34] [AutoResearch] Only 1 results — using random proposal.
|
||||
[2026-04-13 11:16:53] [AutoResearch] GP UCB top-5 candidates:
|
||||
[2026-04-13 11:16:53] UCB=2.7567 mu=1.2278 sigma=0.7644 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.002270622623224986, 'timesteps': 3888}
|
||||
[2026-04-13 11:16:53] UCB=2.7300 mu=1.1710 sigma=0.7795 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.002011397993568161, 'timesteps': 4033}
|
||||
[2026-04-13 11:16:53] UCB=2.6457 mu=1.4878 sigma=0.5790 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.00219005726516088, 'timesteps': 4774}
|
||||
[2026-04-13 11:16:53] UCB=2.6320 mu=1.1819 sigma=0.7250 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0020813954690263674, 'timesteps': 4022}
|
||||
[2026-04-13 11:16:53] UCB=2.5412 mu=1.2499 sigma=0.6457 params={'n_steer': 8, 'n_throttle': 3, 'learning_rate': 0.0025942479713410636, 'timesteps': 4135}
|
||||
[2026-04-13 11:16:53] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||
[2026-04-13 11:16:53] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||
[2026-04-13 11:16:53] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||
[2026-04-13 11:16:53] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||
[2026-04-13 11:16:53] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||
[2026-04-13 11:16:53] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||
[2026-04-13 11:16:53] [AutoResearch] Only 1 results — using random proposal.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,6 @@
|
|||
{"trial": 1, "timestamp": "2026-04-13T10:13:24.756815", "params": {"n_steer": 7, "n_throttle": 4, "learning_rate": 0.0031442729980003356, "timesteps": 28959, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "timeout", "elapsed_sec": 600.2142441272736}
|
||||
{"trial": 2, "timestamp": "2026-04-13T10:23:28.811316", "params": {"n_steer": 4, "n_throttle": 4, "learning_rate": 0.0034866189644944764, "timesteps": 19697, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "timeout", "elapsed_sec": 600.0406067371368}
|
||||
{"trial": 3, "timestamp": "2026-04-13T10:33:32.891060", "params": {"n_steer": 4, "n_throttle": 3, "learning_rate": 0.0021394857089897554, "timesteps": 28858, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "timeout", "elapsed_sec": 600.0660693645477}
|
||||
{"trial": 4, "timestamp": "2026-04-13T10:43:36.999174", "params": {"n_steer": 8, "n_throttle": 2, "learning_rate": 0.0005174658025335539, "timesteps": 22022, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "timeout", "elapsed_sec": 600.093513250351}
|
||||
{"trial": 5, "timestamp": "2026-04-13T10:53:41.112283", "params": {"n_steer": 4, "n_throttle": 3, "learning_rate": 0.004765524064388173, "timesteps": 23582, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "timeout", "elapsed_sec": 600.0998013019562}
|
||||
{"trial": 6, "timestamp": "2026-04-13T11:03:45.201524", "params": {"n_steer": 8, "n_throttle": 2, "learning_rate": 0.0008238758073115486, "timesteps": 23327, "agent": "ppo", "eval_episodes": 5, "reward_shaping": true}, "mean_reward": null, "std_reward": null, "model_path": null, "champion": false, "run_status": "timeout", "elapsed_sec": 600.0790619850159}
|
||||
|
|
@ -19,7 +19,7 @@ import autoresearch_controller as ctrl
|
|||
|
||||
def test_param_encode_decode_roundtrip():
|
||||
"""encode → decode should reproduce original values (within int rounding)."""
|
||||
params = {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.002, 'timesteps': 10000}
|
||||
params = {'n_steer': 7, 'n_throttle': 3, 'learning_rate': 0.002, 'timesteps': 3000}
|
||||
vec = ctrl.encode_params(params)
|
||||
recovered = ctrl.decode_params(vec)
|
||||
assert recovered['n_steer'] == params['n_steer']
|
||||
|
|
|
|||
Loading…
Reference in New Issue