From 6c5623e881ecd3ff659fde872b14b71af013d7a5 Mon Sep 17 00:00:00 2001
From: Paul Huliganga
Date: Sun, 19 Apr 2026 20:24:33 -0400
Subject: [PATCH] fix: exp14 finetune load warm-start model without temp env to prevent second spawned car

---
Reviewer notes (free text placed after the "---" separator and before the
first "diff --git" line; it is not part of the commit message):

  * What the hunk does: the module-level `loaded_env` that was built only to
    be passed to `PPO.load(..., env=loaded_env)` is removed, the model is
    loaded with `PPO.load(WARM_PATH, device='cpu')`, and the single training
    env `env0` is attached afterwards with `model.set_env(env0)`. Both
    `loaded_env.close()` call sites (warm-start branch and fresh-model
    branch) are removed together with the variable.
  * The line structure of this patch was restored from a whitespace-collapsed
    copy. The split into context / "+" / "-" lines was derived from the hunk
    header (-158,39 +158,31) and the diffstat (6 insertions, 14 deletions);
    4-space indentation is assumed. Verify with `git apply --check` against
    the parent of 6c5623e before applying.
  * The "From:" header carries no e-mail address in the copy this was
    restored from; add it if the patch is to be applied with `git am`.

 agent/experiments/exp14_finetune_v5.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/agent/experiments/exp14_finetune_v5.py b/agent/experiments/exp14_finetune_v5.py
index 655071a..19e4ff9 100644
--- a/agent/experiments/exp14_finetune_v5.py
+++ b/agent/experiments/exp14_finetune_v5.py
@@ -158,39 +158,31 @@ def log(s):
 phase_defs = [ (PH1_STEPS, 0.4), (PH2_STEPS, 0.2) ]
 
 # create initial env and model (warm start)
-# Load model with base action space (throttle_min=0.2). We'll enforce a runtime
-# throttle FLOOR during phase 1 via a wrapper, but keep the action space unchanged.
-loaded_env = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=None)]))
+# IMPORTANT: load the model WITHOUT an env, then attach exactly one env.
+# Creating a temporary env just for loading opens a second TCP connection and
+# spawns a second car in the sim (right lane + left lane issue).
 if os.path.exists(WARM_PATH):
-    log(f'Loading warm-start model from {WARM_PATH} using base throttle_min=0.2 env')
-    model = PPO.load(WARM_PATH, env=loaded_env, device='cpu')
+    log(f'Loading warm-start model from {WARM_PATH} without creating a temp env')
+    model = PPO.load(WARM_PATH, device='cpu')
     # override lr and schedules — ensure lr_schedule callable exists
     model.learning_rate = LR
     try:
         model.lr_schedule = get_schedule_fn(LR)
     except Exception:
         model.lr_schedule = None
-    # update optimizer param groups to new LR
     try:
         for pg in model.policy.optimizer.param_groups:
             pg['lr'] = LR
     except Exception:
         pass
-    # Create the training env using base action space but enforce throttle_floor at runtime
+    # Create exactly one training env and attach it
     first_throttle_floor = phase_defs[0][1]
     env0 = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=first_throttle_floor)]))
     model.set_env(env0)
-    # Close the loaded_env used only for model loading to avoid leaving a stale
-    # TCP connection (which would create an extra vehicle in the simulator).
-    try:
-        loaded_env.close()
-    except Exception:
-        pass
 else:
     log('No warm-start found — creating fresh model with base throttle_min=0.2')
     env0 = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=phase_defs[0][1])]))
     model = PPO('CnnPolicy', env0, learning_rate=LR, verbose=1, device='cpu')
-    loaded_env.close()
 
 steps_done = 0
 best_reward = float('-inf')