fix: exp14 finetune: load the warm-start model without a temp env to prevent spawning a second car
This commit is contained in:
parent
0c3a37f877
commit
6c5623e881
|
|
@ -158,39 +158,31 @@ def log(s):
|
||||||
phase_defs = [ (PH1_STEPS, 0.4), (PH2_STEPS, 0.2) ]
|
phase_defs = [ (PH1_STEPS, 0.4), (PH2_STEPS, 0.2) ]
|
||||||
|
|
||||||
# create initial env and model (warm start)
|
# create initial env and model (warm start)
|
||||||
# Load model with base action space (throttle_min=0.2). We'll enforce a runtime
|
# IMPORTANT: load the model WITHOUT an env, then attach exactly one env.
|
||||||
# throttle FLOOR during phase 1 via a wrapper, but keep the action space unchanged.
|
# Creating a temporary env just for loading opens a second TCP connection and
|
||||||
loaded_env = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=None)]))
|
# spawns a second car in the sim (right lane + left lane issue).
|
||||||
if os.path.exists(WARM_PATH):
|
if os.path.exists(WARM_PATH):
|
||||||
log(f'Loading warm-start model from {WARM_PATH} using base throttle_min=0.2 env')
|
log(f'Loading warm-start model from {WARM_PATH} without creating a temp env')
|
||||||
model = PPO.load(WARM_PATH, env=loaded_env, device='cpu')
|
model = PPO.load(WARM_PATH, device='cpu')
|
||||||
# override lr and schedules — ensure lr_schedule callable exists
|
# override lr and schedules — ensure lr_schedule callable exists
|
||||||
model.learning_rate = LR
|
model.learning_rate = LR
|
||||||
try:
|
try:
|
||||||
model.lr_schedule = get_schedule_fn(LR)
|
model.lr_schedule = get_schedule_fn(LR)
|
||||||
except Exception:
|
except Exception:
|
||||||
model.lr_schedule = None
|
model.lr_schedule = None
|
||||||
# update optimizer param groups to new LR
|
|
||||||
try:
|
try:
|
||||||
for pg in model.policy.optimizer.param_groups:
|
for pg in model.policy.optimizer.param_groups:
|
||||||
pg['lr'] = LR
|
pg['lr'] = LR
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
# Create the training env using base action space but enforce throttle_floor at runtime
|
# Create exactly one training env and attach it
|
||||||
first_throttle_floor = phase_defs[0][1]
|
first_throttle_floor = phase_defs[0][1]
|
||||||
env0 = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=first_throttle_floor)]))
|
env0 = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=first_throttle_floor)]))
|
||||||
model.set_env(env0)
|
model.set_env(env0)
|
||||||
# Close the loaded_env used only for model loading to avoid leaving a stale
|
|
||||||
# TCP connection (which would create an extra vehicle in the simulator).
|
|
||||||
try:
|
|
||||||
loaded_env.close()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
else:
|
else:
|
||||||
log('No warm-start found — creating fresh model with base throttle_min=0.2')
|
log('No warm-start found — creating fresh model with base throttle_min=0.2')
|
||||||
env0 = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=phase_defs[0][1])]))
|
env0 = VecTransposeImage(DummyVecEnv([make_env_base(0.2, throttle_floor=phase_defs[0][1])]))
|
||||||
model = PPO('CnnPolicy', env0, learning_rate=LR, verbose=1, device='cpu')
|
model = PPO('CnnPolicy', env0, learning_rate=LR, verbose=1, device='cpu')
|
||||||
loaded_env.close()
|
|
||||||
|
|
||||||
steps_done = 0
|
steps_done = 0
|
||||||
best_reward = float('-inf')
|
best_reward = float('-inf')
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue