fix: load warm-start with original action space (throttle_min=0.2), then switch env for phase1 throttle
This commit is contained in:
parent
bc23a316e0
commit
41d12dede2
|
|
@ -134,19 +134,29 @@ def log(s):
|
|||
# Training phases as (timesteps, throttle_min) pairs, run in order.
phase_defs = [(PH1_STEPS, 0.4), (PH2_STEPS, 0.2)]

# throttle_min the warm-start checkpoint was trained with. The checkpoint must
# be loaded against an env built with this value before switching to another.
WARM_THROTTLE_MIN = 0.2

# Create the initial env and model (warm start when a checkpoint exists).
first_throttle = phase_defs[0][1]

if os.path.exists(WARM_PATH):
    # Important: load the warm-start model using the SAME action space it was
    # trained with, so we can then switch envs for phase 1 if needed.
    loaded_env = VecTransposeImage(DummyVecEnv([make_env(WARM_THROTTLE_MIN)]))
    log(f'Loading warm-start model from {WARM_PATH} using throttle_min={WARM_THROTTLE_MIN} env')
    model = PPO.load(WARM_PATH, env=loaded_env, device='cpu')

    # Override the learning rate stored in the checkpoint.
    model.learning_rate = LR
    # The schedule must stay callable. The previous code fell back to None when
    # the model had no `get_schedule_fn` attribute, which would fail on the next
    # learning-rate update. A constant callable works either way.
    model.lr_schedule = lambda _progress_remaining, _lr=LR: _lr
    optimizer = getattr(model.policy, 'optimizer', None)
    if optimizer is not None:
        for pg in optimizer.param_groups:
            pg['lr'] = LR

    if first_throttle != WARM_THROTTLE_MIN:
        # Now create the actual training env with the first throttle setting.
        # NOTE(review): if throttle_min changes the action-space bounds,
        # set_env() may reject the new env - confirm against make_env.
        env0 = VecTransposeImage(DummyVecEnv([make_env(first_throttle)]))
        log(f'Switching model to env with throttle_min={first_throttle}')
        model.set_env(env0)
        # Safe to release only now: the model no longer uses loaded_env.
        loaded_env.close()
    else:
        # Phase 1 already uses the warm-start throttle: keep training on the
        # env the model is attached to instead of building a second one.
        env0 = loaded_env
else:
    log('No warm-start found — creating fresh model with first throttle')
    env0 = VecTransposeImage(DummyVecEnv([make_env(first_throttle)]))
    model = PPO('CnnPolicy', env0, learning_rate=LR, verbose=1, device='cpu')

# Progress bookkeeping for the phases that follow.
steps_done = 0
best_reward = float('-inf')
|
||||
|
|
|
|||
Loading…
Reference in New Issue