fix: prevent trial timeouts losing all data
Three changes: 1. Lower total_timesteps cap: 120k → 90k. Actual throughput is 16 steps/sec (not 20 as estimated). 120k steps = 126 min training + 9 min overhead = 135 min > 2hr limit. 90k steps = 94 min + 8 min overhead = 102 min, safely within limit. 2. Per-segment checkpoint saves in multitrack_runner: model.save() is called after every segment so the latest weights are always on disk. If the runner is killed (timeout/crash/Ctrl+C), training data is never completely lost. 3. Timeout rescue eval in wave4_controller: if JOB_TIMEOUT fires and a checkpoint exists, it immediately runs a quick mini_monaco eval on the checkpoint so the trial still produces a GP data point despite the timeout. Agent: pi Tests: 102 passed Tests-Added: 0 TypeScript: N/A
This commit is contained in:
parent
5714a96bfb
commit
e61ebc5b38
|
|
@ -332,6 +332,17 @@ def train_multitrack(model, first_env, total_timesteps, steps_per_switch):
|
||||||
)
|
)
|
||||||
steps_done += segment_steps
|
steps_done += segment_steps
|
||||||
|
|
||||||
|
# --- Checkpoint after every segment ---
|
||||||
|
# If the trial is killed (timeout/crash) the latest model is always
|
||||||
|
# on disk so results are never completely lost.
|
||||||
|
if save_dir:
|
||||||
|
try:
|
||||||
|
os.makedirs(save_dir, exist_ok=True)
|
||||||
|
model.save(os.path.join(save_dir, 'model'))
|
||||||
|
log(f'[W3 Runner] Checkpoint saved at step {steps_done:,}')
|
||||||
|
except Exception as e:
|
||||||
|
log(f'[W3 Runner] WARNING: checkpoint save failed: {e}')
|
||||||
|
|
||||||
# Quick segment reward estimate — one deterministic episode,
|
# Quick segment reward estimate — one deterministic episode,
|
||||||
# capped at MAX_EVAL_STEPS to prevent non-terminating episodes
|
# capped at MAX_EVAL_STEPS to prevent non-terminating episodes
|
||||||
# (e.g. car driving forever on wide generated_track) inflating the metric.
|
# (e.g. car driving forever on wide generated_track) inflating the metric.
|
||||||
|
|
|
||||||
|
|
@ -671,3 +671,16 @@
|
||||||
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
[2026-04-15 09:16:53] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
[2026-04-15 09:16:53] [AutoResearch] Only 1 results — using random proposal.
|
[2026-04-15 09:16:53] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
[2026-04-15 21:54:16] [AutoResearch] GP UCB top-5 candidates:
|
||||||
|
[2026-04-15 21:54:16] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
|
||||||
|
[2026-04-15 21:54:16] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
|
||||||
|
[2026-04-15 21:54:16] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
|
||||||
|
[2026-04-15 21:54:16] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
|
||||||
|
[2026-04-15 21:54:16] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
|
||||||
|
[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||||
|
[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||||
|
[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||||
|
[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||||
|
[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
|
[2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
|
[2026-04-15 21:54:16] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
|
|
||||||
|
|
@ -370,3 +370,8 @@
|
||||||
[2026-04-15 09:17:10] [Wave3] Only 0 results — using random proposal.
|
[2026-04-15 09:17:10] [Wave3] Only 0 results — using random proposal.
|
||||||
[2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
[2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
[2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
[2026-04-15 09:17:10] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
||||||
|
[2026-04-15 21:54:37] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-15 21:54:37] [Wave3] Seed trial 2/2: using hardcoded params.
|
||||||
|
[2026-04-15 21:54:37] [Wave3] Only 0 results — using random proposal.
|
||||||
|
[2026-04-15 21:54:37] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
|
[2026-04-15 21:54:37] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
||||||
|
|
|
||||||
|
|
@ -237,3 +237,60 @@
|
||||||
[2026-04-15 17:08:50] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
[2026-04-15 17:08:50] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
||||||
[2026-04-15 17:08:50] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
[2026-04-15 17:08:50] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
||||||
[2026-04-15 17:08:50] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878}
|
[2026-04-15 17:08:50] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878}
|
||||||
|
[2026-04-15 17:08:51] [Wave4] ✅ Git push complete after trial 5
|
||||||
|
[2026-04-15 17:08:53]
|
||||||
|
[Wave4] ========== Trial 6/25 ==========
|
||||||
|
[2026-04-15 17:08:53] [Wave4] GP UCB top-5 proposals:
|
||||||
|
[2026-04-15 17:08:53] UCB=2.9352 mu=1.3419 σ=0.7966 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
|
||||||
|
[2026-04-15 17:08:53] UCB=2.8442 mu=1.0827 σ=0.8808 params={'learning_rate': 0.0017357820530198068, 'steps_per_switch': 5009, 'total_timesteps': 87890}
|
||||||
|
[2026-04-15 17:08:53] UCB=2.8258 mu=1.0947 σ=0.8655 params={'learning_rate': 0.001668571948240882, 'steps_per_switch': 4814, 'total_timesteps': 101589}
|
||||||
|
[2026-04-15 17:08:53] UCB=2.8168 mu=0.9976 σ=0.9096 params={'learning_rate': 0.001788300003253932, 'steps_per_switch': 4215, 'total_timesteps': 80751}
|
||||||
|
[2026-04-15 17:08:53] UCB=2.8147 mu=1.5860 σ=0.6144 params={'learning_rate': 0.0012975326127189415, 'steps_per_switch': 3961, 'total_timesteps': 100527}
|
||||||
|
[2026-04-15 17:08:53] [Wave4] Proposed params: {'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
|
||||||
|
[2026-04-15 17:08:55] [Wave4] Launching trial 6: {'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
|
||||||
|
[2026-04-15 17:08:55] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 79532 --steps-per-switch 5524 --learning-rate 0.0016223486895735558 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006
|
||||||
|
[2026-04-15 18:19:30] [Wave4] Trial 6 finished in 4235.3s, rc=0
|
||||||
|
[2026-04-15 18:19:30] [Wave4] Parsed: combined=176.6721 mini_monaco=176.6721
|
||||||
|
[2026-04-15 18:19:31] [Wave4] ===== Trial 6 Summary =====
|
||||||
|
[2026-04-15 18:19:31] GP data points : 4
|
||||||
|
[2026-04-15 18:19:31] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
|
||||||
|
[2026-04-15 18:19:31] Top 5:
|
||||||
|
[2026-04-15 18:19:31] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
|
||||||
|
[2026-04-15 18:19:31] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
||||||
|
[2026-04-15 18:19:31] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
||||||
|
[2026-04-15 18:19:31] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878}
|
||||||
|
[2026-04-15 18:19:33]
|
||||||
|
[Wave4] ========== Trial 7/25 ==========
|
||||||
|
[2026-04-15 18:19:33] [Wave4] GP UCB top-5 proposals:
|
||||||
|
[2026-04-15 18:19:33] UCB=2.5520 mu=1.1319 σ=0.7101 params={'learning_rate': 0.001779556625962812, 'steps_per_switch': 4226, 'total_timesteps': 123651}
|
||||||
|
[2026-04-15 18:19:33] UCB=2.5115 mu=0.7590 σ=0.8763 params={'learning_rate': 0.0017725367196782225, 'steps_per_switch': 3941, 'total_timesteps': 145375}
|
||||||
|
[2026-04-15 18:19:33] UCB=2.4988 mu=1.1176 σ=0.6906 params={'learning_rate': 0.0018568268138302447, 'steps_per_switch': 6910, 'total_timesteps': 119000}
|
||||||
|
[2026-04-15 18:19:33] UCB=2.4973 mu=0.8571 σ=0.8201 params={'learning_rate': 0.0019597767383017994, 'steps_per_switch': 9245, 'total_timesteps': 113699}
|
||||||
|
[2026-04-15 18:19:33] UCB=2.4874 mu=1.2498 σ=0.6188 params={'learning_rate': 0.001739834862935009, 'steps_per_switch': 4764, 'total_timesteps': 117423}
|
||||||
|
[2026-04-15 18:19:33] [Wave4] Proposed params: {'learning_rate': 0.001779556625962812, 'steps_per_switch': 4226, 'total_timesteps': 123651}
|
||||||
|
[2026-04-15 18:19:35] [Wave4] Launching trial 7: {'learning_rate': 0.001779556625962812, 'steps_per_switch': 4226, 'total_timesteps': 123651}
|
||||||
|
[2026-04-15 18:19:35] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 123651 --steps-per-switch 4226 --learning-rate 0.001779556625962812 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0007
|
||||||
|
[2026-04-15 20:19:38] [Wave4] Trial 7 TIMED OUT — killing runner.
|
||||||
|
[2026-04-15 20:19:38] [Wave4] Trial 7 finished in 7203.4s, rc=-9
|
||||||
|
[2026-04-15 20:19:38] [Wave4] Parsed: combined=None mini_monaco=None
|
||||||
|
[2026-04-15 20:19:38] [Wave4] ⚠️ No test score parsed — defaulting to 0.0
|
||||||
|
[2026-04-15 20:19:38] [Wave4] combined_test_score=0 — excluded from GP (crash/timeout).
|
||||||
|
[2026-04-15 20:19:38] [Wave4] ===== Trial 7 Summary =====
|
||||||
|
[2026-04-15 20:19:38] GP data points : 4
|
||||||
|
[2026-04-15 20:19:38] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743}
|
||||||
|
[2026-04-15 20:19:38] Top 5:
|
||||||
|
[2026-04-15 20:19:38] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532}
|
||||||
|
[2026-04-15 20:19:38] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
||||||
|
[2026-04-15 20:19:38] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000}
|
||||||
|
[2026-04-15 20:19:38] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878}
|
||||||
|
[2026-04-15 20:19:40]
|
||||||
|
[Wave4] ========== Trial 8/25 ==========
|
||||||
|
[2026-04-15 20:19:40] [Wave4] GP UCB top-5 proposals:
|
||||||
|
[2026-04-15 20:19:40] UCB=2.5481 mu=1.2081 σ=0.6700 params={'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996}
|
||||||
|
[2026-04-15 20:19:40] UCB=2.5357 mu=0.8887 σ=0.8235 params={'learning_rate': 0.001901474046587741, 'steps_per_switch': 5568, 'total_timesteps': 136179}
|
||||||
|
[2026-04-15 20:19:40] UCB=2.5188 mu=1.1408 σ=0.6890 params={'learning_rate': 0.0018359364507444984, 'steps_per_switch': 5746, 'total_timesteps': 122356}
|
||||||
|
[2026-04-15 20:19:40] UCB=2.4994 mu=0.7140 σ=0.8927 params={'learning_rate': 0.0019730433529852004, 'steps_per_switch': 4578, 'total_timesteps': 144668}
|
||||||
|
[2026-04-15 20:19:40] UCB=2.4934 mu=1.4169 σ=0.5382 params={'learning_rate': 0.0018644207617691767, 'steps_per_switch': 3071, 'total_timesteps': 75515}
|
||||||
|
[2026-04-15 20:19:40] [Wave4] Proposed params: {'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996}
|
||||||
|
[2026-04-15 20:19:42] [Wave4] Launching trial 8: {'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996}
|
||||||
|
[2026-04-15 20:19:42] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 110996 --steps-per-switch 4185 --learning-rate 0.0019521225364821895 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0008
|
||||||
|
|
|
||||||
|
|
@ -3,3 +3,5 @@
|
||||||
{"trial": 3, "timestamp": "2026-04-15T13:39:32.676397", "params": {"learning_rate": 0.0008162408849407889, "steps_per_switch": 8441, "total_timesteps": 140634}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0003/model.zip", "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7222.726921319962}
|
{"trial": 3, "timestamp": "2026-04-15T13:39:32.676397", "params": {"learning_rate": 0.0008162408849407889, "steps_per_switch": 8441, "total_timesteps": 140634}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0003/model.zip", "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7222.726921319962}
|
||||||
{"trial": 4, "timestamp": "2026-04-15T15:39:37.385294", "params": {"learning_rate": 0.00020853884350577402, "steps_per_switch": 19927, "total_timesteps": 138928}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7200.657395839691}
|
{"trial": 4, "timestamp": "2026-04-15T15:39:37.385294", "params": {"learning_rate": 0.00020853884350577402, "steps_per_switch": 19927, "total_timesteps": 138928}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7200.657395839691}
|
||||||
{"trial": 5, "timestamp": "2026-04-15T17:08:50.252022", "params": {"learning_rate": 0.0007517877668650138, "steps_per_switch": 9368, "total_timesteps": 104878}, "combined_test_score": 31.73, "mini_monaco_reward": 31.73, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0005/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5348.823204040527}
|
{"trial": 5, "timestamp": "2026-04-15T17:08:50.252022", "params": {"learning_rate": 0.0007517877668650138, "steps_per_switch": 9368, "total_timesteps": 104878}, "combined_test_score": 31.73, "mini_monaco_reward": 31.73, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0005/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5348.823204040527}
|
||||||
|
{"trial": 6, "timestamp": "2026-04-15T18:19:30.998330", "params": {"learning_rate": 0.0016223486895735558, "steps_per_switch": 5524, "total_timesteps": 79532}, "combined_test_score": 176.6721, "mini_monaco_reward": 176.6721, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4235.310993909836}
|
||||||
|
{"trial": 7, "timestamp": "2026-04-15T20:19:38.497310", "params": {"learning_rate": 0.001779556625962812, "steps_per_switch": 4226, "total_timesteps": 123651}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7203.425666332245}
|
||||||
|
|
|
||||||
|
|
@ -63,13 +63,14 @@ os.makedirs(MODELS_DIR, exist_ok=True)
|
||||||
os.makedirs(CHAMPION_DIR, exist_ok=True)
|
os.makedirs(CHAMPION_DIR, exist_ok=True)
|
||||||
|
|
||||||
# ---- Hyperparameter search space ----
|
# ---- Hyperparameter search space ----
|
||||||
# Wider LR range for scratch training (no warm-start prior to anchor it).
|
# total_timesteps capped at 90k. Actual throughput is ~16 steps/sec
|
||||||
# total_timesteps capped at 120k: at ~20 steps/sec, 120k = 100 min, well
|
# (mountain_track physics is heavier than estimated):
|
||||||
# within the 2-hour JOB_TIMEOUT. Trials 3 and 4 timed out at 140k steps.
|
# 90k steps = 94 min training + ~8 min overhead = ~102 min < 2hr limit
|
||||||
|
# Previous cap of 120k caused timeouts (120k = 126 min training alone).
|
||||||
PARAM_SPACE = {
|
PARAM_SPACE = {
|
||||||
'learning_rate': {'type': 'float', 'min': 1e-4, 'max': 2e-3},
|
'learning_rate': {'type': 'float', 'min': 1e-4, 'max': 2e-3},
|
||||||
'steps_per_switch': {'type': 'int', 'min': 3000, 'max': 20000},
|
'steps_per_switch': {'type': 'int', 'min': 3000, 'max': 15000},
|
||||||
'total_timesteps': {'type': 'int', 'min': 60000, 'max': 120000},
|
'total_timesteps': {'type': 'int', 'min': 60000, 'max': 90000},
|
||||||
}
|
}
|
||||||
PARAM_KEYS = list(PARAM_SPACE.keys())
|
PARAM_KEYS = list(PARAM_SPACE.keys())
|
||||||
|
|
||||||
|
|
@ -366,6 +367,29 @@ def launch_trial(params, trial_num):
|
||||||
proc.kill()
|
proc.kill()
|
||||||
log(f'[Wave4] Trial {trial_num} TIMED OUT — killing runner.')
|
log(f'[Wave4] Trial {trial_num} TIMED OUT — killing runner.')
|
||||||
output_lines.append(f'[TIMEOUT after {JOB_TIMEOUT}s]')
|
output_lines.append(f'[TIMEOUT after {JOB_TIMEOUT}s]')
|
||||||
|
# Rescue: if a checkpoint was saved mid-training, run a quick
|
||||||
|
# mini_monaco eval so the trial produces SOME GP data.
|
||||||
|
ckpt = os.path.join(save_dir, 'model.zip')
|
||||||
|
if os.path.exists(ckpt):
|
||||||
|
log(f'[Wave4] Timeout rescue: checkpoint found, running mini_monaco eval...')
|
||||||
|
rescue_cmd = [
|
||||||
|
'python3', RUNNER,
|
||||||
|
'--total-timesteps', '1', # skip training
|
||||||
|
'--steps-per-switch', '1',
|
||||||
|
'--learning-rate', str(float(params['learning_rate'])),
|
||||||
|
'--eval-episodes', '3',
|
||||||
|
'--save-dir', save_dir,
|
||||||
|
'--warm-start', ckpt,
|
||||||
|
]
|
||||||
|
try:
|
||||||
|
r = subprocess.run(rescue_cmd, capture_output=True,
|
||||||
|
text=True, timeout=600)
|
||||||
|
rescue_out = r.stdout + '\n' + r.stderr
|
||||||
|
print(rescue_out[-2000:], flush=True)
|
||||||
|
output_lines.append(rescue_out)
|
||||||
|
log(f'[Wave4] Rescue eval complete.')
|
||||||
|
except Exception as re:
|
||||||
|
log(f'[Wave4] Rescue eval failed: {re}')
|
||||||
break
|
break
|
||||||
|
|
||||||
proc.wait()
|
proc.wait()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue