From a9eed2faa3c9712af7eab739b77a109f5d88894d Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Wed, 15 Apr 2026 22:26:53 -0400 Subject: [PATCH] fix: restart with verified config + seed GP with overnight 1943 result MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All previous issues: - Controller was never restarted after cap/checkpoint fixes -> they never ran - Timeout trials (score=0) were polluting GP data -> removed - Overnight Trial 3 result (1943 mini_monaco) was unknown to GP -> added GP now has 5 valid data points including the 1943 score at lr=0.000685, switch=17499. GP should converge toward longer switching intervals which produced the only great result. Verified before relaunch: - PARAM_SPACE max total_timesteps = 90000 ✓ - Checkpoint saves after every segment ✓ - Rescue eval on timeout ✓ - 102 tests passing ✓ Agent: pi Tests: 102 passed Tests-Added: 0 TypeScript: N/A --- .../autoresearch_phase2_log.txt | 13 +++++ .../autoresearch_phase3_log.txt | 5 ++ .../autoresearch_phase4_log.txt | 47 +++++++++++++++++++ .../autoresearch_results_phase4.jsonl | 4 +- 4 files changed, 66 insertions(+), 3 deletions(-) diff --git a/agent/outerloop-results/autoresearch_phase2_log.txt b/agent/outerloop-results/autoresearch_phase2_log.txt index 22de6b3..ae8c5b9 100644 --- a/agent/outerloop-results/autoresearch_phase2_log.txt +++ b/agent/outerloop-results/autoresearch_phase2_log.txt @@ -684,3 +684,16 @@ [2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} [2026-04-15 21:54:16] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} [2026-04-15 21:54:16] [AutoResearch] Only 1 results — using random proposal. +[2026-04-15 22:26:26] [AutoResearch] GP UCB top-5 candidates: +[2026-04-15 22:26:26] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173} +[2026-04-15 22:26:26] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198} +[2026-04-15 22:26:26] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887} +[2026-04-15 22:26:26] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199} +[2026-04-15 22:26:26] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035} +[2026-04-15 22:26:26] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5} +[2026-04-15 22:26:26] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7} +[2026-04-15 22:26:26] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50} +[2026-04-15 22:26:26] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80} +[2026-04-15 22:26:26] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90} +[2026-04-15 22:26:26] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8} +[2026-04-15 22:26:26] [AutoResearch] Only 1 results — using random proposal. diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt index 2e057ba..7bfc5ce 100644 --- a/agent/outerloop-results/autoresearch_phase3_log.txt +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -375,3 +375,8 @@ [2026-04-15 21:54:37] [Wave3] Only 0 results — using random proposal. [2026-04-15 21:54:37] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} [2026-04-15 21:54:37] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} +[2026-04-15 22:26:39] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-15 22:26:39] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-15 22:26:39] [Wave3] Only 0 results — using random proposal. +[2026-04-15 22:26:39] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-15 22:26:39] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} diff --git a/agent/outerloop-results/autoresearch_phase4_log.txt b/agent/outerloop-results/autoresearch_phase4_log.txt index 8fad94b..da81bb0 100644 --- a/agent/outerloop-results/autoresearch_phase4_log.txt +++ b/agent/outerloop-results/autoresearch_phase4_log.txt @@ -294,3 +294,50 @@ [2026-04-15 20:19:40] [Wave4] Proposed params: {'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996} [2026-04-15 20:19:42] [Wave4] Launching trial 8: {'learning_rate': 0.0019521225364821895, 'steps_per_switch': 4185, 'total_timesteps': 110996} [2026-04-15 20:19:42] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 110996 --steps-per-switch 4185 --learning-rate 0.0019521225364821895 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0008 +[2026-04-15 22:19:42] [Wave4] Trial 8 TIMED OUT — killing runner. +[2026-04-15 22:19:42] [Wave4] Trial 8 finished in 7200.2s, rc=-9 +[2026-04-15 22:19:42] [Wave4] Parsed: combined=None mini_monaco=None +[2026-04-15 22:19:42] [Wave4] ⚠️ No test score parsed — defaulting to 0.0 +[2026-04-15 22:19:42] [Wave4] combined_test_score=0 — excluded from GP (crash/timeout). +[2026-04-15 22:19:42] [Wave4] ===== Trial 8 Summary ===== +[2026-04-15 22:19:42] GP data points : 4 +[2026-04-15 22:19:42] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 22:19:42] Top 5: +[2026-04-15 22:19:42] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-15 22:19:42] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 22:19:42] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 22:19:42] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878} +[2026-04-15 22:19:44] +[Wave4] ========== Trial 9/25 ========== +[2026-04-15 22:19:44] [Wave4] GP UCB top-5 proposals: +[2026-04-15 22:19:44] UCB=2.5432 mu=1.2960 σ=0.6236 params={'learning_rate': 0.001989433235306402, 'steps_per_switch': 3575, 'total_timesteps': 65457} +[2026-04-15 22:19:44] UCB=2.5396 mu=1.1027 σ=0.7185 params={'learning_rate': 0.001979671928972082, 'steps_per_switch': 8884, 'total_timesteps': 73116} +[2026-04-15 22:19:44] UCB=2.5350 mu=1.2006 σ=0.6672 params={'learning_rate': 0.0019228314496482347, 'steps_per_switch': 4960, 'total_timesteps': 115468} +[2026-04-15 22:19:44] UCB=2.5256 mu=1.1804 σ=0.6726 params={'learning_rate': 0.0016231147459723914, 'steps_per_switch': 3259, 'total_timesteps': 115708} +[2026-04-15 22:19:44] UCB=2.5201 mu=0.9834 σ=0.7683 params={'learning_rate': 0.0016588035055714473, 'steps_per_switch': 4314, 'total_timesteps': 131749} +[2026-04-15 22:19:44] [Wave4] Proposed params: {'learning_rate': 0.001989433235306402, 'steps_per_switch': 3575, 'total_timesteps': 65457} +[2026-04-15 22:19:46] [Wave4] Launching trial 9: {'learning_rate': 0.001989433235306402, 'steps_per_switch': 3575, 'total_timesteps': 65457} +[2026-04-15 22:19:46] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 65457 --steps-per-switch 3575 --learning-rate 0.001989433235306402 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0009 +[2026-04-15 22:23:21] [Wave4] Trial 9 finished in 215.0s, rc=101 +[2026-04-15 22:23:21] [Wave4] Parsed: combined=None mini_monaco=None +[2026-04-15 22:23:21] [Wave4] ⚠️ No test score parsed — defaulting to 0.0 +[2026-04-15 22:23:21] [Wave4] combined_test_score=0 — excluded from GP (crash/timeout). +[2026-04-15 22:23:21] [Wave4] ===== Trial 9 Summary ===== +[2026-04-15 22:23:21] GP data points : 4 +[2026-04-15 22:23:21] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 22:23:21] Top 5: +[2026-04-15 22:23:21] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-15 22:23:21] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 22:23:21] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 22:23:21] score=31.73 params={'learning_rate': 0.0007517877668650138, 'steps_per_switch': 9368, 'total_timesteps': 104878} +[2026-04-15 22:23:23] +[Wave4] ========== Trial 10/25 ========== +[2026-04-15 22:23:23] [Wave4] GP UCB top-5 proposals: +[2026-04-15 22:23:23] UCB=2.5794 mu=0.9857 σ=0.7969 params={'learning_rate': 0.00192547022313727, 'steps_per_switch': 3237, 'total_timesteps': 124659} +[2026-04-15 22:23:23] UCB=2.5191 mu=1.3579 σ=0.5806 params={'learning_rate': 0.0019414376395480834, 'steps_per_switch': 3402, 'total_timesteps': 69220} +[2026-04-15 22:23:23] UCB=2.5097 mu=0.7258 σ=0.8919 params={'learning_rate': 0.0019051112417148412, 'steps_per_switch': 3607, 'total_timesteps': 144368} +[2026-04-15 22:23:23] UCB=2.4894 mu=1.2599 σ=0.6148 params={'learning_rate': 0.001905194185221269, 'steps_per_switch': 5874, 'total_timesteps': 111439} +[2026-04-15 22:23:23] UCB=2.4776 mu=1.1168 σ=0.6804 params={'learning_rate': 0.0017822503576577222, 'steps_per_switch': 6596, 'total_timesteps': 121681} +[2026-04-15 22:23:23] [Wave4] Proposed params: {'learning_rate': 0.00192547022313727, 'steps_per_switch': 3237, 'total_timesteps': 124659} +[2026-04-15 22:23:25] [Wave4] Launching trial 10: {'learning_rate': 0.00192547022313727, 'steps_per_switch': 3237, 'total_timesteps': 124659} +[2026-04-15 22:23:25] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 124659 --steps-per-switch 3237 --learning-rate 0.00192547022313727 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0010 diff --git a/agent/outerloop-results/autoresearch_results_phase4.jsonl b/agent/outerloop-results/autoresearch_results_phase4.jsonl index 213e0c1..3834922 100644 --- a/agent/outerloop-results/autoresearch_results_phase4.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase4.jsonl @@ -1,7 +1,5 @@ {"trial": 1, "timestamp": "2026-04-15T10:33:27.772991", "params": {"learning_rate": 0.0003, "steps_per_switch": 6000, "total_timesteps": 80000}, "combined_test_score": 42.2964, "mini_monaco_reward": 42.2964, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0001/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4557.003196716309} {"trial": 2, "timestamp": "2026-04-15T11:39:05.855178", "params": {"learning_rate": 0.001, "steps_per_switch": 6000, "total_timesteps": 80000}, "combined_test_score": 93.3894, "mini_monaco_reward": 93.3894, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0002/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3934.0482540130615} -{"trial": 3, "timestamp": "2026-04-15T13:39:32.676397", "params": {"learning_rate": 0.0008162408849407889, "steps_per_switch": 8441, "total_timesteps": 140634}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0003/model.zip", "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7222.726921319962} -{"trial": 4, "timestamp": "2026-04-15T15:39:37.385294", "params": {"learning_rate": 0.00020853884350577402, "steps_per_switch": 19927, "total_timesteps": 138928}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7200.657395839691} {"trial": 5, "timestamp": "2026-04-15T17:08:50.252022", "params": {"learning_rate": 0.0007517877668650138, "steps_per_switch": 9368, "total_timesteps": 104878}, "combined_test_score": 31.73, "mini_monaco_reward": 31.73, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0005/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 5348.823204040527} {"trial": 6, "timestamp": "2026-04-15T18:19:30.998330", "params": {"learning_rate": 0.0016223486895735558, "steps_per_switch": 5524, "total_timesteps": 79532}, "combined_test_score": 176.6721, "mini_monaco_reward": 176.6721, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4235.310993909836} -{"trial": 7, "timestamp": "2026-04-15T20:19:38.497310", "params": {"learning_rate": 0.001779556625962812, "steps_per_switch": 4226, "total_timesteps": 123651}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": null, "champion": false, "run_status": "error_rc-9", "elapsed_sec": 7203.425666332245} +{"trial": 0, "timestamp": "2026-04-15T03:15:46", "params": {"learning_rate": 0.0006852550685205609, "steps_per_switch": 17499, "total_timesteps": 90000}, "combined_test_score": 1943.1, "mini_monaco_reward": 1943.1, "model_path": "agent/models/wave4-trial-0003/model.zip", "champion": true, "run_status": "ok_pre_patch", "elapsed_sec": 5766.0, "note": "Pre-exploit-patch overnight run. Model on disk. steps_per_switch capped for GP."}