From 2a747bb97c33c2ec4080f94d16199c612f970d06 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Tue, 14 Apr 2026 18:22:44 -0400 Subject: [PATCH] wave3: autoresearch trial 5 results Agent: pi Tests: N/A Tests-Added: 0 TypeScript: N/A --- agent/models/wave3-champion/manifest.json | 14 +-- .../autoresearch_phase3_log.txt | 87 +++++++++++++++++++ .../autoresearch_results_phase3.jsonl | 4 + 3 files changed, 98 insertions(+), 7 deletions(-) diff --git a/agent/models/wave3-champion/manifest.json b/agent/models/wave3-champion/manifest.json index 03afa70..d0c9f96 100644 --- a/agent/models/wave3-champion/manifest.json +++ b/agent/models/wave3-champion/manifest.json @@ -1,12 +1,12 @@ { - "trial": 1, - "timestamp": "2026-04-14T14:34:25.738395", + "trial": 5, + "timestamp": "2026-04-14T18:22:44.799337", "params": { - "learning_rate": 0.000225, - "steps_per_switch": 5000, - "total_timesteps": 45000 + "learning_rate": 0.0008293130840877947, + "steps_per_switch": 7847, + "total_timesteps": 31625 }, - "combined_test_score": 24.7695, - "mini_monaco_reward": 24.7695, + "combined_test_score": 137.5814, + "mini_monaco_reward": 137.5814, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion/model.zip" } \ No newline at end of file diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt index 08e2f1c..819b70b 100644 --- a/agent/outerloop-results/autoresearch_phase3_log.txt +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -111,3 +111,90 @@ [2026-04-14 15:12:53] [Wave3] Only 0 results — using random proposal. [2026-04-14 15:12:53] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} [2026-04-14 15:12:53] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} +[2026-04-14 15:13:16] ================================================================= +[2026-04-14 15:13:16] [Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search +[2026-04-14 15:13:16] [Wave3] Training tracks : generated_road, generated_track, mountain_track +[2026-04-14 15:13:16] [Wave3] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition) +[2026-04-14 15:13:16] [Wave3] Max trials : 25 | kappa=2.0 | push every 5 +[2026-04-14 15:13:16] [Wave3] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase3.jsonl +[2026-04-14 15:13:16] [Wave3] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion +[2026-04-14 15:13:16] [Wave3] Warm start : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 15:13:16] ================================================================= +[2026-04-14 15:13:16] [Wave3] Loaded 1 existing Phase 3 results. +[2026-04-14 15:13:16] [Wave3] Wave3 Champion: trial=1 score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 15:13:16] [Wave3] Starting from trial 2. +[2026-04-14 15:13:16] +[Wave3] ========== Trial 2/25 ========== +[2026-04-14 15:13:16] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 15:13:16] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000} +[2026-04-14 15:13:18] [Wave3] Launching trial 2: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000} +[2026-04-14 15:13:18] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 90000 --steps-per-switch 10000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0002 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 16:33:15] [Wave3] Trial 2 finished in 4797.4s, rc=0 +[2026-04-14 16:33:15] [Wave3] Parsed: combined=14.61 mini_monaco=14.61 +[2026-04-14 16:33:15] [Wave3] ===== Trial 2 Summary ===== +[2026-04-14 16:33:15] GP data points : 2 +[2026-04-14 16:33:15] Wave3 Champion: trial=1 score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 16:33:15] Top 5: +[2026-04-14 16:33:15] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 16:33:15] score=14.61 params={'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000} +[2026-04-14 16:33:17] +[Wave3] ========== Trial 3/25 ========== +[2026-04-14 16:33:17] [Wave3] Only 2 results — using random proposal. +[2026-04-14 16:33:17] [Wave3] Proposed params: {'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} +[2026-04-14 16:33:19] [Wave3] Launching trial 3: {'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} +[2026-04-14 16:33:19] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 31040 --steps-per-switch 6993 --learning-rate 0.0004302041414294587 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0003 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 17:07:08] [Wave3] Trial 3 finished in 2029.0s, rc=0 +[2026-04-14 17:07:08] [Wave3] Parsed: combined=27.6387 mini_monaco=27.6387 +[2026-04-14 17:07:09] [Champion] 🏆 NEW BEST! Trial 3: score=27.64 (mini_monaco=27.6) params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} +[2026-04-14 17:07:09] [Wave3] ===== Trial 3 Summary ===== +[2026-04-14 17:07:09] GP data points : 3 +[2026-04-14 17:07:09] Wave3 Champion: trial=3 score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} +[2026-04-14 17:07:09] Top 5: +[2026-04-14 17:07:09] score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} +[2026-04-14 17:07:09] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 17:07:09] score=14.61 params={'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000} +[2026-04-14 17:07:11] +[Wave3] ========== Trial 4/25 ========== +[2026-04-14 17:07:11] [Wave3] GP UCB top-5 proposals: +[2026-04-14 17:07:11] UCB=2.2675 mu=0.3935 σ=0.9370 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 17:07:11] UCB=2.2527 mu=0.5001 σ=0.8763 params={'learning_rate': 0.0006709448038149618, 'steps_per_switch': 4056, 'total_timesteps': 46285} +[2026-04-14 17:07:11] UCB=2.2459 mu=0.6168 σ=0.8145 params={'learning_rate': 0.0006364798923287126, 'steps_per_switch': 4185, 'total_timesteps': 39120} +[2026-04-14 17:07:11] UCB=2.2458 mu=0.6302 σ=0.8078 params={'learning_rate': 0.0006330433971389486, 'steps_per_switch': 4177, 'total_timesteps': 37673} +[2026-04-14 17:07:11] UCB=2.2435 mu=0.5485 σ=0.8475 params={'learning_rate': 0.0005264768982970893, 'steps_per_switch': 2587, 'total_timesteps': 32853} +[2026-04-14 17:07:11] [Wave3] Proposed params: {'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 17:07:13] [Wave3] Launching trial 4: {'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 17:07:13] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 31110 --steps-per-switch 2400 --learning-rate 0.0006723430224246657 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0004 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 17:52:55] [Wave3] Trial 4 finished in 2742.5s, rc=0 +[2026-04-14 17:52:55] [Wave3] Parsed: combined=28.0934 mini_monaco=28.0934 +[2026-04-14 17:52:56] [Champion] 🏆 NEW BEST! Trial 4: score=28.09 (mini_monaco=28.1) params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 17:52:56] [Wave3] ===== Trial 4 Summary ===== +[2026-04-14 17:52:56] GP data points : 4 +[2026-04-14 17:52:56] Wave3 Champion: trial=4 score=28.09 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 17:52:56] Top 5: +[2026-04-14 17:52:56] score=28.09 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 17:52:56] score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} +[2026-04-14 17:52:56] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 17:52:56] score=14.61 params={'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000} +[2026-04-14 17:52:58] +[Wave3] ========== Trial 5/25 ========== +[2026-04-14 17:52:58] [Wave3] GP UCB top-5 proposals: +[2026-04-14 17:52:58] UCB=2.2084 mu=0.4374 σ=0.8855 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 17:52:58] UCB=2.1978 mu=0.3485 σ=0.9246 params={'learning_rate': 0.0008923041447868616, 'steps_per_switch': 7241, 'total_timesteps': 43058} +[2026-04-14 17:52:58] UCB=2.1976 mu=0.3420 σ=0.9278 params={'learning_rate': 0.0008833374140842175, 'steps_per_switch': 7589, 'total_timesteps': 42051} +[2026-04-14 17:52:58] UCB=2.1805 mu=0.3449 σ=0.9178 params={'learning_rate': 0.0009537044683873197, 'steps_per_switch': 6193, 'total_timesteps': 33355} +[2026-04-14 17:52:58] UCB=2.1634 mu=0.2762 σ=0.9436 params={'learning_rate': 0.0008911867335423042, 'steps_per_switch': 6839, 'total_timesteps': 57412} +[2026-04-14 17:52:58] [Wave3] Proposed params: {'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 17:53:00] [Wave3] Launching trial 5: {'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 17:53:00] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 31625 --steps-per-switch 7847 --learning-rate 0.0008293130840877947 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0005 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 18:22:44] [Wave3] Trial 5 finished in 1784.4s, rc=0 +[2026-04-14 18:22:44] [Wave3] Parsed: combined=137.5814 mini_monaco=137.5814 +[2026-04-14 18:22:44] [Champion] 🏆 NEW BEST! Trial 5: score=137.58 (mini_monaco=137.6) params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 18:22:44] [Wave3] ===== Trial 5 Summary ===== +[2026-04-14 18:22:44] GP data points : 5 +[2026-04-14 18:22:44] Wave3 Champion: trial=5 score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 18:22:44] Top 5: +[2026-04-14 18:22:44] score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625} +[2026-04-14 18:22:44] score=28.09 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110} +[2026-04-14 18:22:44] score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040} +[2026-04-14 18:22:44] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 18:22:44] score=14.61 params={'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000} diff --git a/agent/outerloop-results/autoresearch_results_phase3.jsonl b/agent/outerloop-results/autoresearch_results_phase3.jsonl index 6900f87..1b2f6f3 100644 --- a/agent/outerloop-results/autoresearch_results_phase3.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase3.jsonl @@ -1 +1,5 @@ {"trial": 1, "timestamp": "2026-04-14T14:34:25.739734", "params": {"learning_rate": 0.000225, "steps_per_switch": 5000, "total_timesteps": 45000}, "combined_test_score": 24.7695, "mini_monaco_reward": 24.7695, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2808.679010629654} +{"trial": 2, "timestamp": "2026-04-14T16:33:15.893806", "params": {"learning_rate": 0.000225, "steps_per_switch": 10000, "total_timesteps": 90000}, "combined_test_score": 14.61, "mini_monaco_reward": 14.61, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0002/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4797.378093481064} +{"trial": 3, "timestamp": "2026-04-14T17:07:09.327044", "params": {"learning_rate": 0.0004302041414294587, "steps_per_switch": 6993, "total_timesteps": 31040}, "combined_test_score": 27.6387, "mini_monaco_reward": 27.6387, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0003/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2028.9967634677887} +{"trial": 4, "timestamp": "2026-04-14T17:52:56.249319", "params": {"learning_rate": 0.0006723430224246657, "steps_per_switch": 2400, "total_timesteps": 31110}, "combined_test_score": 28.0934, "mini_monaco_reward": 28.0934, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0004/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2742.52951335907} +{"trial": 5, "timestamp": "2026-04-14T18:22:44.803514", "params": {"learning_rate": 0.0008293130840877947, "steps_per_switch": 7847, "total_timesteps": 31625}, "combined_test_score": 137.5814, "mini_monaco_reward": 137.5814, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0005/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 1784.4159185886383}