wave3: autoresearch trial 5 results

Agent: pi
Tests: N/A
Tests-Added: 0
TypeScript: N/A
This commit is contained in:
Paul Huliganga 2026-04-14 18:22:44 -04:00
parent 349396f967
commit 2a747bb97c
3 changed files with 98 additions and 7 deletions

View File

@ -1,12 +1,12 @@
{ {
"trial": 1, "trial": 5,
"timestamp": "2026-04-14T14:34:25.738395", "timestamp": "2026-04-14T18:22:44.799337",
"params": { "params": {
"learning_rate": 0.000225, "learning_rate": 0.0008293130840877947,
"steps_per_switch": 5000, "steps_per_switch": 7847,
"total_timesteps": 45000 "total_timesteps": 31625
}, },
"combined_test_score": 24.7695, "combined_test_score": 137.5814,
"mini_monaco_reward": 24.7695, "mini_monaco_reward": 137.5814,
"model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion/model.zip" "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion/model.zip"
} }

View File

@ -111,3 +111,90 @@
[2026-04-14 15:12:53] [Wave3] Only 0 results — using random proposal. [2026-04-14 15:12:53] [Wave3] Only 0 results — using random proposal.
[2026-04-14 15:12:53] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} [2026-04-14 15:12:53] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
[2026-04-14 15:12:53] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} [2026-04-14 15:12:53] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
[2026-04-14 15:13:16] =================================================================
[2026-04-14 15:13:16] [Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search
[2026-04-14 15:13:16] [Wave3] Training tracks : generated_road, generated_track, mountain_track
[2026-04-14 15:13:16] [Wave3] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition)
[2026-04-14 15:13:16] [Wave3] Max trials : 25 | kappa=2.0 | push every 5
[2026-04-14 15:13:16] [Wave3] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase3.jsonl
[2026-04-14 15:13:16] [Wave3] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion
[2026-04-14 15:13:16] [Wave3] Warm start : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
[2026-04-14 15:13:16] =================================================================
[2026-04-14 15:13:16] [Wave3] Loaded 1 existing Phase 3 results.
[2026-04-14 15:13:16] [Wave3] Wave3 Champion: trial=1 score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
[2026-04-14 15:13:16] [Wave3] Starting from trial 2.
[2026-04-14 15:13:16]
[Wave3] ========== Trial 2/25 ==========
[2026-04-14 15:13:16] [Wave3] Seed trial 2/2: using hardcoded params.
[2026-04-14 15:13:16] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000}
[2026-04-14 15:13:18] [Wave3] Launching trial 2: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000}
[2026-04-14 15:13:18] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 90000 --steps-per-switch 10000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0002 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
[2026-04-14 16:33:15] [Wave3] Trial 2 finished in 4797.4s, rc=0
[2026-04-14 16:33:15] [Wave3] Parsed: combined=14.61 mini_monaco=14.61
[2026-04-14 16:33:15] [Wave3] ===== Trial 2 Summary =====
[2026-04-14 16:33:15] GP data points : 2
[2026-04-14 16:33:15] Wave3 Champion: trial=1 score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
[2026-04-14 16:33:15] Top 5:
[2026-04-14 16:33:15] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
[2026-04-14 16:33:15] score=14.61 params={'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000}
[2026-04-14 16:33:17]
[Wave3] ========== Trial 3/25 ==========
[2026-04-14 16:33:17] [Wave3] Only 2 results — using random proposal.
[2026-04-14 16:33:17] [Wave3] Proposed params: {'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040}
[2026-04-14 16:33:19] [Wave3] Launching trial 3: {'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040}
[2026-04-14 16:33:19] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 31040 --steps-per-switch 6993 --learning-rate 0.0004302041414294587 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0003 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
[2026-04-14 17:07:08] [Wave3] Trial 3 finished in 2029.0s, rc=0
[2026-04-14 17:07:08] [Wave3] Parsed: combined=27.6387 mini_monaco=27.6387
[2026-04-14 17:07:09] [Champion] 🏆 NEW BEST! Trial 3: score=27.64 (mini_monaco=27.6) params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040}
[2026-04-14 17:07:09] [Wave3] ===== Trial 3 Summary =====
[2026-04-14 17:07:09] GP data points : 3
[2026-04-14 17:07:09] Wave3 Champion: trial=3 score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040}
[2026-04-14 17:07:09] Top 5:
[2026-04-14 17:07:09] score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040}
[2026-04-14 17:07:09] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
[2026-04-14 17:07:09] score=14.61 params={'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000}
[2026-04-14 17:07:11]
[Wave3] ========== Trial 4/25 ==========
[2026-04-14 17:07:11] [Wave3] GP UCB top-5 proposals:
[2026-04-14 17:07:11] UCB=2.2675 mu=0.3935 σ=0.9370 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110}
[2026-04-14 17:07:11] UCB=2.2527 mu=0.5001 σ=0.8763 params={'learning_rate': 0.0006709448038149618, 'steps_per_switch': 4056, 'total_timesteps': 46285}
[2026-04-14 17:07:11] UCB=2.2459 mu=0.6168 σ=0.8145 params={'learning_rate': 0.0006364798923287126, 'steps_per_switch': 4185, 'total_timesteps': 39120}
[2026-04-14 17:07:11] UCB=2.2458 mu=0.6302 σ=0.8078 params={'learning_rate': 0.0006330433971389486, 'steps_per_switch': 4177, 'total_timesteps': 37673}
[2026-04-14 17:07:11] UCB=2.2435 mu=0.5485 σ=0.8475 params={'learning_rate': 0.0005264768982970893, 'steps_per_switch': 2587, 'total_timesteps': 32853}
[2026-04-14 17:07:11] [Wave3] Proposed params: {'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110}
[2026-04-14 17:07:13] [Wave3] Launching trial 4: {'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110}
[2026-04-14 17:07:13] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 31110 --steps-per-switch 2400 --learning-rate 0.0006723430224246657 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0004 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
[2026-04-14 17:52:55] [Wave3] Trial 4 finished in 2742.5s, rc=0
[2026-04-14 17:52:55] [Wave3] Parsed: combined=28.0934 mini_monaco=28.0934
[2026-04-14 17:52:56] [Champion] 🏆 NEW BEST! Trial 4: score=28.09 (mini_monaco=28.1) params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110}
[2026-04-14 17:52:56] [Wave3] ===== Trial 4 Summary =====
[2026-04-14 17:52:56] GP data points : 4
[2026-04-14 17:52:56] Wave3 Champion: trial=4 score=28.09 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110}
[2026-04-14 17:52:56] Top 5:
[2026-04-14 17:52:56] score=28.09 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110}
[2026-04-14 17:52:56] score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040}
[2026-04-14 17:52:56] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
[2026-04-14 17:52:56] score=14.61 params={'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000}
[2026-04-14 17:52:58]
[Wave3] ========== Trial 5/25 ==========
[2026-04-14 17:52:58] [Wave3] GP UCB top-5 proposals:
[2026-04-14 17:52:58] UCB=2.2084 mu=0.4374 σ=0.8855 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625}
[2026-04-14 17:52:58] UCB=2.1978 mu=0.3485 σ=0.9246 params={'learning_rate': 0.0008923041447868616, 'steps_per_switch': 7241, 'total_timesteps': 43058}
[2026-04-14 17:52:58] UCB=2.1976 mu=0.3420 σ=0.9278 params={'learning_rate': 0.0008833374140842175, 'steps_per_switch': 7589, 'total_timesteps': 42051}
[2026-04-14 17:52:58] UCB=2.1805 mu=0.3449 σ=0.9178 params={'learning_rate': 0.0009537044683873197, 'steps_per_switch': 6193, 'total_timesteps': 33355}
[2026-04-14 17:52:58] UCB=2.1634 mu=0.2762 σ=0.9436 params={'learning_rate': 0.0008911867335423042, 'steps_per_switch': 6839, 'total_timesteps': 57412}
[2026-04-14 17:52:58] [Wave3] Proposed params: {'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625}
[2026-04-14 17:53:00] [Wave3] Launching trial 5: {'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625}
[2026-04-14 17:53:00] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 31625 --steps-per-switch 7847 --learning-rate 0.0008293130840877947 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0005 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
[2026-04-14 18:22:44] [Wave3] Trial 5 finished in 1784.4s, rc=0
[2026-04-14 18:22:44] [Wave3] Parsed: combined=137.5814 mini_monaco=137.5814
[2026-04-14 18:22:44] [Champion] 🏆 NEW BEST! Trial 5: score=137.58 (mini_monaco=137.6) params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625}
[2026-04-14 18:22:44] [Wave3] ===== Trial 5 Summary =====
[2026-04-14 18:22:44] GP data points : 5
[2026-04-14 18:22:44] Wave3 Champion: trial=5 score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625}
[2026-04-14 18:22:44] Top 5:
[2026-04-14 18:22:44] score=137.58 params={'learning_rate': 0.0008293130840877947, 'steps_per_switch': 7847, 'total_timesteps': 31625}
[2026-04-14 18:22:44] score=28.09 params={'learning_rate': 0.0006723430224246657, 'steps_per_switch': 2400, 'total_timesteps': 31110}
[2026-04-14 18:22:44] score=27.64 params={'learning_rate': 0.0004302041414294587, 'steps_per_switch': 6993, 'total_timesteps': 31040}
[2026-04-14 18:22:44] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
[2026-04-14 18:22:44] score=14.61 params={'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000}

View File

@ -1 +1,5 @@
{"trial": 1, "timestamp": "2026-04-14T14:34:25.739734", "params": {"learning_rate": 0.000225, "steps_per_switch": 5000, "total_timesteps": 45000}, "combined_test_score": 24.7695, "mini_monaco_reward": 24.7695, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2808.679010629654} {"trial": 1, "timestamp": "2026-04-14T14:34:25.739734", "params": {"learning_rate": 0.000225, "steps_per_switch": 5000, "total_timesteps": 45000}, "combined_test_score": 24.7695, "mini_monaco_reward": 24.7695, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2808.679010629654}
{"trial": 2, "timestamp": "2026-04-14T16:33:15.893806", "params": {"learning_rate": 0.000225, "steps_per_switch": 10000, "total_timesteps": 90000}, "combined_test_score": 14.61, "mini_monaco_reward": 14.61, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0002/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4797.378093481064}
{"trial": 3, "timestamp": "2026-04-14T17:07:09.327044", "params": {"learning_rate": 0.0004302041414294587, "steps_per_switch": 6993, "total_timesteps": 31040}, "combined_test_score": 27.6387, "mini_monaco_reward": 27.6387, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0003/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2028.9967634677887}
{"trial": 4, "timestamp": "2026-04-14T17:52:56.249319", "params": {"learning_rate": 0.0006723430224246657, "steps_per_switch": 2400, "total_timesteps": 31110}, "combined_test_score": 28.0934, "mini_monaco_reward": 28.0934, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0004/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2742.52951335907}
{"trial": 5, "timestamp": "2026-04-14T18:22:44.803514", "params": {"learning_rate": 0.0008293130840877947, "steps_per_switch": 7847, "total_timesteps": 31625}, "combined_test_score": 137.5814, "mini_monaco_reward": 137.5814, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0005/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 1784.4159185886383}