diff --git a/agent/outerloop-results/autoresearch_phase4_log.txt b/agent/outerloop-results/autoresearch_phase4_log.txt index 1903e9e..3e163cd 100644 --- a/agent/outerloop-results/autoresearch_phase4_log.txt +++ b/agent/outerloop-results/autoresearch_phase4_log.txt @@ -388,3 +388,103 @@ [2026-04-15 22:38:51] [Wave4] Proposed params: {'learning_rate': 0.0004346662414818233, 'steps_per_switch': 14860, 'total_timesteps': 83818} [2026-04-15 22:38:53] [Wave4] Launching trial 7: {'learning_rate': 0.0004346662414818233, 'steps_per_switch': 14860, 'total_timesteps': 83818} [2026-04-15 22:38:53] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 83818 --steps-per-switch 14860 --learning-rate 0.0004346662414818233 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0007 +[2026-04-15 22:47:30] ================================================================= +[2026-04-15 22:47:30] [Wave4] Multi-Track Autoresearch — GP+UCB Generalization Search +[2026-04-15 22:47:30] [Wave4] Training tracks : generated_track, mountain_track (no generated_road, no warm-start) +[2026-04-15 22:47:30] [Wave4] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition) +[2026-04-15 22:47:30] [Wave4] Max trials : 25 | kappa=2.0 | push every 5 +[2026-04-15 22:47:30] [Wave4] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase4.jsonl +[2026-04-15 22:47:30] [Wave4] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-champion +[2026-04-15 22:47:30] [Wave4] Warm start : NONE (training from scratch each trial) +[2026-04-15 22:47:30] ================================================================= +[2026-04-15 22:47:30] [Wave4] Loaded 6 existing Phase 3 results. +[2026-04-15 22:47:30] [Wave4] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 22:47:30] [Wave4] Starting from trial 7. +[2026-04-15 22:47:30] +[Wave4] ========== Trial 7/25 ========== +[2026-04-15 22:47:30] [Wave4] GP UCB top-5 proposals: +[2026-04-15 22:47:30] UCB=3.2602 mu=2.0535 σ=0.6034 params={'learning_rate': 0.0003074676335173842, 'steps_per_switch': 14103, 'total_timesteps': 89493} +[2026-04-15 22:47:30] UCB=3.2334 mu=1.9885 σ=0.6224 params={'learning_rate': 0.00030011572199819647, 'steps_per_switch': 13772, 'total_timesteps': 89937} +[2026-04-15 22:47:30] UCB=3.1559 mu=2.0419 σ=0.5570 params={'learning_rate': 0.0003574517736244094, 'steps_per_switch': 14971, 'total_timesteps': 88076} +[2026-04-15 22:47:30] UCB=2.5661 mu=1.3681 σ=0.5990 params={'learning_rate': 0.00039989051435482747, 'steps_per_switch': 12483, 'total_timesteps': 89578} +[2026-04-15 22:47:30] UCB=2.5147 mu=0.7976 σ=0.8585 params={'learning_rate': 0.00013529496950197116, 'steps_per_switch': 12536, 'total_timesteps': 85905} +[2026-04-15 22:47:30] [Wave4] Proposed params: {'learning_rate': 0.0003074676335173842, 'steps_per_switch': 14103, 'total_timesteps': 89493} +[2026-04-15 22:47:32] [Wave4] Launching trial 7: {'learning_rate': 0.0003074676335173842, 'steps_per_switch': 14103, 'total_timesteps': 89493} +[2026-04-15 22:47:32] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 89493 --steps-per-switch 14103 --learning-rate 0.0003074676335173842 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0007 +[2026-04-15 23:45:56] [Wave4] Trial 7 finished in 3503.6s, rc=0 +[2026-04-15 23:45:56] [Wave4] Parsed: combined=80.9467 mini_monaco=80.9467 +[2026-04-15 23:45:56] [Wave4] ===== Trial 7 Summary ===== +[2026-04-15 23:45:56] GP data points : 7 +[2026-04-15 23:45:56] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-15 23:45:56] Top 5: +[2026-04-15 23:45:56] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000} +[2026-04-15 23:45:56] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-15 23:45:56] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 23:45:56] score=80.95 params={'learning_rate': 0.0003074676335173842, 'steps_per_switch': 14103, 'total_timesteps': 89493} +[2026-04-15 23:45:56] score=42.30 params={'learning_rate': 0.0003, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-15 23:45:58] +[Wave4] ========== Trial 8/25 ========== +[2026-04-15 23:45:58] [Wave4] GP UCB top-5 proposals: +[2026-04-15 23:45:58] UCB=2.8761 mu=2.3811 σ=0.2475 params={'learning_rate': 0.0008478421211922181, 'steps_per_switch': 14326, 'total_timesteps': 89798} +[2026-04-15 23:45:58] UCB=2.7487 mu=1.8283 σ=0.4602 params={'learning_rate': 0.0010473338684621142, 'steps_per_switch': 13978, 'total_timesteps': 89106} +[2026-04-15 23:45:58] UCB=2.7282 mu=1.1574 σ=0.7854 params={'learning_rate': 0.0013992142565926369, 'steps_per_switch': 14988, 'total_timesteps': 89592} +[2026-04-15 23:45:58] UCB=2.1012 mu=1.4334 σ=0.3339 params={'learning_rate': 0.0008049497388184544, 'steps_per_switch': 13467, 'total_timesteps': 88845} +[2026-04-15 23:45:58] UCB=2.0136 mu=0.1079 σ=0.9528 params={'learning_rate': 0.0017586582805663546, 'steps_per_switch': 14607, 'total_timesteps': 89073} +[2026-04-15 23:45:58] [Wave4] Proposed params: {'learning_rate': 0.0008478421211922181, 'steps_per_switch': 14326, 'total_timesteps': 89798} +[2026-04-15 23:46:00] [Wave4] Launching trial 8: {'learning_rate': 0.0008478421211922181, 'steps_per_switch': 14326, 'total_timesteps': 89798} +[2026-04-15 23:46:00] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 89798 --steps-per-switch 14326 --learning-rate 0.0008478421211922181 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0008 +[2026-04-16 00:48:42] [Wave4] Trial 8 finished in 3762.1s, rc=0 +[2026-04-16 00:48:42] [Wave4] Parsed: combined=116.2588 mini_monaco=116.2588 +[2026-04-16 00:48:42] [Wave4] ===== Trial 8 Summary ===== +[2026-04-16 00:48:42] GP data points : 8 +[2026-04-16 00:48:42] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-16 00:48:42] Top 5: +[2026-04-16 00:48:42] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000} +[2026-04-16 00:48:42] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-16 00:48:42] score=116.26 params={'learning_rate': 0.0008478421211922181, 'steps_per_switch': 14326, 'total_timesteps': 89798} +[2026-04-16 00:48:42] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-16 00:48:42] score=80.95 params={'learning_rate': 0.0003074676335173842, 'steps_per_switch': 14103, 'total_timesteps': 89493} +[2026-04-16 00:48:44] +[Wave4] ========== Trial 9/25 ========== +[2026-04-16 00:48:44] [Wave4] GP UCB top-5 proposals: +[2026-04-16 00:48:44] UCB=2.6052 mu=1.5807 σ=0.5122 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893} +[2026-04-16 00:48:44] UCB=2.5103 mu=1.1029 σ=0.7037 params={'learning_rate': 0.0007143303795900681, 'steps_per_switch': 5097, 'total_timesteps': 88077} +[2026-04-16 00:48:44] UCB=2.3990 mu=0.8072 σ=0.7959 params={'learning_rate': 0.00027589394711147663, 'steps_per_switch': 5263, 'total_timesteps': 89853} +[2026-04-16 00:48:44] UCB=2.3635 mu=0.6759 σ=0.8438 params={'learning_rate': 0.0007616025453782968, 'steps_per_switch': 3774, 'total_timesteps': 88582} +[2026-04-16 00:48:44] UCB=2.3265 mu=0.9173 σ=0.7046 params={'learning_rate': 0.0011226509565016517, 'steps_per_switch': 5495, 'total_timesteps': 88334} +[2026-04-16 00:48:44] [Wave4] Proposed params: {'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893} +[2026-04-16 00:48:46] [Wave4] Launching trial 9: {'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893} +[2026-04-16 00:48:46] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 89893 --steps-per-switch 6851 --learning-rate 0.0007252855740444645 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0009 +[2026-04-16 02:11:13] [Wave4] Trial 9 finished in 4947.1s, rc=0 +[2026-04-16 02:11:13] [Wave4] Parsed: combined=1435.0401 mini_monaco=1435.0401 +[2026-04-16 02:11:13] [Wave4] ===== Trial 9 Summary ===== +[2026-04-16 02:11:13] GP data points : 9 +[2026-04-16 02:11:13] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-16 02:11:13] Top 5: +[2026-04-16 02:11:13] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000} +[2026-04-16 02:11:13] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893} +[2026-04-16 02:11:13] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-16 02:11:13] score=116.26 params={'learning_rate': 0.0008478421211922181, 'steps_per_switch': 14326, 'total_timesteps': 89798} +[2026-04-16 02:11:13] score=93.39 params={'learning_rate': 0.001, 'steps_per_switch': 6000, 'total_timesteps': 80000} +[2026-04-16 02:11:15] +[Wave4] ========== Trial 10/25 ========== +[2026-04-16 02:11:15] [Wave4] GP UCB top-5 proposals: +[2026-04-16 02:11:15] UCB=2.0934 mu=0.9205 σ=0.5865 params={'learning_rate': 0.0010578566658808422, 'steps_per_switch': 4587, 'total_timesteps': 89501} +[2026-04-16 02:11:15] UCB=1.9984 mu=0.3915 σ=0.8034 params={'learning_rate': 0.00150594219964486, 'steps_per_switch': 5446, 'total_timesteps': 89972} +[2026-04-16 02:11:15] UCB=1.9795 mu=-0.0207 σ=1.0001 params={'learning_rate': 0.00187204813121039, 'steps_per_switch': 11474, 'total_timesteps': 60629} +[2026-04-16 02:11:15] UCB=1.9792 mu=-0.0197 σ=0.9994 params={'learning_rate': 0.0006516393606134399, 'steps_per_switch': 14385, 'total_timesteps': 60164} +[2026-04-16 02:11:15] UCB=1.9789 mu=-0.0200 σ=0.9995 params={'learning_rate': 0.001964829385849987, 'steps_per_switch': 10248, 'total_timesteps': 61072} +[2026-04-16 02:11:15] [Wave4] Proposed params: {'learning_rate': 0.0010578566658808422, 'steps_per_switch': 4587, 'total_timesteps': 89501} +[2026-04-16 02:11:17] [Wave4] Launching trial 10: {'learning_rate': 0.0010578566658808422, 'steps_per_switch': 4587, 'total_timesteps': 89501} +[2026-04-16 02:11:17] [Wave4] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 89501 --steps-per-switch 4587 --learning-rate 0.0010578566658808422 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0010 +[2026-04-16 03:31:41] [Wave4] Trial 10 finished in 4823.9s, rc=0 +[2026-04-16 03:31:41] [Wave4] Parsed: combined=141.3547 mini_monaco=141.3547 +[2026-04-16 03:31:41] [Wave4] ===== Trial 10 Summary ===== +[2026-04-16 03:31:41] GP data points : 10 +[2026-04-16 03:31:41] Wave4 Champion: trial=3 score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 157743} +[2026-04-16 03:31:41] Top 5: +[2026-04-16 03:31:41] score=1943.10 params={'learning_rate': 0.0006852550685205609, 'steps_per_switch': 17499, 'total_timesteps': 90000} +[2026-04-16 03:31:41] score=1435.04 params={'learning_rate': 0.0007252855740444645, 'steps_per_switch': 6851, 'total_timesteps': 89893} +[2026-04-16 03:31:41] score=176.67 params={'learning_rate': 0.0016223486895735558, 'steps_per_switch': 5524, 'total_timesteps': 79532} +[2026-04-16 03:31:41] score=141.35 params={'learning_rate': 0.0010578566658808422, 'steps_per_switch': 4587, 'total_timesteps': 89501} +[2026-04-16 03:31:41] score=116.26 params={'learning_rate': 0.0008478421211922181, 'steps_per_switch': 14326, 'total_timesteps': 89798} diff --git a/agent/outerloop-results/autoresearch_results_phase4.jsonl b/agent/outerloop-results/autoresearch_results_phase4.jsonl index 5cd802d..46e5dfe 100644 --- a/agent/outerloop-results/autoresearch_results_phase4.jsonl +++ b/agent/outerloop-results/autoresearch_results_phase4.jsonl @@ -4,3 +4,7 @@ {"trial": 6, "timestamp": "2026-04-15T18:19:30.998330", "params": {"learning_rate": 0.0016223486895735558, "steps_per_switch": 5524, "total_timesteps": 79532}, "combined_test_score": 176.6721, "mini_monaco_reward": 176.6721, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4235.310993909836} {"trial": 0, "timestamp": "2026-04-15T03:15:46", "params": {"learning_rate": 0.0006852550685205609, "steps_per_switch": 17499, "total_timesteps": 90000}, "combined_test_score": 1943.1, "mini_monaco_reward": 1943.1, "model_path": "agent/models/wave4-trial-0003/model.zip", "champion": true, "run_status": "ok_pre_patch", "elapsed_sec": 5766.0, "note": "Pre-exploit-patch overnight run. Model on disk. steps_per_switch capped for GP."} {"trial": 6, "timestamp": "2026-04-15T22:38:49.383770", "params": {"learning_rate": 0.0009434282949002715, "steps_per_switch": 14966, "total_timesteps": 83094}, "combined_test_score": 0.0, "mini_monaco_reward": 0.0, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0006/model.zip", "champion": false, "run_status": "error_rc101", "elapsed_sec": 712.563179731369} +{"trial": 7, "timestamp": "2026-04-15T23:45:56.207960", "params": {"learning_rate": 0.0003074676335173842, "steps_per_switch": 14103, "total_timesteps": 89493}, "combined_test_score": 80.9467, "mini_monaco_reward": 80.9467, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0007/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3503.5840775966644} +{"trial": 8, "timestamp": "2026-04-16T00:48:42.336768", "params": {"learning_rate": 0.0008478421211922181, "steps_per_switch": 14326, "total_timesteps": 89798}, "combined_test_score": 116.2588, "mini_monaco_reward": 116.2588, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0008/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 3762.1077337265015} +{"trial": 9, "timestamp": "2026-04-16T02:11:13.466663", "params": {"learning_rate": 0.0007252855740444645, "steps_per_switch": 6851, "total_timesteps": 89893}, "combined_test_score": 1435.0401, "mini_monaco_reward": 1435.0401, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0009/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4947.0997133255005} +{"trial": 10, "timestamp": "2026-04-16T03:31:41.385487", "params": {"learning_rate": 0.0010578566658808422, "steps_per_switch": 4587, "total_timesteps": 89501}, "combined_test_score": 141.3547, "mini_monaco_reward": 141.3547, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave4-trial-0010/model.zip", "champion": false, "run_status": "ok", "elapsed_sec": 4823.869568347931}