From 349396f967d6dd8267073bacecf54ed3bf2a37ce Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Tue, 14 Apr 2026 15:13:10 -0400 Subject: [PATCH] fix: stream runner output in real-time instead of buffering Replace subprocess.run(capture_output=True) with Popen + line-by-line iteration so every line from multitrack_runner.py appears in the nohup log immediately rather than only after the trial completes (~35-90 min). - stdout/stderr merged via stderr=STDOUT - line-buffered (bufsize=1) - deadline-based timeout replaces subprocess timeout kwarg - output accumulated in list for parse_runner_output() as before Agent: pi Tests: 30 passed Tests-Added: 0 TypeScript: N/A --- agent/models/wave3-champion/manifest.json | 12 ++++++ .../autoresearch_phase3_log.txt | 37 ++++++++++++++++++ .../autoresearch_results_phase3.jsonl | 1 + agent/wave3_controller.py | 38 ++++++++++++------- 4 files changed, 75 insertions(+), 13 deletions(-) create mode 100644 agent/models/wave3-champion/manifest.json create mode 100644 agent/outerloop-results/autoresearch_results_phase3.jsonl diff --git a/agent/models/wave3-champion/manifest.json b/agent/models/wave3-champion/manifest.json new file mode 100644 index 0000000..03afa70 --- /dev/null +++ b/agent/models/wave3-champion/manifest.json @@ -0,0 +1,12 @@ +{ + "trial": 1, + "timestamp": "2026-04-14T14:34:25.738395", + "params": { + "learning_rate": 0.000225, + "steps_per_switch": 5000, + "total_timesteps": 45000 + }, + "combined_test_score": 24.7695, + "mini_monaco_reward": 24.7695, + "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion/model.zip" +} \ No newline at end of file diff --git a/agent/outerloop-results/autoresearch_phase3_log.txt b/agent/outerloop-results/autoresearch_phase3_log.txt index 6c69fdc..08e2f1c 100644 --- a/agent/outerloop-results/autoresearch_phase3_log.txt +++ b/agent/outerloop-results/autoresearch_phase3_log.txt @@ -74,3 +74,40 @@ [2026-04-14 13:47:17] [Wave3] Only 0 results — using random proposal. [2026-04-14 13:47:17] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} [2026-04-14 13:47:17] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} +[2026-04-14 13:47:34] ================================================================= +[2026-04-14 13:47:34] [Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search +[2026-04-14 13:47:34] [Wave3] Training tracks : generated_road, generated_track, mountain_track +[2026-04-14 13:47:34] [Wave3] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition) +[2026-04-14 13:47:34] [Wave3] Max trials : 25 | kappa=2.0 | push every 5 +[2026-04-14 13:47:34] [Wave3] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase3.jsonl +[2026-04-14 13:47:34] [Wave3] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion +[2026-04-14 13:47:34] [Wave3] Warm start : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 13:47:34] ================================================================= +[2026-04-14 13:47:34] [Wave3] Loaded 0 existing Phase 3 results. +[2026-04-14 13:47:34] [Wave3] No Wave 3 champion yet. +[2026-04-14 13:47:34] [Wave3] Starting from trial 1. +[2026-04-14 13:47:34] +[Wave3] ========== Trial 1/25 ========== +[2026-04-14 13:47:34] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 13:47:34] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 13:47:36] [Wave3] Launching trial 1: {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 13:47:36] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 45000 --steps-per-switch 5000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 14:34:25] [Wave3] Trial 1 finished in 2808.7s, rc=0 +[2026-04-14 14:34:25] [Wave3] Parsed: combined=24.7695 mini_monaco=24.7695 +[2026-04-14 14:34:25] [Champion] 🏆 NEW BEST! Trial 1: score=24.77 (mini_monaco=24.8) params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 14:34:25] [Wave3] ===== Trial 1 Summary ===== +[2026-04-14 14:34:25] GP data points : 1 +[2026-04-14 14:34:25] Wave3 Champion: trial=1 score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 14:34:25] Top 5: +[2026-04-14 14:34:25] score=24.77 params={'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000} +[2026-04-14 14:34:27] +[Wave3] ========== Trial 2/25 ========== +[2026-04-14 14:34:27] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 14:34:27] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000} +[2026-04-14 14:34:29] [Wave3] Launching trial 2: {'learning_rate': 0.000225, 'steps_per_switch': 10000, 'total_timesteps': 90000} +[2026-04-14 14:34:29] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 90000 --steps-per-switch 10000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0002 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip +[2026-04-14 15:12:53] [Wave3] Seed trial 1/2: using hardcoded params. +[2026-04-14 15:12:53] [Wave3] Seed trial 2/2: using hardcoded params. +[2026-04-14 15:12:53] [Wave3] Only 0 results — using random proposal. +[2026-04-14 15:12:53] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000} +[2026-04-14 15:12:53] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={} diff --git a/agent/outerloop-results/autoresearch_results_phase3.jsonl b/agent/outerloop-results/autoresearch_results_phase3.jsonl new file mode 100644 index 0000000..6900f87 --- /dev/null +++ b/agent/outerloop-results/autoresearch_results_phase3.jsonl @@ -0,0 +1 @@ +{"trial": 1, "timestamp": "2026-04-14T14:34:25.739734", "params": {"learning_rate": 0.000225, "steps_per_switch": 5000, "total_timesteps": 45000}, "combined_test_score": 24.7695, "mini_monaco_reward": 24.7695, "model_path": "/home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001/model.zip", "champion": true, "run_status": "ok", "elapsed_sec": 2808.679010629654} diff --git a/agent/wave3_controller.py b/agent/wave3_controller.py index 9f2894c..b9c5947 100644 --- a/agent/wave3_controller.py +++ b/agent/wave3_controller.py @@ -343,28 +343,40 @@ def launch_trial(params, trial_num): log(f'[Wave3] Launching trial {trial_num}: {params}') log(f'[Wave3] Command: {" ".join(cmd)}') start = time.time() + output_lines = [] try: - proc = subprocess.run( + proc = subprocess.Popen( cmd, - capture_output=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, # merge stderr into stdout text=True, - timeout=JOB_TIMEOUT, + bufsize=1, # line-buffered ) + + deadline = start + JOB_TIMEOUT + for line in proc.stdout: + line = line.rstrip('\n') + output_lines.append(line) + print(line, flush=True) # streams straight to nohup log + if time.time() > deadline: + proc.kill() + log(f'[Wave3] Trial {trial_num} TIMED OUT — killing runner.') + output_lines.append(f'[TIMEOUT after {JOB_TIMEOUT}s]') + break + + proc.wait() elapsed = time.time() - start - output = proc.stdout + '\n' + proc.stderr status = 'ok' if proc.returncode == 0 else f'error_rc{proc.returncode}' log(f'[Wave3] Trial {trial_num} finished in {elapsed:.1f}s, rc={proc.returncode}') - except subprocess.TimeoutExpired: - elapsed = time.time() - start - output = f'[TIMEOUT after {elapsed:.1f}s]' - status = 'timeout' - log(f'[Wave3] Trial {trial_num} TIMED OUT after {elapsed:.1f}s') - # Always print tail of output - print('\n--- Multitrack Runner Output (tail) ---', flush=True) - print(output[-3000:], flush=True) - print('--- End Runner Output ---\n', flush=True) + except Exception as exc: + elapsed = time.time() - start + output_lines.append(f'[EXCEPTION: {exc}]') + status = 'exception' + log(f'[Wave3] Trial {trial_num} raised exception: {exc}') + + output = '\n'.join(output_lines) # Parse results combined, mini_monaco = parse_runner_output(output)