# donkeycar-rl-autoresearch/agent/donkeycar_outer_loop.py
#
# 129 lines
# 5.8 KiB
# Python

import itertools
import subprocess
import json
import time
import os
import signal
def kill_old_rl():
    """Force-kill any lingering RL runner processes (best effort).

    Invokes ``pkill -9 -f donkeycar_sb3_runner.py``. The exit status of
    ``pkill`` is not checked; if launching it raises, the error is printed
    and otherwise ignored so the caller can carry on.
    """
    print('[Outer Loop] Killing any stalled RL jobs…', flush=True)
    pkill_cmd = ['pkill', '-9', '-f', 'donkeycar_sb3_runner.py']
    try:
        # check=False: a non-zero exit status is not turned into an exception.
        subprocess.run(pkill_cmd, check=False)
    except Exception as err:
        print(f'[Outer Loop] pkill failed: {err}', flush=True)
# Search space for the sweep: each key maps to the list of candidate values
# tried for that parameter.
grid = dict(
    n_steer=[3, 5, 7],
    n_throttle=[2, 3],
    learning_rate=[0.001, 0.0005, 0.0001],
    timesteps=[2000],
    eval_episodes=[3],
)

# Number of repeated trials run for every unique configuration.
REPEATS = 3
def build_param_combinations(grid):
    """Lazily yield one dict per point of the cartesian product of *grid*.

    Args:
        grid: Mapping from parameter name to a list of candidate values.

    Yields:
        A ``{name: value}`` dict for each combination. Keys follow the
        iteration order of *grid*, and the last key varies fastest.
    """
    names = list(grid)
    candidates = [grid[name] for name in names]
    yield from (
        dict(zip(names, combo)) for combo in itertools.product(*candidates)
    )
_DEFAULT_OUT_DIR = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results'
_DEFAULT_RUNNER_SCRIPT = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/donkeycar_sb3_runner.py'
_EVAL_LINE_MARKER = '[SB3 Runner] Eval episodes='


def _append_monitor_line(monitor_path, message):
    """Append *message* verbatim to the monitor log at *monitor_path*."""
    with open(monitor_path, 'a') as mlog:
        mlog.write(message)


def _as_text(val):
    """Return *val* as ``str``; ``None`` becomes ``''`` and bytes are decoded as UTF-8."""
    if val is None:
        return ''
    if isinstance(val, bytes):
        return val.decode('utf-8', errors='replace')
    return val


def _parse_mean_reward(output):
    """Extract ``mean_reward`` from the runner's eval line in *output*.

    Every line containing the eval marker is inspected and the last one wins;
    a marker line that cannot be parsed resets the result to ``None``.
    """
    mean_reward = None
    for line in output.split('\n'):
        if _EVAL_LINE_MARKER in line:
            try:
                mean_reward = float(line.split('mean_reward=')[1].split()[0])
            except (IndexError, ValueError):
                # Missing 'mean_reward=' token, nothing after it, or not a number.
                mean_reward = None
    return mean_reward


def _launch_runner(cmd, child_env, timeout_sec, monitor_path):
    """Run one inner RL job and report how it ended.

    Returns:
        ``(status, returncode, output, elapsed)`` where *status* is ``'ok'``,
        ``'error'`` or ``'timeout'`` and *returncode* is ``None`` when the
        process did not finish (timeout or launch failure).
    """
    start = time.time()
    try:
        proc = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout_sec, env=child_env
        )
    except subprocess.TimeoutExpired as e:
        elapsed = time.time() - start
        # The captured partial output may be bytes or None even though
        # text=True was requested, hence the _as_text() normalisation.
        output = (
            f"[TIMEOUT] Experiment timed out after {elapsed:.1f}s. Partial output below:\n"
            + _as_text(e.stdout) + '\n' + _as_text(e.stderr)
        )
        print('[OUTER MONITOR ALERT] RL runner timed out and was killed.', flush=True)
        _append_monitor_line(
            monitor_path,
            f"[MONITOR {time.ctime()}] RL runner timed out after {elapsed:.1f}s\n",
        )
        return 'timeout', None, output, elapsed
    except Exception as e:
        # Deliberately broad: one run that fails to launch must not abort the sweep.
        elapsed = time.time() - start
        output = f"[ERROR] Experiment errored: {str(e)}"
        print('[OUTER MONITOR ALERT] EXCEPTION LAUNCHING RL RUNNER:', str(e), flush=True)
        _append_monitor_line(
            monitor_path,
            f"[MONITOR {time.ctime()}] Exception launching RL runner: {str(e)}\n",
        )
        return 'error', None, output, elapsed

    elapsed = time.time() - start
    output = proc.stdout + '\n' + proc.stderr
    print('[Outer Loop MONITOR] RL job ran, returncode:', proc.returncode, flush=True)
    # -- Show FULL output to terminal for each run --
    print('--------- RL Runner Output (begin) ---------')
    print(output)
    print('--------- RL Runner Output (end) ---------')
    _append_monitor_line(
        monitor_path,
        f"[MONITOR {time.ctime()}] RL job returncode={proc.returncode} after {elapsed:.1f}s\n",
    )
    if proc.returncode != 0:
        print('[OUTER MONITOR ALERT] RL runner exited with error, see results/log file.', flush=True)
        return 'error', proc.returncode, output, elapsed
    return 'ok', proc.returncode, output, elapsed


def run_sweep(out_dir=_DEFAULT_OUT_DIR, runner_script=_DEFAULT_RUNNER_SCRIPT, timeout_sec=360):
    """Run every configuration in ``grid`` ``REPEATS`` times and record the results.

    For each run, old runner processes are killed, the runner script is
    launched as a subprocess, its output is scanned for the mean reward, and
    one JSON record is appended to ``sweep_results.jsonl`` in *out_dir*.
    Progress is also appended to ``outer_monitor.log`` in the same directory.

    Args:
        out_dir: Directory for the result file, monitor log and per-config
            ``model-XXX`` log dirs. Created if missing.
        runner_script: Path of the inner RL runner script to execute.
        timeout_sec: Wall-clock limit, in seconds, for a single run.

    Returns:
        List of result dicts, one per run, with keys ``run_id``,
        ``config_id``, ``repeat``, ``params``, ``mean_reward``,
        ``elapsed_sec``, ``run_status``, ``returncode`` and ``raw_output``
        (first 1000 characters of the captured output).
    """
    results = []
    os.makedirs(out_dir, exist_ok=True)
    log_file = os.path.join(out_dir, 'sweep_results.jsonl')
    monitor_path = os.path.join(out_dir, 'outer_monitor.log')
    run_id = 0
    for i, params in enumerate(build_param_combinations(grid)):
        for r in range(REPEATS):
            run_id += 1
            print(f"\n[Outer Loop] Running config {i+1} repeat {r+1}/{REPEATS}: {params}")
            _append_monitor_line(
                monitor_path,
                f"\n[MONITOR {time.ctime()}] Starting config {i+1} repeat {r+1}/{REPEATS}: {params}\n",
            )
            kill_old_rl()
            print(f'[Outer Loop MONITOR] Sleeping 2s after RL job kill to ensure teardown, {time.ctime()}', flush=True)
            time.sleep(2)
            print('[Outer Loop MONITOR] Launching inner RL job now...', flush=True)
            _append_monitor_line(
                monitor_path,
                f"[MONITOR {time.ctime()}] Launching inner RL job for config {i+1} repeat {r+1}\n",
            )
            # NOTE(review): the log dir is keyed by config index only, so all
            # repeats of one config share a directory -- confirm this is intended.
            cmd = [
                'python3', runner_script,
                '--agent', 'dqn',
                '--env', 'donkey-generated-roads-v0',
                '--timesteps', str(params['timesteps']),
                '--eval-episodes', str(params['eval_episodes']),
                '--n-steer', str(params['n_steer']),
                '--n-throttle', str(params['n_throttle']),
                '--log-dir', os.path.join(out_dir, f'model-{i:03d}'),
            ]
            # The learning rate travels via the SB3_DQN_LR environment variable.
            # It is set on a copy of the environment handed to the child only,
            # so this process's own os.environ is left unmodified.
            child_env = dict(os.environ, SB3_DQN_LR=str(params['learning_rate']))

            status, returncode, output, elapsed = _launch_runner(
                cmd, child_env, timeout_sec, monitor_path
            )
            mean_reward = _parse_mean_reward(output)

            result = {
                'run_id': run_id,
                'config_id': i,
                'repeat': r,
                'params': params,
                'mean_reward': mean_reward,
                'elapsed_sec': elapsed,
                # Taken from the actual outcome (timeout / launch failure /
                # exit code), not from marker strings found in the output.
                'run_status': status,
                'returncode': returncode,
                'raw_output': output[:1000],
            }
            results.append(result)
            with open(log_file, 'a') as f:
                f.write(json.dumps(result) + '\n')
            print(f"[Outer Loop] Finished {i+1} repeat {r+1}/{REPEATS}: status={result['run_status']} mean_reward={mean_reward} time={elapsed:.1f}s")
    print(f"\n[Outer Loop] Sweep done. Results saved in: {log_file}")
    return results
# Script entry point: start the sweep only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_sweep()