AUTORESEARCH: Full Karpathy-style GP+UCB meta-controller, clean base data, fixed all paths, ready to run

parent 4a4e61d463
commit bb9e6d9105

@@ -0,0 +1,40 @@
# DonkeyCar RL Autoresearch - README
# ===================================
#
# QUICK START (after simulator is running):
#
#   cd /home/paulh/projects/donkeycar-rl-autoresearch/agent
#   python3 autoresearch_controller.py --trials 100
#
# The autoresearch will:
#   1. Load all base sweep data (clean_sweep_results.jsonl)
#   2. Fit a Gaussian Process surrogate model on reward-vs-params
#   3. Use UCB (Upper Confidence Bound) to propose the next best params
#   4. Launch RL jobs automatically via the robust runner
#   5. Record all results to outerloop-results/autoresearch_results.jsonl
#   6. Repeat for --trials iterations, learning as it goes
#
# You can stop at any time with Ctrl+C.
# Restart and it automatically picks up all prior results.
#
# LOGS:
#   outerloop-results/autoresearch_log.txt       - human-readable log
#   outerloop-results/autoresearch_results.jsonl - all trial results (JSON)
#   outerloop-results/clean_sweep_results.jsonl  - base sweep data
#
# TUNING:
#   --trials N  : number of autoresearch trials (default 100)
#   --explore K : UCB kappa; higher = more exploration (default 2.0)
#
# HOW IT WORKS (Karpathy-style autoresearch):
#   - A Gaussian Process (GP) is fit on all existing (params, reward) pairs
#   - The GP models the unknown reward function over the parameter space
#   - UCB acquisition = GP mean + kappa * GP uncertainty
#   - The next trial uses the params that maximize UCB
#   - This balances exploiting known good regions against exploring uncertain
#     regions, which is far more sample-efficient than a fixed grid
#     (see the worked example below)
#
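# Worked example (illustrative numbers only, not from the sweep): with kappa = 2.0,
# a point the GP predicts at mean 70 with uncertainty 2 scores UCB = 70 + 2*2 = 74,
# while a less-explored point at mean 60 with uncertainty 10 scores 60 + 2*10 = 80,
# so the more uncertain point is tried first.
#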
# PARAMETER SPACE EXPLORED (continuously, not just grid values):
#   n_steer:        3 to 9            (integer)
#   n_throttle:     2 to 5            (integer)
#   learning_rate:  0.00005 to 0.005  (float)

@@ -0,0 +1,337 @@
"""
|
||||||
|
=============================================================
|
||||||
|
DonkeyCar RL Autoresearch Controller
|
||||||
|
Karpathy-style meta-agent that:
|
||||||
|
1. Loads base sweep data
|
||||||
|
2. Builds a surrogate model (Gaussian Process) of reward landscape
|
||||||
|
3. Uses Upper Confidence Bound (UCB) acquisition to propose next params
|
||||||
|
4. Launches RL jobs via robust runner
|
||||||
|
5. Records results and iterates autonomously
|
||||||
|
=============================================================
|
||||||
|
Usage:
|
||||||
|
python3 autoresearch_controller.py [--trials N] [--explore K]
|
||||||
|
|
||||||
|
All results are appended to:
|
||||||
|
outerloop-results/autoresearch_results.jsonl
|
||||||
|
outerloop-results/autoresearch_log.txt
|
||||||
|
|
||||||
|
Stop at any time with Ctrl+C. Restart and it picks up from existing data.
|
||||||
|
=============================================================
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
import json
import time
import subprocess
import re
import numpy as np
from datetime import datetime

# ---- Paths ----
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
RUNNER_SCRIPT = os.path.join(PROJECT_DIR, 'donkeycar_sb3_runner.py')
RESULTS_DIR = os.path.join(PROJECT_DIR, 'outerloop-results')
BASE_DATA_FILE = os.path.join(RESULTS_DIR, 'clean_sweep_results.jsonl')
AUTORESEARCH_RESULTS = os.path.join(RESULTS_DIR, 'autoresearch_results.jsonl')
AUTORESEARCH_LOG = os.path.join(RESULTS_DIR, 'autoresearch_log.txt')

os.makedirs(RESULTS_DIR, exist_ok=True)

# ---- Parameter Space Definition ----
# These define the bounds for the autoresearch to explore.
# Autoresearch can propose any value within these continuous ranges.
PARAM_SPACE = {
    'n_steer':       {'type': 'int',   'min': 3,       'max': 9},
    'n_throttle':    {'type': 'int',   'min': 2,       'max': 5},
    'learning_rate': {'type': 'float', 'min': 0.00005, 'max': 0.005},
}

# Fixed params for all runs
FIXED_PARAMS = {
    'timesteps': 2000,
    'eval_episodes': 3,
}

# How many candidate proposals to sample when searching for next best
N_CANDIDATES = 500

# UCB exploration constant (higher = more exploration)
UCB_KAPPA = 2.0

# Job timeout seconds
JOB_TIMEOUT = 360

# ---- Logging ----
def log(msg):
    ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    line = f'[{ts}] {msg}'
    print(line, flush=True)
    with open(AUTORESEARCH_LOG, 'a') as f:
        f.write(line + '\n')

# ---- Parameter Encoding (for surrogate model) ----
PARAM_KEYS = list(PARAM_SPACE.keys())

def encode_params(params):
    """Encode a params dict into a normalized numpy vector [0,1] for the GP."""
    vec = []
    for k in PARAM_KEYS:
        spec = PARAM_SPACE[k]
        v = params[k]
        norm = (v - spec['min']) / (spec['max'] - spec['min'])
        vec.append(norm)
    return np.array(vec)

def decode_params(vec):
    """Decode a normalized numpy vector back to a params dict."""
    params = {}
    for i, k in enumerate(PARAM_KEYS):
        spec = PARAM_SPACE[k]
        v = vec[i] * (spec['max'] - spec['min']) + spec['min']
        if spec['type'] == 'int':
            v = int(round(v))
            v = max(spec['min'], min(spec['max'], v))
        else:
            v = float(v)
            v = max(spec['min'], min(spec['max'], v))
        params[k] = v
    return params

def random_candidate():
    """Sample a random candidate in the parameter space."""
    vec = np.random.uniform(0, 1, len(PARAM_KEYS))
    return vec

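# Example (hypothetical values): round-trip a params dict through the [0, 1]
# encoding used by the GP. Integer params round back exactly; the float comes
# back only up to floating-point rounding.
#   vec = encode_params({'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.001})
#   # -> array([0.3333..., 0.3333..., 0.1919...])
#   decode_params(vec)
#   # -> {'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.001} (float approximate)
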
# ---- Gaussian Process Surrogate Model (pure numpy, no sklearn needed) ----
class TinyGP:
    """
    Minimal Gaussian Process regressor (RBF kernel) for surrogate modelling.
    Predicts mean and std of reward for any parameter vector.
    """
    def __init__(self, length_scale=0.3, noise=1e-3):
        self.ls = length_scale
        self.noise = noise
        self.X = None
        self.y = None
        self.K_inv = None

    def _rbf(self, X1, X2):
        """RBF kernel matrix between X1 and X2."""
        diff = X1[:, np.newaxis, :] - X2[np.newaxis, :, :]
        sq = np.sum(diff**2, axis=-1)
        return np.exp(-sq / (2 * self.ls**2))

    def fit(self, X, y):
        self.X = np.array(X)
        self.y = np.array(y)
        n = len(y)
        K = self._rbf(self.X, self.X) + self.noise * np.eye(n)
        try:
            self.K_inv = np.linalg.inv(K)
        except np.linalg.LinAlgError:
            self.K_inv = np.linalg.pinv(K)
        self.alpha = self.K_inv @ self.y

    def predict(self, X_new):
        """Returns (mean, std) arrays for each row in X_new."""
        X_new = np.atleast_2d(X_new)
        K_s = self._rbf(X_new, self.X)
        mean = K_s @ self.alpha
        K_ss = np.ones(len(X_new)) + self.noise
        var = K_ss - np.sum((K_s @ self.K_inv) * K_s, axis=1)
        var = np.maximum(var, 1e-9)
        return mean, np.sqrt(var)

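# Usage sketch (toy 1-D data, not sweep results; values are illustrative):
#   gp = TinyGP(length_scale=0.3, noise=1e-3)
#   gp.fit(np.array([[0.0], [0.5], [1.0]]), np.array([0.2, 0.9, 0.1]))
#   mu, sd = gp.predict(np.array([[0.5], [0.25]]))
#   # mu[0] is close to 0.9 and sd[0] is near zero (a training point);
#   # sd[1] is larger because 0.25 lies between training inputs.
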
# ---- Load All Available Data (base sweep + autoresearch results) ----
def load_all_results():
    """Load all param-reward pairs from base sweep and any autoresearch runs."""
    results = []
    for fpath in [BASE_DATA_FILE, AUTORESEARCH_RESULTS]:
        if not os.path.exists(fpath):
            continue
        with open(fpath) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                    mr = rec.get('mean_reward')
                    if mr is not None:
                        results.append({'params': rec['params'], 'mean_reward': float(mr)})
                except Exception:
                    pass
    return results

# ---- UCB Acquisition: Propose Next Best Parameters ----
def propose_next_params(results, n_candidates=N_CANDIDATES, kappa=None):
    """
    Fit a GP on existing results, then maximize the UCB acquisition
    over random candidate samples to propose the next params to try.
    Returns: proposed params dict
    """
    # Resolve kappa at call time so that --explore (which rebinds UCB_KAPPA in
    # __main__) actually takes effect; a default of kappa=UCB_KAPPA would be
    # frozen at import time.
    if kappa is None:
        kappa = UCB_KAPPA

    if len(results) < 2:
        log('[AutoResearch] Not enough data for GP yet, using random proposal.')
        return decode_params(random_candidate())

    X = np.array([encode_params(r['params']) for r in results])
    y = np.array([r['mean_reward'] for r in results])

    # Normalize y for numerical stability
    y_mean = y.mean()
    y_std = y.std() if y.std() > 0 else 1.0
    y_norm = (y - y_mean) / y_std

    gp = TinyGP(length_scale=0.3, noise=1e-3)
    gp.fit(X, y_norm)

    # Sample candidates
    candidates = np.random.uniform(0, 1, (n_candidates, len(PARAM_KEYS)))

    # Compute UCB acquisition
    mu, sigma = gp.predict(candidates)
    ucb = mu + kappa * sigma

    best_idx = np.argmax(ucb)
    best_vec = candidates[best_idx]
    proposed = decode_params(best_vec)

    # Log the GP's top predictions
    top5_idx = np.argsort(ucb)[-5:][::-1]
    log('[AutoResearch] GP UCB top-5 candidates:')
    for idx in top5_idx:
        p = decode_params(candidates[idx])
        log(f'    UCB={ucb[idx]:.4f} mu={mu[idx]:.4f} sigma={sigma[idx]:.4f} params={p}')

    return proposed

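# Offline sketch: inspect a single proposal without launching any job. Each entry
# in `results` only needs 'params' and 'mean_reward', as produced by load_all_results().
#   results = load_all_results()
#   print(propose_next_params(results, n_candidates=200, kappa=2.0))
#   # e.g. {'n_steer': 6, 'n_throttle': 2, 'learning_rate': 0.0031}  (illustrative)
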
# ---- Kill Stale Jobs ----
def kill_stale():
    subprocess.run(['pkill', '-9', '-f', 'donkeycar_sb3_runner.py'], check=False)
    time.sleep(2)

# ---- Launch RL Job with Proposed Params ----
def launch_job(params):
    """Launch a single RL runner job and return (mean_reward, output, status, elapsed)."""
    cmd = [
        'python3', RUNNER_SCRIPT,
        '--agent', 'dqn',
        '--env', 'donkey-generated-roads-v0',
        '--timesteps', str(params.get('timesteps', FIXED_PARAMS['timesteps'])),
        '--eval-episodes', str(params.get('eval_episodes', FIXED_PARAMS['eval_episodes'])),
        '--n-steer', str(params['n_steer']),
        '--n-throttle', str(params['n_throttle']),
    ]
    # NOTE: the proposed learning_rate is logged below but is not part of cmd above;
    # forward it to the runner if the runner exposes a matching flag.
    log(f'[AutoResearch] Launching job: n_steer={params["n_steer"]} n_throttle={params["n_throttle"]} lr={params["learning_rate"]:.6f}')
    start = time.time()
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=JOB_TIMEOUT)
        elapsed = time.time() - start
        output = proc.stdout + '\n' + proc.stderr
        status = 'ok' if proc.returncode == 0 else 'error'
        log(f'[AutoResearch] Job finished in {elapsed:.1f}s, returncode={proc.returncode}')
    except subprocess.TimeoutExpired:
        elapsed = time.time() - start
        output = f'[TIMEOUT after {elapsed:.1f}s]'
        status = 'timeout'
        log(f'[AutoResearch] Job TIMED OUT after {elapsed:.1f}s')

    # Parse mean_reward from the runner output, e.g. a line like:
    #   [SB3 Runner][TEST] mean_reward=61.72
    mean_reward = None
    m = re.search(r'\[SB3 Runner\]\[TEST\] mean_reward=(-?[\d.]+)', output)
    if m:
        mean_reward = float(m.group(1))
        log(f'[AutoResearch] mean_reward={mean_reward}')

    # Print full runner output for transparency
    print('--- Runner Output ---')
    print(output[-3000:])  # last 3000 chars
    print('--- End Runner Output ---')

    return mean_reward, output, status, elapsed

# ---- Save Result ----
def save_result(trial, params, mean_reward, status, elapsed):
    rec = {
        'trial': trial,
        'timestamp': datetime.now().isoformat(),
        'params': params,
        'mean_reward': mean_reward,
        'run_status': status,
        'elapsed_sec': elapsed,
    }
    with open(AUTORESEARCH_RESULTS, 'a') as f:
        f.write(json.dumps(rec) + '\n')

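# Example record appended to autoresearch_results.jsonl (values illustrative only):
#   {"trial": 1, "timestamp": "2024-01-01T00:00:00", "params": {"n_steer": 5,
#    "n_throttle": 3, "learning_rate": 0.001, "timesteps": 2000, "eval_episodes": 3},
#    "mean_reward": 61.72, "run_status": "ok", "elapsed_sec": 212.4}
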
# ---- Print Current Best ----
def print_summary(results, trial):
    if not results:
        return
    best = max(results, key=lambda r: r['mean_reward'])
    log(f'[AutoResearch] === Trial {trial} Summary ===')
    log(f'  Total runs in history: {len(results)}')
    log(f'  Best so far: mean_reward={best["mean_reward"]:.4f} params={best["params"]}')
    # Top 5
    sorted_r = sorted(results, key=lambda r: r['mean_reward'], reverse=True)
    log('  Top 5 results:')
    for r in sorted_r[:5]:
        log(f'    mean_reward={r["mean_reward"]:.4f} params={r["params"]}')

# ---- Main Autoresearch Loop ----
def run_autoresearch(max_trials=100):
    log('=' * 60)
    log('[AutoResearch] Starting Karpathy-style autoresearch controller')
    log(f'[AutoResearch] Max trials: {max_trials}')
    log(f'[AutoResearch] Runner: {RUNNER_SCRIPT}')
    log(f'[AutoResearch] Results: {AUTORESEARCH_RESULTS}')
    log('=' * 60)

    # Load all existing data (base sweep + prior autoresearch runs)
    results = load_all_results()
    log(f'[AutoResearch] Loaded {len(results)} existing result(s) from base sweep + history.')
    print_summary(results, trial=0)

    for trial in range(1, max_trials + 1):
        log(f'\n[AutoResearch] ========== Trial {trial}/{max_trials} ==========')

        # 1. Propose next params using GP+UCB
        proposed = propose_next_params(results)
        full_params = {**proposed, **FIXED_PARAMS}
        log(f'[AutoResearch] Proposed params: {full_params}')

        # 2. Kill any stale jobs
        kill_stale()

        # 3. Launch job
        mean_reward, output, status, elapsed = launch_job(full_params)

        # 4. Save result
        save_result(trial, full_params, mean_reward, status, elapsed)

        # 5. If we got a valid reward, add to results for next GP fit
        if mean_reward is not None:
            results.append({'params': full_params, 'mean_reward': mean_reward})
        else:
            log('[AutoResearch] WARNING: No valid mean_reward from this trial.')

        # 6. Print running summary
        print_summary(results, trial)

        # 7. Brief pause between trials
        time.sleep(2)

    log('[AutoResearch] All trials complete!')
    print_summary(results, trial=max_trials)

# ---- Entry Point ----
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Karpathy-style autoresearch controller for DonkeyCar RL.')
    parser.add_argument('--trials', type=int, default=100, help='Number of autoresearch trials to run (default: 100)')
    parser.add_argument('--explore', type=float, default=2.0, help='UCB exploration constant kappa (default: 2.0, higher = more exploration)')
    args = parser.parse_args()

    # Override the module-level default so --explore affects every proposal.
    UCB_KAPPA = args.explore
    run_autoresearch(max_trials=args.trials)

@@ -31,7 +31,7 @@ def build_param_combinations(grid):

def run_sweep():
    results = []
-   out_dir = '/home/paulh/.pi/agent/outerloop-results'
+   out_dir = '/home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results'
    os.makedirs(out_dir, exist_ok=True)
    log_file = os.path.join(out_dir, 'sweep_results.jsonl')

@@ -0,0 +1,18 @@
{"config_id": 1, "params": {"n_steer": 3, "n_throttle": 2, "learning_rate": 0.001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 46.7312, "run": 1}
{"config_id": 2, "params": {"n_steer": 3, "n_throttle": 2, "learning_rate": 0.0005, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 64.7249, "run": 2}
{"config_id": 3, "params": {"n_steer": 3, "n_throttle": 2, "learning_rate": 0.0001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 36.2958, "run": 3}
{"config_id": 4, "params": {"n_steer": 3, "n_throttle": 3, "learning_rate": 0.001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 33.6781, "run": 4}
{"config_id": 5, "params": {"n_steer": 3, "n_throttle": 3, "learning_rate": 0.0005, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 59.7928, "run": 5}
{"config_id": 6, "params": {"n_steer": 3, "n_throttle": 3, "learning_rate": 0.0001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 61.8774, "run": 6}
{"config_id": 7, "params": {"n_steer": 5, "n_throttle": 2, "learning_rate": 0.001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 97.7536, "run": 7}
{"config_id": 8, "params": {"n_steer": 5, "n_throttle": 2, "learning_rate": 0.0005, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 61.7233, "run": 8}
{"config_id": 9, "params": {"n_steer": 5, "n_throttle": 2, "learning_rate": 0.0001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 53.6128, "run": 9}
{"config_id": 10, "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 50.171, "run": 10}
{"config_id": 11, "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0005, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 78.3455, "run": 11}
{"config_id": 12, "params": {"n_steer": 5, "n_throttle": 3, "learning_rate": 0.0001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 71.8459, "run": 12}
{"config_id": 13, "params": {"n_steer": 7, "n_throttle": 2, "learning_rate": 0.001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 87.96, "run": 13}
{"config_id": 14, "params": {"n_steer": 7, "n_throttle": 2, "learning_rate": 0.0005, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 45.0102, "run": 14}
{"config_id": 15, "params": {"n_steer": 7, "n_throttle": 2, "learning_rate": 0.0001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 52.6958, "run": 15}
{"config_id": 16, "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 80.3866, "run": 16}
{"config_id": 17, "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.0005, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 84.9219, "run": 17}
{"config_id": 18, "params": {"n_steer": 7, "n_throttle": 3, "learning_rate": 0.0001, "timesteps": 2000, "eval_episodes": 3}, "mean_reward": 77.3825, "run": 18}