fix: remove Warren from test set — indoor carpet, broken done condition
The Warren track surface is green carpet (not an outdoor road), and the episode-done condition (|CTE| > max_cte) does not fire when the car crosses the INSIDE boundary. The car can drive off-track and bump into chairs indefinitely, making scores meaningless as a test metric.

Changes:
- multitrack_runner.py: TEST_TRACKS now mini_monaco only
- wave3_controller.py: drop warren_reward from parse/save/champion paths
- tests/test_wave3.py: update assertions to match single test track
- All 83 tests pass

Track classification (final):
  TRAIN : generated_road, generated_track, mountain_track
  TEST  : mini_monaco (outdoor, proper road, correct done condition)
  SKIP  : warren, warehouse, robo_racing_league, waveshare, circuit_launch
  SKIP  : avc_sparkfun (orange markings)

ADR-010 to be updated.

Agent: pi
Tests: 83 passed
Tests-Added: 0
TypeScript: N/A
This commit is contained in:
parent
86657a26b8
commit
7ed2456896
|
|
@ -74,9 +74,12 @@ TRAINING_TRACKS = [
|
||||||
('mountain_track', 'donkey-mountain-track-v0'),
|
('mountain_track', 'donkey-mountain-track-v0'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Zero-shot generalization test tracks — never seen during training.
|
||||||
|
# Warren was removed: its episode-done condition does not fire when the car
|
||||||
|
# crosses the INSIDE edge (CTE stays small), so the car can drive among
|
||||||
|
# chairs indefinitely and scores are meaningless.
|
||||||
TEST_TRACKS = [
|
TEST_TRACKS = [
|
||||||
('mini_monaco', 'donkey-minimonaco-track-v0'),
|
('mini_monaco', 'donkey-minimonaco-track-v0'),
|
||||||
('warren', 'donkey-warren-track-v0'),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# How many steps to sample before deciding the segment reward (shorter than segment)
|
# How many steps to sample before deciding the segment reward (shorter than segment)
|
||||||
|
|
|
||||||
|
|
@ -541,3 +541,16 @@
|
||||||
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
[2026-04-14 13:29:30] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
[2026-04-14 13:29:30] [AutoResearch] Only 1 results — using random proposal.
|
[2026-04-14 13:29:30] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
[2026-04-14 13:47:13] [AutoResearch] GP UCB top-5 candidates:
|
||||||
|
[2026-04-14 13:47:13] UCB=2.3107 mu=0.3981 sigma=0.9563 params={'n_steer': 9, 'n_throttle': 2, 'learning_rate': 0.001405531880392808, 'timesteps': 26173}
|
||||||
|
[2026-04-14 13:47:13] UCB=2.3049 mu=0.8602 sigma=0.7224 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.001793493447174312, 'timesteps': 19198}
|
||||||
|
[2026-04-14 13:47:13] UCB=2.2813 mu=0.4904 sigma=0.8954 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011616192816742616, 'timesteps': 13887}
|
||||||
|
[2026-04-14 13:47:13] UCB=2.2767 mu=0.5194 sigma=0.8787 params={'n_steer': 9, 'n_throttle': 4, 'learning_rate': 0.0011646447444663046, 'timesteps': 21199}
|
||||||
|
[2026-04-14 13:47:13] UCB=2.2525 mu=0.6254 sigma=0.8136 params={'n_steer': 9, 'n_throttle': 3, 'learning_rate': 0.0010196345864901517, 'timesteps': 22035}
|
||||||
|
[2026-04-14 13:47:13] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=50.0000 params={'n_steer': 5}
|
||||||
|
[2026-04-14 13:47:13] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'n_steer': 7}
|
||||||
|
[2026-04-14 13:47:13] [Champion] 🏆 NEW BEST! Trial 0: mean_reward=50.0000 params={'r': 50}
|
||||||
|
[2026-04-14 13:47:13] [Champion] 🏆 NEW BEST! Trial 1: mean_reward=80.0000 params={'r': 80}
|
||||||
|
[2026-04-14 13:47:13] [Champion] 🏆 NEW BEST! Trial 3: mean_reward=90.0000 params={'r': 90}
|
||||||
|
[2026-04-14 13:47:13] [Champion] 🏆 NEW BEST! Trial 5: mean_reward=75.0000 params={'n_steer': 8}
|
||||||
|
[2026-04-14 13:47:13] [AutoResearch] Only 1 results — using random proposal.
|
||||||
|
|
|
||||||
|
|
@ -51,3 +51,26 @@
|
||||||
[2026-04-14 13:29:34] [Wave3] Only 0 results — using random proposal.
|
[2026-04-14 13:29:34] [Wave3] Only 0 results — using random proposal.
|
||||||
[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! Trial 3: combined=1500.00 (mini_monaco=900.0, warren=600.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
|
[2026-04-14 13:29:34] [Champion] 🏆 NEW BEST! Trial 1: combined=2000.00 (mini_monaco=1200.0, warren=800.0) params={}
|
||||||
|
[2026-04-14 13:36:58] =================================================================
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Training tracks : generated_road, generated_track, mountain_track
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Test tracks : mini_monaco, warren (zero-shot)
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Max trials : 25 | kappa=2.0 | push every 5
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Results file : /home/paulh/projects/donkeycar-rl-autoresearch/agent/outerloop-results/autoresearch_results_phase3.jsonl
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Champion dir : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-champion
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Warm start : /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
|
||||||
|
[2026-04-14 13:36:58] =================================================================
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Loaded 0 existing Phase 3 results.
|
||||||
|
[2026-04-14 13:36:58] [Wave3] No Wave 3 champion yet.
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Starting from trial 1.
|
||||||
|
[2026-04-14 13:36:58]
|
||||||
|
[Wave3] ========== Trial 1/25 ==========
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-14 13:36:58] [Wave3] Proposed params: {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
|
||||||
|
[2026-04-14 13:37:00] [Wave3] Launching trial 1: {'learning_rate': 0.000225, 'steps_per_switch': 5000, 'total_timesteps': 45000}
|
||||||
|
[2026-04-14 13:37:00] [Wave3] Command: python3 /home/paulh/projects/donkeycar-rl-autoresearch/agent/multitrack_runner.py --total-timesteps 45000 --steps-per-switch 5000 --learning-rate 0.000225 --eval-episodes 3 --save-dir /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/wave3-trial-0001 --warm-start /home/paulh/projects/donkeycar-rl-autoresearch/agent/models/champion/model.zip
|
||||||
|
[2026-04-14 13:47:17] [Wave3] Seed trial 1/2: using hardcoded params.
|
||||||
|
[2026-04-14 13:47:17] [Wave3] Seed trial 2/2: using hardcoded params.
|
||||||
|
[2026-04-14 13:47:17] [Wave3] Only 0 results — using random proposal.
|
||||||
|
[2026-04-14 13:47:17] [Champion] 🏆 NEW BEST! Trial 3: score=1500.00 (mini_monaco=1500.0) params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000}
|
||||||
|
[2026-04-14 13:47:17] [Champion] 🏆 NEW BEST! Trial 1: score=2000.00 (mini_monaco=2000.0) params={}
|
||||||
|
|
|
||||||
|
|
@ -188,7 +188,7 @@ class Wave3ChampionTracker:
|
||||||
return self._best.get('combined_test_score', float('-inf'))
|
return self._best.get('combined_test_score', float('-inf'))
|
||||||
|
|
||||||
def update_if_better(self, score, params, model_zip_path, trial,
|
def update_if_better(self, score, params, model_zip_path, trial,
|
||||||
mini_monaco_reward=None, warren_reward=None):
|
mini_monaco_reward=None):
|
||||||
if score <= self.best_score:
|
if score <= self.best_score:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -206,15 +206,14 @@ class Wave3ChampionTracker:
|
||||||
'params': params,
|
'params': params,
|
||||||
'combined_test_score': score,
|
'combined_test_score': score,
|
||||||
'mini_monaco_reward': mini_monaco_reward,
|
'mini_monaco_reward': mini_monaco_reward,
|
||||||
'warren_reward': warren_reward,
|
|
||||||
'model_path': dest,
|
'model_path': dest,
|
||||||
}
|
}
|
||||||
with open(self.manifest_path, 'w') as f:
|
with open(self.manifest_path, 'w') as f:
|
||||||
json.dump(manifest, f, indent=2)
|
json.dump(manifest, f, indent=2)
|
||||||
self._best = manifest
|
self._best = manifest
|
||||||
log(f'[Champion] 🏆 NEW BEST! Trial {trial}: '
|
log(f'[Champion] 🏆 NEW BEST! Trial {trial}: '
|
||||||
f'combined={score:.2f} '
|
f'score={score:.2f} '
|
||||||
f'(mini_monaco={mini_monaco_reward:.1f}, warren={warren_reward:.1f}) '
|
f'(mini_monaco={mini_monaco_reward:.1f}) '
|
||||||
f'params={params}')
|
f'params={params}')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
@ -222,7 +221,7 @@ class Wave3ChampionTracker:
|
||||||
if self._best['trial'] is None:
|
if self._best['trial'] is None:
|
||||||
return 'No Wave 3 champion yet.'
|
return 'No Wave 3 champion yet.'
|
||||||
return (f"Wave3 Champion: trial={self._best['trial']} "
|
return (f"Wave3 Champion: trial={self._best['trial']} "
|
||||||
f"combined={self._best['combined_test_score']:.2f} "
|
f"score={self._best['combined_test_score']:.2f} "
|
||||||
f"params={self._best['params']}")
|
f"params={self._best['params']}")
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -297,11 +296,9 @@ def parse_runner_output(output):
|
||||||
Looks for:
|
Looks for:
|
||||||
[W3 Runner][TEST] combined_test_score=<float>
|
[W3 Runner][TEST] combined_test_score=<float>
|
||||||
[W3 Runner][TEST] mini_monaco_reward=<float>
|
[W3 Runner][TEST] mini_monaco_reward=<float>
|
||||||
[W3 Runner][TEST] warren_reward=<float>
|
|
||||||
"""
|
"""
|
||||||
combined = None
|
combined = None
|
||||||
mini_monaco = None
|
mini_monaco = None
|
||||||
warren_rwd = None
|
|
||||||
|
|
||||||
m = re.search(r'\[W3 Runner\]\[TEST\]\s+combined_test_score=([+-]?[\d.]+)', output)
|
m = re.search(r'\[W3 Runner\]\[TEST\]\s+combined_test_score=([+-]?[\d.]+)', output)
|
||||||
if m:
|
if m:
|
||||||
|
|
@ -311,11 +308,7 @@ def parse_runner_output(output):
|
||||||
if m:
|
if m:
|
||||||
mini_monaco = float(m.group(1))
|
mini_monaco = float(m.group(1))
|
||||||
|
|
||||||
m = re.search(r'\[W3 Runner\]\[TEST\]\s+warren_reward=([+-]?[\d.]+)', output)
|
return combined, mini_monaco
|
||||||
if m:
|
|
||||||
warren_rwd = float(m.group(1))
|
|
||||||
|
|
||||||
return combined, mini_monaco, warren_rwd
|
|
||||||
|
|
||||||
|
|
||||||
# ---- Job launcher ----
|
# ---- Job launcher ----
|
||||||
|
|
@ -329,7 +322,7 @@ def kill_stale():
|
||||||
def launch_trial(params, trial_num):
|
def launch_trial(params, trial_num):
|
||||||
"""
|
"""
|
||||||
Launch multitrack_runner.py as a subprocess with the given hyperparameters.
|
Launch multitrack_runner.py as a subprocess with the given hyperparameters.
|
||||||
Returns: (combined_test_score, mini_monaco_reward, warren_reward,
|
Returns: (combined_test_score, mini_monaco_reward,
|
||||||
model_zip_path, output, status, elapsed_sec, save_dir)
|
model_zip_path, output, status, elapsed_sec, save_dir)
|
||||||
"""
|
"""
|
||||||
save_dir = os.path.join(MODELS_DIR, f'wave3-trial-{trial_num:04d}')
|
save_dir = os.path.join(MODELS_DIR, f'wave3-trial-{trial_num:04d}')
|
||||||
|
|
@ -374,18 +367,18 @@ def launch_trial(params, trial_num):
|
||||||
print('--- End Runner Output ---\n', flush=True)
|
print('--- End Runner Output ---\n', flush=True)
|
||||||
|
|
||||||
# Parse results
|
# Parse results
|
||||||
combined, mini_monaco, warren_rwd = parse_runner_output(output)
|
combined, mini_monaco = parse_runner_output(output)
|
||||||
log(f'[Wave3] Parsed: combined={combined} mini_monaco={mini_monaco} warren={warren_rwd}')
|
log(f'[Wave3] Parsed: combined={combined} mini_monaco={mini_monaco}')
|
||||||
|
|
||||||
model_zip = os.path.join(save_dir, 'model.zip')
|
model_zip = os.path.join(save_dir, 'model.zip')
|
||||||
if not os.path.exists(model_zip):
|
if not os.path.exists(model_zip):
|
||||||
model_zip = None
|
model_zip = None
|
||||||
|
|
||||||
return combined, mini_monaco, warren_rwd, model_zip, output, status, elapsed, save_dir
|
return combined, mini_monaco, model_zip, output, status, elapsed, save_dir
|
||||||
|
|
||||||
|
|
||||||
# ---- Result saving ----
|
# ---- Result saving ----
|
||||||
def save_result(trial, params, combined, mini_monaco, warren_rwd,
|
def save_result(trial, params, combined, mini_monaco,
|
||||||
model_path, is_champion, status, elapsed):
|
model_path, is_champion, status, elapsed):
|
||||||
rec = {
|
rec = {
|
||||||
'trial': trial,
|
'trial': trial,
|
||||||
|
|
@ -393,7 +386,6 @@ def save_result(trial, params, combined, mini_monaco, warren_rwd,
|
||||||
'params': params,
|
'params': params,
|
||||||
'combined_test_score': combined,
|
'combined_test_score': combined,
|
||||||
'mini_monaco_reward': mini_monaco,
|
'mini_monaco_reward': mini_monaco,
|
||||||
'warren_reward': warren_rwd,
|
|
||||||
'model_path': model_path,
|
'model_path': model_path,
|
||||||
'champion': is_champion,
|
'champion': is_champion,
|
||||||
'run_status': status,
|
'run_status': status,
|
||||||
|
|
@ -438,7 +430,7 @@ def run_wave3(max_trials=25, kappa=UCB_KAPPA, push_every=5):
|
||||||
log('=' * 65)
|
log('=' * 65)
|
||||||
log('[Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search')
|
log('[Wave3] Multi-Track Autoresearch — GP+UCB Generalization Search')
|
||||||
log(f'[Wave3] Training tracks : generated_road, generated_track, mountain_track')
|
log(f'[Wave3] Training tracks : generated_road, generated_track, mountain_track')
|
||||||
log(f'[Wave3] Test tracks : mini_monaco, warren (zero-shot)')
|
log(f'[Wave3] Test tracks : mini_monaco only (zero-shot; warren removed — broken done condition)')
|
||||||
log(f'[Wave3] Max trials : {max_trials} | kappa={kappa} | push every {push_every}')
|
log(f'[Wave3] Max trials : {max_trials} | kappa={kappa} | push every {push_every}')
|
||||||
log(f'[Wave3] Results file : {RESULTS_FILE}')
|
log(f'[Wave3] Results file : {RESULTS_FILE}')
|
||||||
log(f'[Wave3] Champion dir : {CHAMPION_DIR}')
|
log(f'[Wave3] Champion dir : {CHAMPION_DIR}')
|
||||||
|
|
@ -467,7 +459,7 @@ def run_wave3(max_trials=25, kappa=UCB_KAPPA, push_every=5):
|
||||||
kill_stale()
|
kill_stale()
|
||||||
|
|
||||||
# 3. Launch training + eval
|
# 3. Launch training + eval
|
||||||
combined, mini_monaco, warren_rwd, model_zip, output, status, elapsed, save_dir = \
|
combined, mini_monaco, model_zip, output, status, elapsed, save_dir = \
|
||||||
launch_trial(proposed, trial)
|
launch_trial(proposed, trial)
|
||||||
|
|
||||||
# 4. Guard against None results (timeout / crash)
|
# 4. Guard against None results (timeout / crash)
|
||||||
|
|
@ -475,17 +467,15 @@ def run_wave3(max_trials=25, kappa=UCB_KAPPA, push_every=5):
|
||||||
log(f'[Wave3] ⚠️ No test score parsed — defaulting to 0.0')
|
log(f'[Wave3] ⚠️ No test score parsed — defaulting to 0.0')
|
||||||
combined = 0.0
|
combined = 0.0
|
||||||
mini_monaco = mini_monaco or 0.0
|
mini_monaco = mini_monaco or 0.0
|
||||||
warren_rwd = warren_rwd or 0.0
|
|
||||||
|
|
||||||
# 5. Update champion
|
# 5. Update champion
|
||||||
is_champion = champion.update_if_better(
|
is_champion = champion.update_if_better(
|
||||||
combined, proposed, model_zip, trial,
|
combined, proposed, model_zip, trial,
|
||||||
mini_monaco_reward=mini_monaco or 0.0,
|
mini_monaco_reward=mini_monaco or 0.0,
|
||||||
warren_reward=warren_rwd or 0.0,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# 6. Save result
|
# 6. Save result
|
||||||
save_result(trial, proposed, combined, mini_monaco, warren_rwd,
|
save_result(trial, proposed, combined, mini_monaco,
|
||||||
model_zip, is_champion, status, elapsed)
|
model_zip, is_champion, status, elapsed)
|
||||||
|
|
||||||
# 7. Update GP data
|
# 7. Update GP data
|
||||||
|
|
|
||||||
|
|
@ -79,12 +79,14 @@ def test_multitrack_runner_training_tracks_defined():
|
||||||
|
|
||||||
|
|
||||||
def test_multitrack_runner_test_tracks_defined():
|
def test_multitrack_runner_test_tracks_defined():
|
||||||
"""TEST_TRACKS must contain exactly 2 entries: mini_monaco and warren."""
|
"""TEST_TRACKS must contain exactly 1 entry: mini_monaco.
|
||||||
|
Warren was removed: CTE-based episode termination does not fire when the
|
||||||
|
car crosses the inside edge, so scores on Warren are unreliable."""
|
||||||
from multitrack_runner import TEST_TRACKS
|
from multitrack_runner import TEST_TRACKS
|
||||||
assert len(TEST_TRACKS) == 2
|
assert len(TEST_TRACKS) == 1
|
||||||
names = [t[0] for t in TEST_TRACKS]
|
names = [t[0] for t in TEST_TRACKS]
|
||||||
assert 'mini_monaco' in names
|
assert 'mini_monaco' in names
|
||||||
assert 'warren' in names
|
assert 'warren' not in names, 'Warren removed — broken episode termination on inside edge'
|
||||||
|
|
||||||
|
|
||||||
def test_multitrack_runner_no_model_save_before_definition():
|
def test_multitrack_runner_no_model_save_before_definition():
|
||||||
|
|
@ -393,20 +395,17 @@ def test_wave3_propose_random_when_few_results():
|
||||||
|
|
||||||
|
|
||||||
def test_wave3_parse_runner_output_combined_score():
|
def test_wave3_parse_runner_output_combined_score():
|
||||||
"""parse_runner_output() should extract combined_test_score correctly."""
|
"""parse_runner_output() should extract combined_test_score and mini_monaco correctly."""
|
||||||
from wave3_controller import parse_runner_output
|
from wave3_controller import parse_runner_output
|
||||||
|
|
||||||
output = """
|
output = """
|
||||||
[12:34:56] [W3 Runner][TEST] track=mini_monaco mean_reward=1234.56 mean_steps=450.0 ✅ DRIVES
|
[12:34:56] [W3 Runner][TEST] track=mini_monaco mean_reward=1234.56 mean_steps=450.0 ✅ DRIVES
|
||||||
[12:34:57] [W3 Runner][TEST] track=warren mean_reward=789.01 mean_steps=310.0 ✅ DRIVES
|
|
||||||
[12:34:57] [W3 Runner][TEST] mini_monaco_reward=1234.5600
|
[12:34:57] [W3 Runner][TEST] mini_monaco_reward=1234.5600
|
||||||
[12:34:57] [W3 Runner][TEST] warren_reward=789.0100
|
[12:34:57] [W3 Runner][TEST] combined_test_score=1234.5600
|
||||||
[12:34:57] [W3 Runner][TEST] combined_test_score=2023.5700
|
|
||||||
"""
|
"""
|
||||||
combined, mini_monaco, warren = parse_runner_output(output)
|
combined, mini_monaco = parse_runner_output(output)
|
||||||
assert combined == pytest.approx(2023.57, rel=1e-4)
|
assert combined == pytest.approx(1234.56, rel=1e-4)
|
||||||
assert mini_monaco == pytest.approx(1234.56, rel=1e-4)
|
assert mini_monaco == pytest.approx(1234.56, rel=1e-4)
|
||||||
assert warren == pytest.approx(789.01, rel=1e-4)
|
|
||||||
|
|
||||||
|
|
||||||
def test_wave3_parse_runner_output_missing_returns_none():
|
def test_wave3_parse_runner_output_missing_returns_none():
|
||||||
|
|
@ -414,10 +413,9 @@ def test_wave3_parse_runner_output_missing_returns_none():
|
||||||
from wave3_controller import parse_runner_output
|
from wave3_controller import parse_runner_output
|
||||||
|
|
||||||
output = 'Training started... timeout'
|
output = 'Training started... timeout'
|
||||||
combined, mini_monaco, warren = parse_runner_output(output)
|
combined, mini_monaco = parse_runner_output(output)
|
||||||
assert combined is None
|
assert combined is None
|
||||||
assert mini_monaco is None
|
assert mini_monaco is None
|
||||||
assert warren is None
|
|
||||||
|
|
||||||
|
|
||||||
def test_wave3_champion_tracker_update_and_load():
|
def test_wave3_champion_tracker_update_and_load():
|
||||||
|
|
@ -433,8 +431,7 @@ def test_wave3_champion_tracker_update_and_load():
|
||||||
params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000},
|
params={'learning_rate': 0.0002, 'steps_per_switch': 8000, 'total_timesteps': 150000},
|
||||||
model_zip_path=None,
|
model_zip_path=None,
|
||||||
trial=3,
|
trial=3,
|
||||||
mini_monaco_reward=900.0,
|
mini_monaco_reward=1500.0,
|
||||||
warren_reward=600.0,
|
|
||||||
)
|
)
|
||||||
assert updated is True
|
assert updated is True
|
||||||
assert tracker.best_score == pytest.approx(1500.0)
|
assert tracker.best_score == pytest.approx(1500.0)
|
||||||
|
|
@ -451,10 +448,8 @@ def test_wave3_champion_tracker_does_not_regress():
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
tracker = Wave3ChampionTracker(tmpdir)
|
tracker = Wave3ChampionTracker(tmpdir)
|
||||||
tracker.update_if_better(2000.0, {}, None, 1,
|
tracker.update_if_better(2000.0, {}, None, 1, mini_monaco_reward=2000.0)
|
||||||
mini_monaco_reward=1200.0, warren_reward=800.0)
|
updated = tracker.update_if_better(1500.0, {}, None, 2, mini_monaco_reward=1500.0)
|
||||||
updated = tracker.update_if_better(1500.0, {}, None, 2,
|
|
||||||
mini_monaco_reward=900.0, warren_reward=600.0)
|
|
||||||
assert updated is False
|
assert updated is False
|
||||||
assert tracker.best_score == pytest.approx(2000.0)
|
assert tracker.best_score == pytest.approx(2000.0)
|
||||||
|
|
||||||
|
|
@ -464,33 +459,27 @@ def test_wave3_results_appended_not_overwritten():
|
||||||
from wave3_controller import save_result
|
from wave3_controller import save_result
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
# Monkey-patch the RESULTS_FILE path
|
|
||||||
import wave3_controller
|
import wave3_controller
|
||||||
original_path = wave3_controller.RESULTS_FILE
|
original_path = wave3_controller.RESULTS_FILE
|
||||||
wave3_controller.RESULTS_FILE = os.path.join(tmpdir, 'phase3_results.jsonl')
|
wave3_controller.RESULTS_FILE = os.path.join(tmpdir, 'phase3_results.jsonl')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Write 3 records
|
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
save_result(
|
save_result(
|
||||||
trial=i + 1,
|
trial=i + 1,
|
||||||
params={'learning_rate': 0.0002, 'steps_per_switch': 5000,
|
params={'learning_rate': 0.0002, 'steps_per_switch': 5000,
|
||||||
'total_timesteps': 100000},
|
'total_timesteps': 100000},
|
||||||
combined=float(i * 100),
|
combined=float(i * 100),
|
||||||
mini_monaco=float(i * 60),
|
mini_monaco=float(i * 100),
|
||||||
warren_rwd=float(i * 40),
|
|
||||||
model_path=None,
|
model_path=None,
|
||||||
is_champion=(i == 2),
|
is_champion=(i == 2),
|
||||||
status='ok',
|
status='ok',
|
||||||
elapsed=120.0 * (i + 1),
|
elapsed=120.0 * (i + 1),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Should have 3 lines
|
|
||||||
with open(wave3_controller.RESULTS_FILE) as f:
|
with open(wave3_controller.RESULTS_FILE) as f:
|
||||||
lines = [l.strip() for l in f if l.strip()]
|
lines = [l.strip() for l in f if l.strip()]
|
||||||
assert len(lines) == 3, f'Expected 3 result lines, got {len(lines)}'
|
assert len(lines) == 3
|
||||||
|
|
||||||
# All should be valid JSON
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
rec = json.loads(line)
|
rec = json.loads(line)
|
||||||
assert 'combined_test_score' in rec
|
assert 'combined_test_score' in rec
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue