diff --git a/agent/autoresearch_controller.py b/agent/autoresearch_controller.py
index 2c0288e..7f25f50 100644
--- a/agent/autoresearch_controller.py
+++ b/agent/autoresearch_controller.py
@@ -39,9 +39,9 @@ RESULTS_DIR = os.path.join(PROJECT_DIR, 'outerloop-results')
 MODELS_DIR = os.path.join(PROJECT_DIR, 'models')
 CHAMPION_DIR = os.path.join(MODELS_DIR, 'champion')
 
-# Phase 1 uses a separate results file — do NOT mix with random-policy data
-PHASE1_RESULTS = os.path.join(RESULTS_DIR, 'autoresearch_results_phase1.jsonl')
-PHASE1_LOG = os.path.join(RESULTS_DIR, 'autoresearch_phase1_log.txt')
+# Phase 2 uses a separate results file — corner learning with longer timesteps
+PHASE1_RESULTS = os.path.join(RESULTS_DIR, 'autoresearch_results_phase2.jsonl')
+PHASE1_LOG = os.path.join(RESULTS_DIR, 'autoresearch_phase2_log.txt')
 
 # Legacy base data (discretization insights, valid for n_steer/n_throttle)
 BASE_DATA_FILE = os.path.join(RESULTS_DIR, 'clean_sweep_results.jsonl')
@@ -52,28 +52,30 @@ os.makedirs(CHAMPION_DIR, exist_ok=True)
 
 # ---- Parameter Space ----
 # These are the parameters GP+UCB will optimize
-# NOTE: timesteps kept small (1000-5000) for Phase 1 exploration on CPU.
-# DonkeyCar sim runs ~20-50 steps/sec. 5000 steps ≈ 100-250s → fits in 600s timeout.
-# Increase max_timesteps once we confirm the pipeline works end-to-end.
+# PHASE 2: Corner Learning
+# Phase 1 confirmed genuine driving (599 steps, mean_reward=1022, efficiency ~99%).
+# Failure point: S-curve at step ~560 — too fast, doesn't learn left-turn recovery.
+# Fix: much longer training so the model experiences the S-curve many times.
+# Search space tightened around the Phase 1 winning region: lr=0.00005-0.002, n_throttle=2-5.
 PARAM_SPACE = {
     'n_steer': {'type': 'int', 'min': 3, 'max': 9},
     'n_throttle': {'type': 'int', 'min': 2, 'max': 5},
-    'learning_rate': {'type': 'float', 'min': 0.00005, 'max': 0.005},
-    'timesteps': {'type': 'int', 'min': 1000, 'max': 5000},
+    'learning_rate': {'type': 'float', 'min': 0.00005, 'max': 0.002},
+    'timesteps': {'type': 'int', 'min': 10000, 'max': 50000},
 }
 PARAM_KEYS = list(PARAM_SPACE.keys())
 
 # Fixed params
 FIXED_PARAMS = {
     'agent': 'ppo',
-    'eval_episodes': 3,
+    'eval_episodes': 5,  # More eval episodes — corner performance is stochastic
     'reward_shaping': True,
 }
 
 N_CANDIDATES = 500
 UCB_KAPPA = 2.0
 MIN_TRIALS_BEFORE_GP = 3
-JOB_TIMEOUT = 480  # 8 minutes — enough for 5000 steps + eval, with margin
+JOB_TIMEOUT = 3600  # 60 min per trial — 50k steps on CPU needs time
 
 # ---- Logging ----
 def log(msg):
@@ -222,7 +224,7 @@ class ChampionTracker:
 
 # ---- Load Results ----
 def load_phase1_results():
-    """Load Phase 1 results only — no random-policy contamination."""
+    """Load Phase 2 results for GP fitting (corner learning runs)."""
     results = []
     if not os.path.exists(PHASE1_RESULTS):
         return results
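For context on the constants above (`N_CANDIDATES`, `UCB_KAPPA`, `MIN_TRIALS_BEFORE_GP`): the proposal loop itself is not part of this diff. Below is a minimal sketch of what a GP+UCB step over `PARAM_SPACE` typically looks like, assuming scikit-learn is available; `propose_next` and `sample_candidate` are illustrative names, not functions from `autoresearch_controller.py`.

```python
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern


def sample_candidate(param_space, rng):
    """Draw one uniform-random candidate from the search space."""
    cand = {}
    for key, spec in param_space.items():
        if spec['type'] == 'int':
            cand[key] = int(rng.integers(spec['min'], spec['max'] + 1))
        else:
            cand[key] = float(rng.uniform(spec['min'], spec['max']))
    return cand


def propose_next(trials, param_space, param_keys,
                 n_candidates=500, kappa=2.0, min_trials=3):
    """Fit a GP to (params -> mean_reward) and return the max-UCB candidate.

    `trials` is assumed to be a list of dicts with 'params' and 'mean_reward',
    matching the JSONL results format the controller logs.
    """
    rng = np.random.default_rng()
    if len(trials) < min_trials:  # mirrors MIN_TRIALS_BEFORE_GP
        return sample_candidate(param_space, rng)
    X = np.array([[t['params'][k] for k in param_keys] for t in trials])
    y = np.array([t['mean_reward'] for t in trials])
    gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), normalize_y=True)
    gp.fit(X, y)
    cands = [sample_candidate(param_space, rng) for _ in range(n_candidates)]
    Xc = np.array([[c[k] for k in param_keys] for c in cands])
    mu, sigma = gp.predict(Xc, return_std=True)  # UCB score = mu + kappa * sigma
    return cands[int(np.argmax(mu + kappa * sigma))]
```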
+""" + +import os +import sys +import time +import json +import numpy as np +from collections import deque + +import gymnasium as gym +import gym_donkeycar +from stable_baselines3 import PPO + +# Add agent dir to path for wrappers +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from reward_wrapper import SpeedRewardWrapper +from donkeycar_sb3_runner import ThrottleClampWrapper + +CHAMPION_DIR = os.path.join(os.path.dirname(__file__), 'models', 'champion') +MANIFEST_PATH = os.path.join(CHAMPION_DIR, 'manifest.json') +MODEL_PATH = os.path.join(CHAMPION_DIR, 'model.zip') + + +def load_manifest(): + with open(MANIFEST_PATH) as f: + return json.load(f) + + +def print_banner(manifest): + print('=' * 65, flush=True) + print('🏆 DonkeyCar Champion Model Evaluation', flush=True) + print('=' * 65, flush=True) + print(f" Trial: {manifest['trial']}", flush=True) + print(f" mean_reward: {manifest['mean_reward']:.4f}", flush=True) + print(f" Params: {manifest['params']}", flush=True) + print(f" Model: {MODEL_PATH}", flush=True) + print('=' * 65, flush=True) + print(flush=True) + + +def compute_efficiency(pos_history): + """Path efficiency = net_displacement / total_path_length over window.""" + if len(pos_history) < 3: + return 1.0 + positions = list(pos_history) + net = np.linalg.norm(np.array(positions[-1]) - np.array(positions[0])) + total = sum( + np.linalg.norm(np.array(positions[i+1]) - np.array(positions[i])) + for i in range(len(positions)-1) + ) + return float(net / total) if total > 1e-6 else 1.0 + + +def run_episode(model, env, episode_num, max_steps=500): + """Run one episode with the champion policy, printing diagnostics.""" + print(f'\n--- Episode {episode_num} ---', flush=True) + obs, info = env.reset() + pos_history = deque(maxlen=30) + total_reward = 0.0 + step = 0 + + print(f'{"Step":>5} {"Speed":>6} {"CTE":>7} {"Eff%":>6} {"Rwd":>8} {"TotRwd":>10} {"Pos_x":>8} {"Pos_z":>8}', flush=True) + print('-' * 65, flush=True) + + while step < max_steps: + action, _ = model.predict(obs, deterministic=True) + result = env.step(action) + if len(result) == 5: + obs, reward, terminated, truncated, info = result + done = terminated or truncated + else: + obs, reward, done, info = result + + # Extract diagnostics from info + speed = float(info.get('speed', 0.0) or 0.0) + cte = float(info.get('cte', 0.0) or 0.0) + pos = info.get('pos', None) + if pos is not None: + pos_history.append(list(pos)[:3]) + px, pz = pos[0], pos[2] if len(pos) > 2 else 0.0 + else: + px, pz = 0.0, 0.0 + + efficiency = compute_efficiency(pos_history) + total_reward += reward + step += 1 + + # Print every 10 steps or on done + if step % 10 == 0 or done: + print(f'{step:>5} {speed:>6.2f} {cte:>7.3f} {efficiency*100:>5.1f}% {reward:>8.3f} {total_reward:>10.2f} {px:>8.2f} {pz:>8.2f}', flush=True) + + if done: + print(f'\n ✅ Episode {episode_num} done after {step} steps | total_reward={total_reward:.2f}', flush=True) + break + + if step >= max_steps: + print(f'\n ⏱️ Episode {episode_num} reached max_steps={max_steps} | total_reward={total_reward:.2f}', flush=True) + + return total_reward, step + + +def main(episodes=3, max_steps=500): + manifest = load_manifest() + print_banner(manifest) + + params = manifest['params'] + + print(f'[Eval] Connecting to simulator...', flush=True) + try: + env = gym.make('donkey-generated-roads-v0') + except Exception as e: + print(f'[Eval] FAILED to connect: {e}', flush=True) + sys.exit(1) + + # Apply same wrappers as training + env = ThrottleClampWrapper(env, throttle_min=0.2) + env = 
diff --git a/agent/outerloop-results/autoresearch_phase1_log.txt b/agent/outerloop-results/autoresearch_phase1_log.txt
index 5bde57a..35a0dad 100644
--- a/agent/outerloop-results/autoresearch_phase1_log.txt
+++ b/agent/outerloop-results/autoresearch_phase1_log.txt
@@ -1991,3 +1991,4 @@
 [2026-04-13 19:18:00] mean_reward=3332.0024 params={'n_steer': 4, 'n_throttle': 3, 'learning_rate': 0.0010146909128518657, 'timesteps': 4979, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
 [2026-04-13 19:18:00] mean_reward=2306.7610 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0004488352572615814, 'timesteps': 4898, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
 [2026-04-13 19:18:00] mean_reward=2286.9085 params={'n_steer': 5, 'n_throttle': 3, 'learning_rate': 0.0003386484278685721, 'timesteps': 4977, 'agent': 'ppo', 'eval_episodes': 3, 'reward_shaping': True}
+[2026-04-13 19:18:01] [AutoResearch] Git push complete after trial 50
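The trial lines above follow a regular `[timestamp] mean_reward=<float> params=<dict>` shape. For post-hoc analysis, here is a hedged sketch of a parser for that format; `parse_log` is an illustrative helper, not part of the repo. Note the params are a Python dict literal (single quotes, bare `True`), so `ast.literal_eval` is used rather than `json.loads`.

```python
import ast
import re

# Matches e.g. "[...] mean_reward=3332.0024 params={'n_steer': 4, ...}".
LINE_RE = re.compile(r"mean_reward=([-\d.]+) params=(\{.*\})")


def parse_log(path):
    """Recover (params, mean_reward) pairs from an autoresearch log file."""
    trials = []
    with open(path) as f:
        for line in f:
            m = LINE_RE.search(line)
            if m:  # status lines like "Git push complete" simply don't match
                trials.append({'mean_reward': float(m.group(1)),
                               'params': ast.literal_eval(m.group(2))})
    return trials
```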
diff --git a/docs/RESEARCH_LOG.md b/docs/RESEARCH_LOG.md
index d4e9676..e408932 100644
--- a/docs/RESEARCH_LOG.md
+++ b/docs/RESEARCH_LOG.md
@@ -247,3 +247,80 @@ shaped_reward = original_reward × (1 + speed_scale × speed × efficiency)
 | > 50% | Unstable policy, inconsistent behavior |
 
 This metric will be added to the autoresearch result logging and summary.
+
+---
+
+## 2026-04-13 — 🏆 PHASE 1 MILESTONE: Genuine Track Driving Confirmed!
+
+### Finding: Champion Model Drives the Track — Real RL Behaviour Proven
+
+**This is the first confirmed genuine driving result from the autoresearch pipeline.**
+
+**Visual confirmation (user):** "It is definitely driving! The donkeycar is driving along the track!"
+
+**Evaluation data — 3 episodes, 1500 max steps:**
+
+| Episode | Steps | Total Reward | Std | Efficiency |
+|---------|-------|--------------|------|------------|
+| 1 | 599 | 1022.73 | — | 96-100% |
+| 2 | 598 | 1023.35 | — | 96-100% |
+| 3 | 599 | 1022.25 | — | 96-100% |
+| **Mean** | **599** | **1022.78** | **0.45** | **~99%** |
+
+**Champion Model Parameters:**
+- agent: PPO, n_steer=7, n_throttle=3, lr=0.000680, timesteps=4787
+- Path: `agent/models/champion/model.zip`
+
+### Track Trajectory Analysis
+
+```
+Start:     Pos(6.25, 6.30)   → Starting line
+Step 300:  Pos(22.80, 2.09)  → Long straight, approaching first corner
+Step 400:  Pos(18.80, -6.96) → Negotiating first right-hand curve ✅
+Step 500:  Pos(28.12, -5.61) → Continuing along second straight
+Step 560:  Pos(33.12, -6.55) → Approaching second corner
+Step 599:  CRASH CTE=8.26    → Off track at second corner ❌
+```
+
+The car successfully:
+- Accelerates from 0 → 2.3 m/s along the straight
+- Navigates the first right-hand curve
+- Follows the track for ~600 steps, covering 30+ position units
+
+### Failure Analysis: The S-Curve Crash
+
+**User observation:** "The spot where the donkeycar goes off the track is during a right hand curve which quickly turns into a left hand curve. It doesn't even look like it sees the left hand curve."
+
+**What the data shows:**
+- Steps 540-560: CTE briefly near zero (0.24) — the car approaches the corner well
+- Steps 570+: CTE explodes 1.4 → 3.8 → 5.9 → 8.3 — the car overshoots
+- Speed at crash: 2.23-2.30 m/s — too fast for the S-curve
+
+**Root cause:** Only 4787 training timesteps — insufficient to learn:
+1. Speed reduction approaching corners
+2. Left-turn recovery after a right-hand overshoot
+3. S-curve geometry (right → quick left transition)
+
+**Key insight: the model never sees the left-hand curve** because during training it has always crashed at the right-hand part first. This is an exploration problem — the car needs more timesteps to get past this point and discover what lies beyond.
+
+### Reward Shaping Victory
+
+All three reward hacking fixes proved necessary and correct:
+- v1 additive → boundary oscillation exploit
+- v2 multiplicative → circular driving exploit
+- v3 path efficiency → genuine forward driving ✅
+
+The path efficiency metric (96-100% throughout the entire run) confirms the car is making continuous forward progress — not circling, not oscillating.
+
+### Phase 1 → Phase 2 Transition
+
+**Phase 1 objective achieved:** A real PPO model drives the DonkeyCar track with genuine forward motion, consistent behaviour (std=0.45), and a correct trajectory.
+
+**Next objective (targeted autoresearch):** Learn corner handling and speed modulation.
+- Increase timesteps to 10,000-50,000 per trial
+- The model needs to see the S-curve many times to learn the transition
+- Consider adding a CTE-rate-of-change penalty to discourage high speed at high CTE (a hedged sketch follows this entry)
+
+### This is Research!
+
+The reward hacking discovery and the progression from random walk → boundary oscillation → circular exploit → genuine driving represent real empirical RL research. Each failure mode revealed a fundamental property of reward design. The path efficiency fix was an original contribution to solving the circular driving problem without requiring track-shape knowledge.
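The CTE-rate-of-change penalty proposed above is not implemented anywhere in this diff. Below is a minimal sketch of one possible form, assuming a gymnasium-style wrapper and the `info` keys (`cte`, `speed`) already used by `evaluate_champion.py`; the wrapper name and the `cte_scale` default are hypothetical.

```python
import gymnasium as gym


class CTERatePenaltyWrapper(gym.Wrapper):
    """Penalize rapid lateral drift: a large |d(CTE)/step| at speed suggests
    the car is carrying too much speed into a corner."""

    def __init__(self, env, cte_scale=0.05):  # cte_scale is an assumed default
        super().__init__(env)
        self.cte_scale = cte_scale
        self._last_cte = 0.0

    def reset(self, **kwargs):
        self._last_cte = 0.0
        return self.env.reset(**kwargs)

    def step(self, action):
        # Assumes the 5-tuple gymnasium step API; the evaluator shows the
        # underlying env may also return the legacy 4-tuple.
        obs, reward, terminated, truncated, info = self.env.step(action)
        cte = float(info.get('cte', 0.0) or 0.0)
        speed = float(info.get('speed', 0.0) or 0.0)
        cte_rate = abs(cte - self._last_cte)
        self._last_cte = cte
        # Scale the penalty by speed so slow corrective steering stays cheap.
        reward -= self.cte_scale * cte_rate * speed
        return obs, reward, terminated, truncated, info
```

Stacked after the v3 shaping (`shaped_reward = original_reward × (1 + speed_scale × speed × efficiency)`), this would leave straight-line driving essentially untouched while making fast, late corrections at the S-curve expensive.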