From 66be5d83ff51a5e5bfb66b55210e7779384a5b67 Mon Sep 17 00:00:00 2001
From: paulh <paje0101+demo01@gmail.com>
Date: Thu, 9 Apr 2026 21:56:03 -0400
Subject: [PATCH] fix(ralph-loop): replace with three-tier session reset
 detection from master
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Supersedes the simple polling version written this session. The master
harness version (from the other machine) has:
  Tier 1 — Anthropic API probe via ANTHROPIC_API_KEY if available
  Tier 2 — Parse reset time from agent output ("resets 11am (America/New_York)")
  Tier 3 — Seeded --session-ends timestamp argument
  Tier 4 — Fixed fallback sleep (--retry-wait, default 1800s)

Agent: human
Tests: N/A
Tests-Added: 0
TypeScript: N/A
---
 ralph-loop.sh | 517 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 409 insertions(+), 108 deletions(-)

diff --git a/ralph-loop.sh b/ralph-loop.sh
index 7eb9931..deb6289 100755
--- a/ralph-loop.sh
+++ b/ralph-loop.sh
@@ -1,54 +1,62 @@
 #!/usr/bin/env bash
 #
-# Ralph Wiggum Loop — Autonomous agent iteration
+# Ralph Wiggum Loop — Script-Orchestrated Autonomous Agent Iteration
 #
-# Based on Geoffrey Huntley's approach:
-# - Each iteration spawns a FRESH agent with clean context
-# - Agent reads the plan, picks ONE task, implements, tests, commits, exits
-# - Loop restarts until all tasks are done
+# This runtime is for the "script is the orchestrator" model:
+# - The shell loop spawns a fresh agent every iteration
+# - The shell loop interprets runtime signals and failures
+# - The shell loop decides when to retry, stop, or wait for token reset
 #
-# Session limit handling:
-# - Detects Claude Pro usage limit messages in agent output
-# - Polls every SESSION_POLL_INTERVAL seconds until the session resets
-# - Resumes the same iteration automatically — no manual intervention needed
+# This is different from the "agent is the orchestrator" model used in
+# OpenClaw/manual orchestration, where a supervising agent evaluates results,
+# watches execution boards, and decides what to do next.
 #
 # Usage:
-#   ./ralph-loop.sh              # Build mode (default)
-#   ./ralph-loop.sh plan         # Planning mode (create IMPLEMENTATION_PLAN.md)
-#   ./ralph-loop.sh --max 20     # Limit to 20 iterations
-#   ./ralph-loop.sh --agent claude  # Use claude (default)
-#   ./ralph-loop.sh --agent codex   # Use OpenAI Codex CLI
-#   ./ralph-loop.sh --agent aider   # Use Aider
-#   ./ralph-loop.sh --agent gemini  # Use Gemini CLI
-#   ./ralph-loop.sh --agent custom  # Use custom agent (see below)
+#   ./ralph-loop.sh                               # Build mode (default)
+#   ./ralph-loop.sh plan                          # Planning mode
+#   ./ralph-loop.sh --max 20                      # Limit iterations
+#   ./ralph-loop.sh --agent claude                # Use claude (default)
+#   ./ralph-loop.sh --session-ends 2026-04-09T16:00:00
+#   ./ralph-loop.sh --retry-wait 1800
+#   ./ralph-loop.sh --board .harness/foo/execution-board.md
+#   ./ralph-loop.sh --no-require-pro
+#
+# Token / rate-limit handling:
+#   Tier 1 — Anthropic API probe if ANTHROPIC_API_KEY is available
+#   Tier 2 — Parse "resets 11am (America/New_York)" from agent output
+#   Tier 3 — Use seeded --session-ends time
+#   Tier 4 — Fixed fallback sleep
 #
 set -euo pipefail
 
-MODE="${1:-build}"
+MODE="build"
 MAX_ITERATIONS=50
 AGENT="claude"
 PLAN_FILE="IMPLEMENTATION_PLAN.md"
 SPEC_FILE="PROJECT-SPEC.md"
 AGENT_FILE="AGENT.md"
+BOARD_FILE=""
 LOG_DIR=".ralph-logs"
+RATE_LIMIT_WAIT=1800
+SESSION_ENDS=""
+REQUIRE_PRO=1
 
-# How often (in seconds) to probe whether the session has reset.
-# Default: 10 minutes. Adjust down if you want faster recovery.
-SESSION_POLL_INTERVAL="${SESSION_POLL_INTERVAL:-600}"
-
-# Parse arguments
-shift 2>/dev/null || true
 while [[ $# -gt 0 ]]; do
   case "$1" in
-    --max) MAX_ITERATIONS="$2"; shift 2 ;;
-    --agent) AGENT="$2"; shift 2 ;;
+    plan)              MODE="plan";             shift ;;
+    build)             MODE="build";            shift ;;
+    --max)             MAX_ITERATIONS="$2";     shift 2 ;;
+    --agent)           AGENT="$2";              shift 2 ;;
+    --retry-wait)      RATE_LIMIT_WAIT="$2";    shift 2 ;;
+    --session-ends)    SESSION_ENDS="$2";       shift 2 ;;
+    --board)           BOARD_FILE="$2";         shift 2 ;;
+    --no-require-pro)  REQUIRE_PRO=0;           shift ;;
     *) echo "Unknown option: $1"; exit 1 ;;
   esac
 done
 
 mkdir -p "$LOG_DIR"
 
-# Colors
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 RED='\033[0;31m'
@@ -62,16 +70,282 @@ warn()    { echo -e "${YELLOW}[ralph]${NC} $1"; }
 error()   { echo -e "${RED}[ralph]${NC} $1"; }
 info()    { echo -e "${CYAN}[ralph]${NC} $1"; }
 
-# Check prerequisites
+AGENT_EXIT_CODE=0
+
+get_claude_analysis_auth_json() {
+  env -u ANTHROPIC_API_KEY bash -ic 'claude auth status' 2>/dev/null | tail -n +1
+}
+
+verify_claude_pro_auth() {
+  local auth_json
+  auth_json=$(get_claude_analysis_auth_json)
+  if [[ -z "$auth_json" ]]; then
+    error "Could not determine Claude analysis auth status."
+    return 1
+  fi
+
+  AUTH_JSON="$auth_json" python3 - <<'PY'
+import json
+import os
+import sys
+
+data = json.loads(os.environ["AUTH_JSON"])
+if data.get("loggedIn") and data.get("subscriptionType") == "pro":
+    print("ok")
+    sys.exit(0)
+
+print(json.dumps(data, ensure_ascii=True))
+sys.exit(1)
+PY
+}
+
+# Log the claude binary path, version, API-key hint and auth summary.
+# Informational only: a failed lookup is logged as missing and must not abort.
+log_agent_runtime() {
+  case "$AGENT" in
+    claude)
+      local claude_path claude_version auth_json key_hint
+      claude_path=$(bash -ic 'command -v claude' 2>/dev/null | tail -n 1 || true)
+      claude_version=$(bash -ic 'claude --version' 2>/dev/null | tail -n 1 || true)
+      auth_json=$(get_claude_analysis_auth_json || true)  # a failed lookup must not trip set -e
+      log "Claude binary: ${claude_path:-not found}"
+      log "Claude version: ${claude_version:-unknown}"
+      key_hint="ANTHROPIC_API_KEY is not set"
+      [[ -z "${ANTHROPIC_API_KEY:-}" ]] || key_hint="ANTHROPIC_API_KEY is set (API probe enabled)"
+      log "Claude auth hint: $key_hint"
+      if [[ -n "$auth_json" ]]; then
+        log "Claude analysis auth: $(AUTH_JSON="$auth_json" python3 - <<'PY'
+import json
+import os
+
+data = json.loads(os.environ["AUTH_JSON"])
+print(f"authMethod={data.get('authMethod')} subscriptionType={data.get('subscriptionType')} apiKeySource={data.get('apiKeySource')}")
+PY
+)"
+      fi
+      ;;
+  esac
+}
+
 if [[ ! -f "$SPEC_FILE" ]]; then
   error "Missing $SPEC_FILE — create your project spec first."
   exit 1
 fi
-
 if [[ ! -f "$AGENT_FILE" ]]; then
   warn "No $AGENT_FILE found. Using default agent instructions."
 fi
 
+# Tier 1: ask the Anthropic API when the output-token rate limit resets.
+# Prints "<reset_epoch>|<remaining>" on stdout and returns 0; returns 1 when
+# ANTHROPIC_API_KEY is unset, curl fails, or no parseable reset header is found.
+probe_rate_limit() {
+  [[ -n "${ANTHROPIC_API_KEY:-}" ]] || return 1
+
+  local headers
+  headers=$(curl -s -D - -o /dev/null \
+    --max-time 10 \
+    -X POST "https://api.anthropic.com/v1/messages" \
+    -H "x-api-key: $ANTHROPIC_API_KEY" \
+    -H "anthropic-version: 2023-06-01" \
+    -H "content-type: application/json" \
+    -d '{"model":"claude-haiku-4-5-20251001","max_tokens":1,"messages":[{"role":"user","content":"hi"}]}' \
+    2>/dev/null) || return 1
+
+  local reset_str remaining
+  reset_str=$(echo "$headers" | grep -i "anthropic-ratelimit-output-tokens-reset:" | awk '{print $2}' | tr -d '\r\n')
+  remaining=$(echo "$headers" | grep -i "anthropic-ratelimit-output-tokens-remaining:" | awk '{print $2}' | tr -d '\r\n')
+  [[ -n "$reset_str" ]] || return 1
+
+  local reset_epoch
+  reset_epoch=$(date -d "$reset_str" +%s 2>/dev/null) \
+    || reset_epoch=$(python3 -c "
+from datetime import datetime, timezone
+import sys
+s = sys.argv[1].strip()
+for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00', '%Y-%m-%dT%H:%M:%S%z'):
+    try:
+        dt = datetime.strptime(s, fmt)
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=timezone.utc)
+        print(int(dt.timestamp()))
+        break
+    except Exception:
+        pass
+" "$reset_str" 2>/dev/null) || return 1
+  # The python fallback exits 0 even when no format matched; reject an empty
+  # or non-numeric epoch so the caller falls through to the next tier.
+  [[ "$reset_epoch" =~ ^[0-9]+$ ]] || return 1
+  echo "${reset_epoch}|${remaining:-unknown}"
+}
+
+# Print epoch seconds for timestamp $1 (nothing if unparseable); zone-less input is local time.
+parse_epoch() {
+  local ts="$1"
+  date -d "$ts" +%s 2>/dev/null \
+    || python3 -c "
+from datetime import datetime, timezone
+import sys
+s = sys.argv[1]
+for fmt in ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S%z', '%Y-%m-%dT%H:%M:%S+00:00'):
+    try:
+        dt = datetime.strptime(s, fmt)
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=timezone.utc) if fmt.endswith(('Z', '+00:00')) else dt.astimezone()  # literal Z/+00:00 is UTC, else local like date -d
+        print(int(dt.timestamp()))
+        break
+    except Exception:
+        pass
+" "$ts" 2>/dev/null || true
+}
+
+format_session_end() {
+  local epoch="$1"
+  date -d "@$epoch" +"%Y-%m-%dT%H:%M:%S" 2>/dev/null \
+    || date -r "$epoch" +"%Y-%m-%dT%H:%M:%S" 2>/dev/null \
+    || echo ""
+}
+
+infer_reset_epoch_from_log() {
+  local logfile="$1"
+
+  python3 - "$logfile" <<'PY' 2>/dev/null || true
+from datetime import datetime, timedelta
+from pathlib import Path
+import re
+import sys
+
+try:
+    from zoneinfo import ZoneInfo
+except Exception:
+    ZoneInfo = None
+
+logfile = Path(sys.argv[1])
+if not logfile.exists():
+    raise SystemExit(0)
+
+text = logfile.read_text(encoding="utf-8", errors="ignore")
+matches = list(re.finditer(r"resets\s+(\d{1,2})(?::(\d{2}))?\s*(am|pm)\s*\(([^)]+)\)", text, re.IGNORECASE))
+if not matches:
+    raise SystemExit(0)
+
+match = matches[-1]
+hour = int(match.group(1))
+minute = int(match.group(2) or "0")
+ampm = match.group(3).lower()
+tz_name = match.group(4).strip()
+
+if hour == 12:
+    hour = 0
+if ampm == "pm":
+    hour += 12
+
+if ZoneInfo is None:
+    raise SystemExit(0)
+
+tz = ZoneInfo(tz_name)
+now = datetime.now(tz)
+candidate = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
+if candidate <= now:
+    candidate += timedelta(days=1)
+
+print(int(candidate.timestamp()))
+PY
+}
+
+# Block until epoch $1, redrawing a countdown labelled $2 (default "token reset") every <=5s.
+countdown_sleep() {
+  local target_epoch=$1
+  local label="${2:-token reset}"
+  local now remaining h m s
+  while true; do
+    now=$(date +%s)
+    remaining=$(( target_epoch - now ))
+    (( remaining > 0 )) || break
+    h=$(( remaining / 3600 ))
+    m=$(( (remaining % 3600) / 60 ))
+    s=$(( remaining % 60 ))
+    printf "\r${YELLOW}[ralph]${NC} Waiting for %s... %02dh%02dm%02ds remaining   " "$label" "$h" "$m" "$s"
+    # Clamp the final tick so we wake at the target instead of up to 5s late.
+    sleep "$(( remaining < 5 ? remaining : 5 ))"
+  done
+  echo ""
+}
+# wait_for_tokens [logfile]: pick a wake-up time via tiers 1-4 (first hit wins), then sleep until it.
+wait_for_tokens() {
+  local logfile="${1:-}"
+  warn "Rate limit / token exhaustion detected."
+  echo ""
+  # wake_epoch stays empty until one tier produces a usable wake-up time.
+  local wake_epoch="" wake_source=""
+  # NOTE(review): run_agent launches claude with ANTHROPIC_API_KEY unset, so the limit probed here may not be the one that was hit; confirm.
+  info "Tier 1 — probing Anthropic API for exact reset time..."
+  local probe_result
+  if probe_result=$(probe_rate_limit); then
+    local probe_epoch probe_remaining
+    probe_epoch="${probe_result%%|*}"
+    probe_remaining="${probe_result##*|}"
+    local now
+    now=$(date +%s)
+    if [[ -n "$probe_epoch" && "$probe_epoch" -gt "$now" ]]; then
+      wake_epoch=$probe_epoch
+      wake_source="API probe"
+      info "Tokens remaining: ${probe_remaining}. Reset at: $(date -d "@$probe_epoch" 2>/dev/null || date -r "$probe_epoch" 2>/dev/null || echo "$probe_epoch")"
+    else
+      info "Probe succeeded but reset time is already past — tokens may have reset. Retrying immediately."
+      return 0  # no sleep at all on this path
+    fi
+  else
+    warn "Tier 1 unavailable (no ANTHROPIC_API_KEY or probe failed)."
+  fi
+  # Tier 2 runs only when Tier 1 produced nothing and a logfile was passed.
+  if [[ -z "$wake_epoch" && -n "$logfile" ]]; then
+    info "Tier 2 — parsing reset time from agent output..."
+    local log_epoch
+    log_epoch=$(infer_reset_epoch_from_log "$logfile") || true
+    if [[ -n "$log_epoch" ]]; then
+      wake_epoch=$(( log_epoch + 60 ))  # same +60s margin Tier 3 applies
+      wake_source="agent output"
+      SESSION_ENDS=$(format_session_end "$log_epoch")  # overwrites the global seed read by Tier 3
+      info "Detected reset at: $(date -d "@$log_epoch" 2>/dev/null || date -r "$log_epoch" 2>/dev/null || echo "$log_epoch")"
+      if [[ -n "$SESSION_ENDS" ]]; then
+        info "Updated --session-ends seed to $SESSION_ENDS"
+      fi
+    else
+      warn "Could not extract a reset time from $logfile."
+    fi
+  fi
+  # Tier 3 uses SESSION_ENDS (from --session-ends or a Tier 2 update) if still in the future.
+  if [[ -z "$wake_epoch" && -n "$SESSION_ENDS" ]]; then
+    info "Tier 3 — using --session-ends $SESSION_ENDS..."
+    local seed_epoch
+    seed_epoch=$(parse_epoch "$SESSION_ENDS") || true
+    if [[ -n "$seed_epoch" ]]; then
+      local now
+      now=$(date +%s)
+      if [[ "$seed_epoch" -gt "$now" ]]; then
+        wake_epoch=$(( seed_epoch + 60 ))
+        wake_source="session seed (--session-ends)"
+        info "Will wake at: $(date -d "@$wake_epoch" 2>/dev/null || date -r "$wake_epoch" 2>/dev/null || echo "$wake_epoch") (+60s buffer)"
+      else
+        warn "--session-ends is stale (already past). Ignoring it for this retry."
+      fi
+    else
+      warn "Could not parse --session-ends value: '$SESSION_ENDS'"
+    fi
+  fi
+  # Tier 4 is the last resort: a fixed RATE_LIMIT_WAIT-second sleep counted from now.
+  if [[ -z "$wake_epoch" ]]; then
+    warn "Tier 4 — no reset time available. Sleeping ${RATE_LIMIT_WAIT}s ($(( RATE_LIMIT_WAIT / 60 )) min)."
+    warn "Tip: set ANTHROPIC_API_KEY or pass --session-ends for a smarter wake-up."
+    wake_epoch=$(( $(date +%s) + RATE_LIMIT_WAIT ))
+    wake_source="fixed wait"
+  fi
+
+  info "Strategy: $wake_source. Press Ctrl+C to cancel."
+  countdown_sleep "$wake_epoch" "token reset"
+  log "Wake-up time reached. Retrying..."
+}
+
 run_agent() {
   local iteration=$1
   local mode=$2
@@ -86,12 +360,23 @@ run_agent() {
 
   log "Iteration $iteration ($mode mode) — starting fresh agent..."
 
-  # Disable pipefail around the agent call so a non-zero claude exit doesn't
-  # kill the script. We inspect the log content instead.
+  if [[ "$AGENT" == "claude" && "$REQUIRE_PRO" == "1" ]]; then
+    if ! verify_claude_pro_auth >/tmp/ralph-auth-check.out 2>/tmp/ralph-auth-check.err; then
+      error "Claude analysis auth is not using Pro. Refusing to run."
+      if [[ -s /tmp/ralph-auth-check.out ]]; then
+        error "Auth details: $(tail -n 1 /tmp/ralph-auth-check.out)"
+      fi
+      if [[ -s /tmp/ralph-auth-check.err ]]; then
+        error "Auth check stderr: $(tail -n 1 /tmp/ralph-auth-check.err)"
+      fi
+      exit 1
+    fi
+  fi
+
   set +e
   case "$AGENT" in
     claude)
-      echo "$prompt" | claude -p --output-format text 2>&1 | tee "$logfile"
+      echo "$prompt" | env -u ANTHROPIC_API_KEY claude -p --dangerously-skip-permissions --output-format text 2>&1 | tee "$logfile"
       ;;
     codex)
       echo "$prompt" | codex 2>&1 | tee "$logfile"
@@ -107,87 +392,72 @@ run_agent() {
         ./custom-agent.sh "$prompt" 2>&1 | tee "$logfile"
       else
         error "Custom agent selected but ./custom-agent.sh not found or not executable"
+        set -e
         exit 1
       fi
       ;;
     *)
-      error "Unknown agent: $AGENT"
-      error "Supported agents: claude, codex, aider, gemini, custom"
+      error "Unknown agent: $AGENT. Supported: claude, codex, aider, gemini, custom"
+      set -e
       exit 1
       ;;
   esac
+  AGENT_EXIT_CODE=$?
   set -e
-
-  return 0
-}
-
-# Probe whether claude is available by sending a trivial request.
-# Returns 0 if available, 1 if still rate-limited or erroring.
-probe_session() {
-  local probe_log="$LOG_DIR/probe.log"
-  set +e
-  echo "Reply with the single word OK and nothing else." \
-    | claude -p --output-format text > "$probe_log" 2>&1
-  local rc=$?
-  set -e
-
-  if [[ $rc -ne 0 ]]; then
-    return 1
-  fi
-  # Also check the output doesn't contain a limit message
-  if grep -qi 'usage limit\|rate limit\|limit reached\|exceeded.*limit' "$probe_log" 2>/dev/null; then
-    return 1
-  fi
   return 0
 }
 
 check_output() {
   local logfile="$1"
 
-  # Session / usage limit — must check BEFORE generic promise checks
-  if grep -qi 'usage limit\|rate limit\|limit reached\|exceeded.*limit\|Claude AI usage' "$logfile" 2>/dev/null; then
-    return 4  # Rate limited
-  fi
-
   if grep -q '<promise>DONE</promise>' "$logfile" 2>/dev/null; then
-    return 0  # Done
+    return 0
   elif grep -q '<promise>STUCK</promise>' "$logfile" 2>/dev/null; then
-    return 2  # Stuck — needs human intervention
+    return 2
   elif grep -q '<promise>ERROR</promise>' "$logfile" 2>/dev/null; then
-    return 3  # Unrecoverable error
+    return 3
+  elif grep -Eqi "rate.limit|rate_limit|too many requests|exceeded.*quota|usage limit|out of tokens|overloaded|you'?ve hit your limit|resets [0-9]{1,2}(:[0-9]{2})?(am|pm)" "$logfile" 2>/dev/null; then
+    return 4
   else
-    return 1  # Normal iteration — continue
+    return 1
   fi
 }
 
-wait_for_session_reset() {
-  local iteration=$1
-  warn "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-  warn "Session usage limit hit during iteration $iteration."
-  warn "Will probe every ${SESSION_POLL_INTERVAL}s until session resets."
-  warn "No manual action needed — loop will resume automatically."
-  warn "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+plan_has_remaining_work() {
+  if [[ ! -f "$PLAN_FILE" ]]; then
+    return 1
+  fi
 
-  local attempt=0
-  while true; do
-    ((attempt++))
-    local next_check
-    next_check=$(date -d "+${SESSION_POLL_INTERVAL} seconds" '+%H:%M:%S' 2>/dev/null \
-      || date -v "+${SESSION_POLL_INTERVAL}S" '+%H:%M:%S' 2>/dev/null \
-      || echo "soon")
-    info "Probe attempt $attempt — next check at $next_check..."
-    sleep "$SESSION_POLL_INTERVAL"
+  if grep -Eq '^- \[ \]' "$PLAN_FILE" 2>/dev/null; then
+    return 0
+  fi
 
-    if probe_session; then
-      success "Session available! Resuming iteration $iteration..."
-      return 0
-    else
-      warn "Still rate-limited (attempt $attempt). Waiting another ${SESSION_POLL_INTERVAL}s..."
-    fi
-  done
+  return 1
 }
 
-# ─── Main ────────────────────────────────────────────────────────────────────
+board_has_remaining_work() {
+  if [[ -z "$BOARD_FILE" || ! -f "$BOARD_FILE" ]]; then
+    return 1
+  fi
+
+  if grep -Eq '\| .*⬜ Pending .* \||\| .*🔄 In Progress .* \|' "$BOARD_FILE" 2>/dev/null; then
+    return 0
+  fi
+
+  return 1
+}
+
+has_remaining_work() {
+  if board_has_remaining_work; then
+    return 0
+  fi
+
+  if plan_has_remaining_work; then
+    return 0
+  fi
+
+  return 1
+}
 
 if [[ "$MODE" == "plan" ]]; then
   log "Planning mode — creating implementation plan..."
@@ -197,44 +467,75 @@ if [[ "$MODE" == "plan" ]]; then
 fi
 
 log "Starting Ralph Wiggum loop (max $MAX_ITERATIONS iterations)"
-log "Agent:               $AGENT"
-log "Spec:                $SPEC_FILE"
-log "Plan:                $PLAN_FILE"
-log "Poll interval:       ${SESSION_POLL_INTERVAL}s (session limit recovery)"
+log "Runtime model: script-orchestrated"
+log "Agent: $AGENT"
+log "Spec: $SPEC_FILE"
+log "Plan: $PLAN_FILE"
+if [[ -n "$BOARD_FILE" ]]; then
+  log "Board: $BOARD_FILE"
+fi
+if [[ -n "$SESSION_ENDS" ]]; then
+  log "Tier 3 (session seed): $SESSION_ENDS"
+fi
+if [[ "$AGENT" == "claude" ]]; then
+  log_agent_runtime
+  if [[ "$REQUIRE_PRO" == "1" ]]; then
+    log "Pro guard: enabled"
+  else
+    warn "Pro guard: disabled (--no-require-pro)"
+  fi
+fi
 echo ""
 
-i=1
-while [[ $i -le $MAX_ITERATIONS ]]; do
+for i in $(seq 1 "$MAX_ITERATIONS"); do
   run_agent "$i" build
   logfile="$LOG_DIR/iteration-${i}.log"
 
-  # Capture return value without triggering set -e
-  check_output "$logfile" || status=$?
-  status=${status:-0}
+  status=0
+  check_output "$logfile" || status=$?  # non-zero codes are signals; '||' keeps set -e from aborting
 
   case $status in
     0)
-      success "ALL TASKS COMPLETE after $i iterations!"
-      exit 0
+      if has_remaining_work; then
+        warn "Agent reported DONE, but the tracking artifacts still show work remaining."
+        warn "Ignoring false DONE and restarting with fresh context."
+        echo ""
+        sleep 2
+      else
+        success "All tracked work appears complete after $i iterations."
+        exit 0
+      fi
       ;;
     2)
-      warn "Agent is stuck on iteration $i. Review $logfile and intervene."
+      warn "Agent is stuck. Review $logfile and intervene."
       exit 1
       ;;
     3)
-      error "Agent encountered an error on iteration $i. Review $logfile."
+      error "Agent encountered an error. Review $logfile."
       exit 1
       ;;
     4)
-      # Rate limited — wait for reset, then retry the SAME iteration
-      wait_for_session_reset "$i"
-      # Do NOT increment i — retry the same task
+      warn "Token/rate limit hit on iteration $i."
+      wait_for_tokens "$logfile"
+      echo ""
       ;;
     1)
-      log "Iteration $i complete. Restarting with fresh context..."
-      echo ""
-      sleep 2
-      ((i++))
+      if [[ $AGENT_EXIT_CODE -ne 0 ]]; then
+        warn "Agent exited with code $AGENT_EXIT_CODE but did not emit a recognized promise signal."
+        if has_remaining_work; then
+          warn "Tracked work remains. Restarting fresh."
+          echo ""
+          sleep 2
+        else
+          error "No work remains in tracking artifacts, but agent did not finish cleanly."
+          error "Review $logfile."
+          exit 1
+        fi
+      else
+        log "Iteration $i complete. Restarting with fresh context..."
+        echo ""
+        sleep 2
+      fi
       ;;
   esac
 done