diff --git a/ralph-loop.sh b/ralph-loop.sh
index 7eb9931..deb6289 100755
--- a/ralph-loop.sh
+++ b/ralph-loop.sh
@@ -1,54 +1,62 @@
#!/usr/bin/env bash
#
-# Ralph Wiggum Loop — Autonomous agent iteration
+# Ralph Wiggum Loop — Script-Orchestrated Autonomous Agent Iteration
#
-# Based on Geoffrey Huntley's approach:
-# - Each iteration spawns a FRESH agent with clean context
-# - Agent reads the plan, picks ONE task, implements, tests, commits, exits
-# - Loop restarts until all tasks are done
+# This runtime is for the "script is the orchestrator" model:
+# - The shell loop spawns a fresh agent every iteration
+# - The shell loop interprets runtime signals and failures
+# - The shell loop decides when to retry, stop, or wait for token reset
#
-# Session limit handling:
-# - Detects Claude Pro usage limit messages in agent output
-# - Polls every SESSION_POLL_INTERVAL seconds until the session resets
-# - Resumes the same iteration automatically — no manual intervention needed
+# This is different from the "agent is the orchestrator" model used in
+# OpenClaw/manual orchestration, where a supervising agent evaluates results,
+# watches execution boards, and decides what to do next.
#
# Usage:
-# ./ralph-loop.sh # Build mode (default)
-# ./ralph-loop.sh plan # Planning mode (create IMPLEMENTATION_PLAN.md)
-# ./ralph-loop.sh --max 20 # Limit to 20 iterations
-# ./ralph-loop.sh --agent claude # Use claude (default)
-# ./ralph-loop.sh --agent codex # Use OpenAI Codex CLI
-# ./ralph-loop.sh --agent aider # Use Aider
-# ./ralph-loop.sh --agent gemini # Use Gemini CLI
-# ./ralph-loop.sh --agent custom # Use custom agent (see below)
+# ./ralph-loop.sh # Build mode (default)
+# ./ralph-loop.sh plan # Planning mode
+# ./ralph-loop.sh --max 20 # Limit iterations
+# ./ralph-loop.sh --agent claude # Use claude (default)
+# ./ralph-loop.sh --session-ends 2026-04-09T16:00:00
+# ./ralph-loop.sh --retry-wait 1800
+# ./ralph-loop.sh --board .harness/foo/execution-board.md
+# ./ralph-loop.sh --no-require-pro
+#
+# Token / rate-limit handling:
+# Tier 1 — Anthropic API probe if ANTHROPIC_API_KEY is available
+# Tier 2 — Parse "resets 11am (America/New_York)" from agent output
+# Tier 3 — Use seeded --session-ends time
+# Tier 4 — Fixed fallback sleep
#
set -euo pipefail
-MODE="${1:-build}"
+MODE="build"
MAX_ITERATIONS=50
AGENT="claude"
PLAN_FILE="IMPLEMENTATION_PLAN.md"
SPEC_FILE="PROJECT-SPEC.md"
AGENT_FILE="AGENT.md"
+BOARD_FILE=""
LOG_DIR=".ralph-logs"
+RATE_LIMIT_WAIT=1800
+SESSION_ENDS=""
+REQUIRE_PRO=1
-# How often (in seconds) to probe whether the session has reset.
-# Default: 10 minutes. Adjust down if you want faster recovery.
-SESSION_POLL_INTERVAL="${SESSION_POLL_INTERVAL:-600}"
-
-# Parse arguments
-shift 2>/dev/null || true
while [[ $# -gt 0 ]]; do
case "$1" in
- --max) MAX_ITERATIONS="$2"; shift 2 ;;
- --agent) AGENT="$2"; shift 2 ;;
+ plan) MODE="plan"; shift ;;
+ build) MODE="build"; shift ;;
+ --max) MAX_ITERATIONS="$2"; shift 2 ;;
+ --agent) AGENT="$2"; shift 2 ;;
+ --retry-wait) RATE_LIMIT_WAIT="$2"; shift 2 ;;
+ --session-ends) SESSION_ENDS="$2"; shift 2 ;;
+ --board) BOARD_FILE="$2"; shift 2 ;;
+ --no-require-pro) REQUIRE_PRO=0; shift ;;
*) echo "Unknown option: $1"; exit 1 ;;
esac
done
mkdir -p "$LOG_DIR"
-# Colors
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
@@ -62,16 +70,282 @@ warn() { echo -e "${YELLOW}[ralph]${NC} $1"; }
error() { echo -e "${RED}[ralph]${NC} $1"; }
info() { echo -e "${CYAN}[ralph]${NC} $1"; }
-# Check prerequisites
+AGENT_EXIT_CODE=0
+
+get_claude_analysis_auth_json() {  # Print the output of `claude auth status`, queried with ANTHROPIC_API_KEY removed from the environment.
+  env -u ANTHROPIC_API_KEY bash -ic 'claude auth status' 2>/dev/null | tail -n +1  # stderr is dropped; `tail -n +1` passes every line through
+}
+
+verify_claude_pro_auth() {
+  # Succeeds (printing "ok") only when the auth status reports a logged-in
+  # "pro" subscription; otherwise prints the status as ASCII JSON and fails.
+  local status_json
+  status_json=$(get_claude_analysis_auth_json)
+  if [[ -z "$status_json" ]]; then
+    error "Could not determine Claude analysis auth status."
+    return 1
+  fi
+  AUTH_JSON="$status_json" python3 - <<'PY'
+import json
+import os
+import sys
+
+status = json.loads(os.environ["AUTH_JSON"])
+is_pro = bool(status.get("loggedIn")) and status.get("subscriptionType") == "pro"
+if not is_pro:
+    print(json.dumps(status, ensure_ascii=True))
+    sys.exit(1)
+print("ok")
+PY
+}
+
+log_agent_runtime() {
+  # Log diagnostics about the selected agent's CLI; only "claude" is handled.
+  case "$AGENT" in
+    claude)
+      local claude_path claude_version auth_json
+      claude_path=$(bash -ic 'command -v claude' 2>/dev/null | tail -n 1 || true)
+      claude_version=$(bash -ic 'claude --version' 2>/dev/null | tail -n 1 || true)
+      # Best-effort like the lookups above: a failed auth query must not abort the script under `set -e`.
+      auth_json=$(get_claude_analysis_auth_json) || true
+      log "Claude binary: ${claude_path:-not found}"
+      log "Claude version: ${claude_version:-unknown}"
+      if [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
+        log "Claude auth hint: ANTHROPIC_API_KEY is set (API probe enabled)"
+      else
+        log "Claude auth hint: ANTHROPIC_API_KEY is not set"
+      fi
+      if [[ -n "$auth_json" ]]; then
+        log "Claude analysis auth: $(AUTH_JSON="$auth_json" python3 - <<'PY'
+import json, os
+data = json.loads(os.environ["AUTH_JSON"])
+print(f"authMethod={data.get('authMethod')} subscriptionType={data.get('subscriptionType')} apiKeySource={data.get('apiKeySource')}")
+PY
+)"
+      fi
+      ;;
+  esac
+}
+
if [[ ! -f "$SPEC_FILE" ]]; then
error "Missing $SPEC_FILE — create your project spec first."
exit 1
fi
-
if [[ ! -f "$AGENT_FILE" ]]; then
warn "No $AGENT_FILE found. Using default agent instructions."
fi
+probe_rate_limit() {
+  # Tier 1 probe: POST a 1-token request to the Anthropic Messages API and read
+  # the output-token rate-limit headers from the response.
+  # Outputs: "<reset_epoch>|<remaining or unknown>" on stdout.
+  # Returns: 1 when ANTHROPIC_API_KEY is unset, curl fails, the reset header
+  #          is absent, or its timestamp cannot be turned into an epoch.
+  [[ -n "${ANTHROPIC_API_KEY:-}" ]] || return 1
+
+  local headers
+  headers=$(curl -s -D - -o /dev/null \
+    --max-time 10 \
+    -X POST "https://api.anthropic.com/v1/messages" \
+    -H "x-api-key: $ANTHROPIC_API_KEY" \
+    -H "anthropic-version: 2023-06-01" \
+    -H "content-type: application/json" \
+    -d '{"model":"claude-haiku-4-5-20251001","max_tokens":1,"messages":[{"role":"user","content":"hi"}]}' \
+    2>/dev/null) || return 1
+
+  local reset_str remaining
+  reset_str=$(echo "$headers" | grep -i "anthropic-ratelimit-output-tokens-reset:" | awk '{print $2}' | tr -d '\r\n')
+  remaining=$(echo "$headers" | grep -i "anthropic-ratelimit-output-tokens-remaining:" | awk '{print $2}' | tr -d '\r\n')
+  [[ -n "$reset_str" ]] || return 1
+  local reset_epoch
+  reset_epoch=$(date -d "$reset_str" +%s 2>/dev/null) \
+    || reset_epoch=$(python3 -c "
+from datetime import datetime, timezone
+import sys
+s = sys.argv[1].strip()
+for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00', '%Y-%m-%dT%H:%M:%S%z'):
+    try:
+        dt = datetime.strptime(s, fmt)
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=timezone.utc)
+        print(int(dt.timestamp()))
+        break
+    except Exception:
+        pass
+" "$reset_str" 2>/dev/null) || return 1
+  # The Python fallback exits 0 even when no format matched; never report an empty epoch as success.
+  [[ "$reset_epoch" =~ ^[0-9]+$ ]] || return 1
+  echo "${reset_epoch}|${remaining:-unknown}"
+}
+
+parse_epoch() {
+  # Print the Unix epoch for timestamp $1, or nothing if it cannot be parsed.
+  # Zone-less values are local time in both paths, matching format_session_end.
+  local ts="$1"
+  date -d "$ts" +%s 2>/dev/null \
+    || python3 -c "
+from datetime import datetime, timezone
+import sys
+s = sys.argv[1]
+for fmt, utc in (('%Y-%m-%dT%H:%M:%S', False), ('%Y-%m-%d %H:%M:%S', False), ('%Y-%m-%dT%H:%M:%S%z', False),
+                 ('%Y-%m-%dT%H:%M:%SZ', True), ('%Y-%m-%dT%H:%M:%S+00:00', True)):
+    try:
+        dt = datetime.strptime(s, fmt)
+    except ValueError:
+        continue
+    print(int((dt.replace(tzinfo=timezone.utc) if utc else dt).timestamp()))
+    break
+" "$ts" 2>/dev/null || true
+}
+
+format_session_end() {
+  # Render epoch $1 as "YYYY-MM-DDTHH:MM:SS"; prints an empty line when both
+  # date invocations (`-d @epoch`, then `-r epoch`) fail.
+  local out_fmt="+%Y-%m-%dT%H:%M:%S"
+  date -d "@$1" "$out_fmt" 2>/dev/null || date -r "$1" "$out_fmt" 2>/dev/null || echo ""
+}
+
+infer_reset_epoch_from_log() {
+  local logfile="$1"
+  # Print the epoch of the last "resets <time> (<tz>)" message in $1; prints nothing on no match or any failure (stderr dropped, `|| true`).
+  python3 - "$logfile" <<'PY' 2>/dev/null || true
+from datetime import datetime, timedelta
+from pathlib import Path
+import re
+import sys
+# zoneinfo import is guarded; when it fails, ZoneInfo stays None and the script exits quietly further down.
+try:
+    from zoneinfo import ZoneInfo
+except Exception:
+    ZoneInfo = None
+
+logfile = Path(sys.argv[1])
+if not logfile.exists():
+    raise SystemExit(0)
+# Matches e.g. "resets 11am (America/New_York)" or "resets 3:30 pm (UTC)", case-insensitively.
+text = logfile.read_text(encoding="utf-8", errors="ignore")
+matches = list(re.finditer(r"resets\s+(\d{1,2})(?::(\d{2}))?\s*(am|pm)\s*\(([^)]+)\)", text, re.IGNORECASE))
+if not matches:
+    raise SystemExit(0)
+# Only the last occurrence in the log is used.
+match = matches[-1]
+hour = int(match.group(1))
+minute = int(match.group(2) or "0")
+ampm = match.group(3).lower()
+tz_name = match.group(4).strip()
+# 12-hour to 24-hour conversion: 12am -> 0, 12pm -> 12.
+if hour == 12:
+    hour = 0
+if ampm == "pm":
+    hour += 12
+
+if ZoneInfo is None:
+    raise SystemExit(0)
+# Next occurrence of that wall-clock time in the named zone: today, or tomorrow if it is not after "now".
+tz = ZoneInfo(tz_name)
+now = datetime.now(tz)
+candidate = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
+if candidate <= now:
+    candidate += timedelta(days=1)
+
+print(int(candidate.timestamp()))
+PY
+}
+
+countdown_sleep() {
+  # Block until the clock reaches epoch $1, redrawing a one-line countdown
+  # (carriage return, no newline) every 5 seconds; $2 labels what is awaited.
+  local target_epoch=$1
+  local label="${2:-token reset}"
+  local left hh mm ss
+  while :; do
+    left=$(( target_epoch - $(date +%s) ))
+    (( left > 0 )) || break
+    hh=$(( left / 3600 ))
+    mm=$(( (left % 3600) / 60 ))
+    ss=$(( left % 60 ))
+    printf "\r${YELLOW}[ralph]${NC} Waiting for %s... %02dh%02dm%02ds remaining " "$label" "$hh" "$mm" "$ss"
+    sleep 5
+  done
+  # Terminate the countdown line.
+  echo ""
+}
+
+wait_for_tokens() {
+  # Sleep until the rate limit is expected to lift, using the first tier that yields a wake time.
+  # $1 (optional): agent log scanned for a reset message (Tier 2).
+  # Globals: SESSION_ENDS (read; overwritten when Tier 2 succeeds), RATE_LIMIT_WAIT (read).
+  local logfile="${1:-}"
+  warn "Rate limit / token exhaustion detected."
+  echo ""
+  local wake_epoch="" wake_source="" now
+  info "Tier 1 — probing Anthropic API for exact reset time..."
+  local probe_result
+  if probe_result=$(probe_rate_limit); then
+    local probe_epoch probe_remaining
+    probe_epoch="${probe_result%%|*}"
+    probe_remaining="${probe_result##*|}"
+    now=$(date +%s)
+    if [[ ! "$probe_epoch" =~ ^[0-9]+$ ]]; then
+      # An empty or garbled epoch is a failed probe, not an elapsed reset:
+      # fall through to the next tier instead of retrying with no wait.
+      warn "Tier 1 probe returned no usable reset time."
+    elif [[ "$probe_epoch" -gt "$now" ]]; then
+      wake_epoch=$probe_epoch
+      wake_source="API probe"
+      info "Tokens remaining: ${probe_remaining}. Reset at: $(date -d "@$probe_epoch" 2>/dev/null || date -r "$probe_epoch" 2>/dev/null || echo "$probe_epoch")"
+    else
+      info "Probe succeeded but reset time is already past — tokens may have reset. Retrying immediately."
+      return 0
+    fi
+  else
+    warn "Tier 1 unavailable (no ANTHROPIC_API_KEY or probe failed)."
+  fi
+
+  if [[ -z "$wake_epoch" && -n "$logfile" ]]; then
+    info "Tier 2 — parsing reset time from agent output..."
+    local log_epoch
+    log_epoch=$(infer_reset_epoch_from_log "$logfile") || true
+    if [[ -n "$log_epoch" ]]; then
+      wake_epoch=$(( log_epoch + 60 ))
+      wake_source="agent output"
+      SESSION_ENDS=$(format_session_end "$log_epoch")
+      info "Detected reset at: $(date -d "@$log_epoch" 2>/dev/null || date -r "$log_epoch" 2>/dev/null || echo "$log_epoch")"
+      if [[ -n "$SESSION_ENDS" ]]; then
+        info "Updated --session-ends seed to $SESSION_ENDS"
+      fi
+    else
+      warn "Could not extract a reset time from $logfile."
+    fi
+  fi
+  if [[ -z "$wake_epoch" && -n "$SESSION_ENDS" ]]; then
+    info "Tier 3 — using --session-ends $SESSION_ENDS..."
+    local seed_epoch
+    seed_epoch=$(parse_epoch "$SESSION_ENDS") || true
+    if [[ -n "$seed_epoch" ]]; then
+      now=$(date +%s)
+      if [[ "$seed_epoch" -gt "$now" ]]; then
+        wake_epoch=$(( seed_epoch + 60 ))
+        wake_source="session seed (--session-ends)"
+        info "Will wake at: $(date -d "@$wake_epoch" 2>/dev/null || date -r "$wake_epoch" 2>/dev/null || echo "$wake_epoch") (+60s buffer)"
+      else
+        warn "--session-ends is stale (already past). Ignoring it for this retry."
+      fi
+    else
+      warn "Could not parse --session-ends value: '$SESSION_ENDS'"
+    fi
+  fi
+  if [[ -z "$wake_epoch" ]]; then
+    warn "Tier 4 — no reset time available. Sleeping ${RATE_LIMIT_WAIT}s ($(( RATE_LIMIT_WAIT / 60 )) min)."
+    warn "Tip: set ANTHROPIC_API_KEY or pass --session-ends for a smarter wake-up."
+    wake_epoch=$(( $(date +%s) + RATE_LIMIT_WAIT ))
+    wake_source="fixed wait"
+  fi
+  info "Strategy: $wake_source. Press Ctrl+C to cancel."
+  countdown_sleep "$wake_epoch" "token reset"
+  log "Wake-up time reached. Retrying..."
+}
+
run_agent() {
local iteration=$1
local mode=$2
@@ -86,12 +360,23 @@ run_agent() {
log "Iteration $iteration ($mode mode) — starting fresh agent..."
- # Disable pipefail around the agent call so a non-zero claude exit doesn't
- # kill the script. We inspect the log content instead.
+ if [[ "$AGENT" == "claude" && "$REQUIRE_PRO" == "1" ]]; then
+ if ! verify_claude_pro_auth >/tmp/ralph-auth-check.out 2>/tmp/ralph-auth-check.err; then
+ error "Claude analysis auth is not using Pro. Refusing to run."
+ if [[ -s /tmp/ralph-auth-check.out ]]; then
+ error "Auth details: $(tail -n 1 /tmp/ralph-auth-check.out)"
+ fi
+ if [[ -s /tmp/ralph-auth-check.err ]]; then
+ error "Auth check stderr: $(tail -n 1 /tmp/ralph-auth-check.err)"
+ fi
+ exit 1
+ fi
+ fi
+
set +e
case "$AGENT" in
claude)
- echo "$prompt" | claude -p --output-format text 2>&1 | tee "$logfile"
+ echo "$prompt" | env -u ANTHROPIC_API_KEY claude -p --dangerously-skip-permissions --output-format text 2>&1 | tee "$logfile"
;;
codex)
echo "$prompt" | codex 2>&1 | tee "$logfile"
@@ -107,87 +392,72 @@ run_agent() {
./custom-agent.sh "$prompt" 2>&1 | tee "$logfile"
else
error "Custom agent selected but ./custom-agent.sh not found or not executable"
+ set -e
exit 1
fi
;;
*)
- error "Unknown agent: $AGENT"
- error "Supported agents: claude, codex, aider, gemini, custom"
+ error "Unknown agent: $AGENT. Supported: claude, codex, aider, gemini, custom"
+ set -e
exit 1
;;
esac
+ AGENT_EXIT_CODE=$?
set -e
-
- return 0
-}
-
-# Probe whether claude is available by sending a trivial request.
-# Returns 0 if available, 1 if still rate-limited or erroring.
-probe_session() {
- local probe_log="$LOG_DIR/probe.log"
- set +e
- echo "Reply with the single word OK and nothing else." \
- | claude -p --output-format text > "$probe_log" 2>&1
- local rc=$?
- set -e
-
- if [[ $rc -ne 0 ]]; then
- return 1
- fi
- # Also check the output doesn't contain a limit message
- if grep -qi 'usage limit\|rate limit\|limit reached\|exceeded.*limit' "$probe_log" 2>/dev/null; then
- return 1
- fi
return 0
}
check_output() {
local logfile="$1"
- # Session / usage limit — must check BEFORE generic promise checks
- if grep -qi 'usage limit\|rate limit\|limit reached\|exceeded.*limit\|Claude AI usage' "$logfile" 2>/dev/null; then
- return 4 # Rate limited
- fi
-
if grep -q 'DONE' "$logfile" 2>/dev/null; then
- return 0 # Done
+ return 0
elif grep -q 'STUCK' "$logfile" 2>/dev/null; then
- return 2 # Stuck — needs human intervention
+ return 2
elif grep -q 'ERROR' "$logfile" 2>/dev/null; then
- return 3 # Unrecoverable error
+ return 3
+ elif grep -Eqi "rate.limit|rate_limit|too many requests|exceeded.*quota|usage limit|out of tokens|overloaded|you'?ve hit your limit|resets [0-9]{1,2}(:[0-9]{2})?(am|pm)" "$logfile" 2>/dev/null; then
+ return 4
else
- return 1 # Normal iteration — continue
+ return 1
fi
}
-wait_for_session_reset() {
- local iteration=$1
- warn "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
- warn "Session usage limit hit during iteration $iteration."
- warn "Will probe every ${SESSION_POLL_INTERVAL}s until session resets."
- warn "No manual action needed — loop will resume automatically."
- warn "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+plan_has_remaining_work() {
+ if [[ ! -f "$PLAN_FILE" ]]; then
+ return 1
+ fi
- local attempt=0
- while true; do
- ((attempt++))
- local next_check
- next_check=$(date -d "+${SESSION_POLL_INTERVAL} seconds" '+%H:%M:%S' 2>/dev/null \
- || date -v "+${SESSION_POLL_INTERVAL}S" '+%H:%M:%S' 2>/dev/null \
- || echo "soon")
- info "Probe attempt $attempt — next check at $next_check..."
- sleep "$SESSION_POLL_INTERVAL"
+ if grep -Eq '^- \[ \]' "$PLAN_FILE" 2>/dev/null; then
+ return 0
+ fi
- if probe_session; then
- success "Session available! Resuming iteration $iteration..."
- return 0
- else
- warn "Still rate-limited (attempt $attempt). Waiting another ${SESSION_POLL_INTERVAL}s..."
- fi
- done
+ return 1
}
-# ─── Main ────────────────────────────────────────────────────────────────────
+board_has_remaining_work() {
+  # Return 0 when a board file is configured, exists, and still contains a
+  # table cell marked "⬜ Pending" or "🔄 In Progress"; return 1 otherwise.
+
+  # No board configured, or the path is not a regular file.
+  [[ -n "$BOARD_FILE" && -f "$BOARD_FILE" ]] || return 1
+
+  # Any grep failure (no match or read error) counts as "no remaining work".
+  grep -Eq '\| .*⬜ Pending .* \||\| .*🔄 In Progress .* \|' "$BOARD_FILE" 2>/dev/null || return 1
+  return 0
+}
+
+has_remaining_work() {
+  # Return 0 if either tracking artifact (execution board first, then the
+  # plan file) still lists unfinished work; return 1 when neither does.
+  local probe
+  for probe in board_has_remaining_work plan_has_remaining_work; do
+    if "$probe"; then
+      return 0
+    fi
+  done
+  return 1
+}
if [[ "$MODE" == "plan" ]]; then
log "Planning mode — creating implementation plan..."
@@ -197,44 +467,75 @@ if [[ "$MODE" == "plan" ]]; then
fi
log "Starting Ralph Wiggum loop (max $MAX_ITERATIONS iterations)"
-log "Agent: $AGENT"
-log "Spec: $SPEC_FILE"
-log "Plan: $PLAN_FILE"
-log "Poll interval: ${SESSION_POLL_INTERVAL}s (session limit recovery)"
+log "Runtime model: script-orchestrated"
+log "Agent: $AGENT"
+log "Spec: $SPEC_FILE"
+log "Plan: $PLAN_FILE"
+if [[ -n "$BOARD_FILE" ]]; then
+ log "Board: $BOARD_FILE"
+fi
+if [[ -n "$SESSION_ENDS" ]]; then
+ log "Tier 3 (session seed): $SESSION_ENDS"
+fi
+if [[ "$AGENT" == "claude" ]]; then
+ log_agent_runtime
+ if [[ "$REQUIRE_PRO" == "1" ]]; then
+ log "Pro guard: enabled"
+ else
+ warn "Pro guard: disabled (--no-require-pro)"
+ fi
+fi
echo ""
-i=1
-while [[ $i -le $MAX_ITERATIONS ]]; do
+for i in $(seq 1 "$MAX_ITERATIONS"); do
run_agent "$i" build
logfile="$LOG_DIR/iteration-${i}.log"
- # Capture return value without triggering set -e
- check_output "$logfile" || status=$?
- status=${status:-0}
+  # check_output reports state through non-zero returns; the `||` keeps `set -e` from aborting the loop here.
+  status=0; check_output "$logfile" || status=$?
case $status in
0)
- success "ALL TASKS COMPLETE after $i iterations!"
- exit 0
+ if has_remaining_work; then
+ warn "Agent reported DONE, but the tracking artifacts still show work remaining."
+ warn "Ignoring false DONE and restarting with fresh context."
+ echo ""
+ sleep 2
+ else
+ success "All tracked work appears complete after $i iterations."
+ exit 0
+ fi
;;
2)
- warn "Agent is stuck on iteration $i. Review $logfile and intervene."
+ warn "Agent is stuck. Review $logfile and intervene."
exit 1
;;
3)
- error "Agent encountered an error on iteration $i. Review $logfile."
+ error "Agent encountered an error. Review $logfile."
exit 1
;;
4)
- # Rate limited — wait for reset, then retry the SAME iteration
- wait_for_session_reset "$i"
- # Do NOT increment i — retry the same task
+ warn "Token/rate limit hit on iteration $i."
+ wait_for_tokens "$logfile"
+ echo ""
;;
1)
- log "Iteration $i complete. Restarting with fresh context..."
- echo ""
- sleep 2
- ((i++))
+ if [[ $AGENT_EXIT_CODE -ne 0 ]]; then
+ warn "Agent exited with code $AGENT_EXIT_CODE but did not emit a recognized promise signal."
+ if has_remaining_work; then
+ warn "Tracked work remains. Restarting fresh."
+ echo ""
+ sleep 2
+ else
+ error "No work remains in tracking artifacts, but agent did not finish cleanly."
+ error "Review $logfile."
+ exit 1
+ fi
+ else
+ log "Iteration $i complete. Restarting with fresh context..."
+ echo ""
+ sleep 2
+ fi
;;
esac
done