fix(ralph-loop): replace with three-tier session reset detection (plus fixed fallback) from master
Supersedes the simple polling version written this session. The master
harness version (from the other machine) has:
Tier 1 — Anthropic API probe via ANTHROPIC_API_KEY if available
Tier 2 — Parse reset time from agent output ("resets 11am (America/New_York)")
Tier 3 — Seeded --session-ends timestamp argument
Tier 4 — Fixed fallback sleep (--retry-wait, default 1800s)
Agent: human
Tests: N/A
Tests-Added: 0
TypeScript: N/A
This commit is contained in:
parent
82e10ff810
commit
66be5d83ff
505
ralph-loop.sh
505
ralph-loop.sh
|
|
@ -1,54 +1,62 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
#
|
#
|
||||||
# Ralph Wiggum Loop — Autonomous agent iteration
|
# Ralph Wiggum Loop — Script-Orchestrated Autonomous Agent Iteration
|
||||||
#
|
#
|
||||||
# Based on Geoffrey Huntley's approach:
|
# This runtime is for the "script is the orchestrator" model:
|
||||||
# - Each iteration spawns a FRESH agent with clean context
|
# - The shell loop spawns a fresh agent every iteration
|
||||||
# - Agent reads the plan, picks ONE task, implements, tests, commits, exits
|
# - The shell loop interprets runtime signals and failures
|
||||||
# - Loop restarts until all tasks are done
|
# - The shell loop decides when to retry, stop, or wait for token reset
|
||||||
#
|
#
|
||||||
# Session limit handling:
|
# This is different from the "agent is the orchestrator" model used in
|
||||||
# - Detects Claude Pro usage limit messages in agent output
|
# OpenClaw/manual orchestration, where a supervising agent evaluates results,
|
||||||
# - Polls every SESSION_POLL_INTERVAL seconds until the session resets
|
# watches execution boards, and decides what to do next.
|
||||||
# - Resumes the same iteration automatically — no manual intervention needed
|
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# ./ralph-loop.sh # Build mode (default)
|
# ./ralph-loop.sh # Build mode (default)
|
||||||
# ./ralph-loop.sh plan # Planning mode (create IMPLEMENTATION_PLAN.md)
|
# ./ralph-loop.sh plan # Planning mode
|
||||||
# ./ralph-loop.sh --max 20 # Limit to 20 iterations
|
# ./ralph-loop.sh --max 20 # Limit iterations
|
||||||
# ./ralph-loop.sh --agent claude # Use claude (default)
|
# ./ralph-loop.sh --agent claude # Use claude (default)
|
||||||
# ./ralph-loop.sh --agent codex # Use OpenAI Codex CLI
|
# ./ralph-loop.sh --session-ends 2026-04-09T16:00:00
|
||||||
# ./ralph-loop.sh --agent aider # Use Aider
|
# ./ralph-loop.sh --retry-wait 1800
|
||||||
# ./ralph-loop.sh --agent gemini # Use Gemini CLI
|
# ./ralph-loop.sh --board .harness/foo/execution-board.md
|
||||||
# ./ralph-loop.sh --agent custom # Use custom agent (see below)
|
# ./ralph-loop.sh --no-require-pro
|
||||||
|
#
|
||||||
|
# Token / rate-limit handling:
|
||||||
|
# Tier 1 — Anthropic API probe if ANTHROPIC_API_KEY is available
|
||||||
|
# Tier 2 — Parse "resets 11am (America/New_York)" from agent output
|
||||||
|
# Tier 3 — Use seeded --session-ends time
|
||||||
|
# Tier 4 — Fixed fallback sleep
|
||||||
#
|
#
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
MODE="${1:-build}"
|
MODE="build"
|
||||||
MAX_ITERATIONS=50
|
MAX_ITERATIONS=50
|
||||||
AGENT="claude"
|
AGENT="claude"
|
||||||
PLAN_FILE="IMPLEMENTATION_PLAN.md"
|
PLAN_FILE="IMPLEMENTATION_PLAN.md"
|
||||||
SPEC_FILE="PROJECT-SPEC.md"
|
SPEC_FILE="PROJECT-SPEC.md"
|
||||||
AGENT_FILE="AGENT.md"
|
AGENT_FILE="AGENT.md"
|
||||||
|
BOARD_FILE=""
|
||||||
LOG_DIR=".ralph-logs"
|
LOG_DIR=".ralph-logs"
|
||||||
|
RATE_LIMIT_WAIT=1800
|
||||||
|
SESSION_ENDS=""
|
||||||
|
REQUIRE_PRO=1
|
||||||
|
|
||||||
# How often (in seconds) to probe whether the session has reset.
|
|
||||||
# Default: 10 minutes. Adjust down if you want faster recovery.
|
|
||||||
SESSION_POLL_INTERVAL="${SESSION_POLL_INTERVAL:-600}"
|
|
||||||
|
|
||||||
# Parse arguments
|
|
||||||
shift 2>/dev/null || true
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
|
plan) MODE="plan"; shift ;;
|
||||||
|
build) MODE="build"; shift ;;
|
||||||
--max) MAX_ITERATIONS="$2"; shift 2 ;;
|
--max) MAX_ITERATIONS="$2"; shift 2 ;;
|
||||||
--agent) AGENT="$2"; shift 2 ;;
|
--agent) AGENT="$2"; shift 2 ;;
|
||||||
|
--retry-wait) RATE_LIMIT_WAIT="$2"; shift 2 ;;
|
||||||
|
--session-ends) SESSION_ENDS="$2"; shift 2 ;;
|
||||||
|
--board) BOARD_FILE="$2"; shift 2 ;;
|
||||||
|
--no-require-pro) REQUIRE_PRO=0; shift ;;
|
||||||
*) echo "Unknown option: $1"; exit 1 ;;
|
*) echo "Unknown option: $1"; exit 1 ;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
mkdir -p "$LOG_DIR"
|
mkdir -p "$LOG_DIR"
|
||||||
|
|
||||||
# Colors
|
|
||||||
GREEN='\033[0;32m'
|
GREEN='\033[0;32m'
|
||||||
YELLOW='\033[1;33m'
|
YELLOW='\033[1;33m'
|
||||||
RED='\033[0;31m'
|
RED='\033[0;31m'
|
||||||
|
|
@ -62,16 +70,282 @@ warn() { echo -e "${YELLOW}[ralph]${NC} $1"; }
|
||||||
error() { echo -e "${RED}[ralph]${NC} $1"; }
|
error() { echo -e "${RED}[ralph]${NC} $1"; }
|
||||||
info() { echo -e "${CYAN}[ralph]${NC} $1"; }
|
info() { echo -e "${CYAN}[ralph]${NC} $1"; }
|
||||||
|
|
||||||
# Check prerequisites
|
AGENT_EXIT_CODE=0
|
||||||
|
|
||||||
|
# Print whatever `claude auth status` writes to stdout.
#
# The command runs inside an interactive bash (`bash -ic`) with
# ANTHROPIC_API_KEY removed from the environment, and its stderr is discarded.
# NOTE(review): callers treat the output as JSON; presumably the CLI emits JSON
# here — confirm against the installed claude version.
# NOTE(review): `tail -n +1` passes every line through unchanged.
get_claude_analysis_auth_json() {
  env -u ANTHROPIC_API_KEY \
    bash -ic 'claude auth status' 2>/dev/null \
    | tail -n +1
}
|
||||||
|
|
||||||
|
# Check that the Claude CLI is logged in with a "pro" subscription.
#
# Globals:  none (calls get_claude_analysis_auth_json and error)
# Outputs:  "ok" on stdout when the check passes; otherwise the parsed auth
#           status re-serialised as ASCII JSON.
# Returns:  0 when loggedIn is truthy and subscriptionType == "pro",
#           1 when the status is empty or does not qualify.
verify_claude_pro_auth() {
  local status_json
  status_json=$(get_claude_analysis_auth_json)

  [[ -n "$status_json" ]] || {
    error "Could not determine Claude analysis auth status."
    return 1
  }

  # The status travels through the environment so the Python source can live
  # in a quoted heredoc with no shell interpolation.
  AUTH_JSON="$status_json" python3 - <<'PY'
import json
import os
import sys

info = json.loads(os.environ["AUTH_JSON"])
qualifies = info.get("loggedIn") and info.get("subscriptionType") == "pro"

if not qualifies:
    print(json.dumps(info, ensure_ascii=True))
    sys.exit(1)

print("ok")
sys.exit(0)
PY
}
|
||||||
|
|
||||||
|
# Log diagnostic details about the selected agent's runtime.
#
# Only the "claude" agent is handled; any other $AGENT value logs nothing.
# For claude it reports the resolved binary path, the CLI version, whether
# ANTHROPIC_API_KEY is set, and a one-line summary of the auth status.
#
# This is purely informational, so every external lookup is guarded: a missing
# or failing `claude` binary must not abort the script under
# `set -euo pipefail`.
#
# Globals:  AGENT (read), ANTHROPIC_API_KEY (read, optional)
# Outputs:  log lines via log()
# Returns:  0
log_agent_runtime() {
  case "$AGENT" in
    claude)
      local claude_path claude_version auth_json auth_summary
      claude_path=$(bash -ic 'command -v claude' 2>/dev/null | tail -n 1 || true)
      claude_version=$(bash -ic 'claude --version' 2>/dev/null | tail -n 1 || true)
      # Guarded like the two lookups above: without `|| true` a failing
      # `claude auth status` pipeline would terminate the script here.
      auth_json=$(get_claude_analysis_auth_json) || true

      log "Claude binary: ${claude_path:-not found}"
      log "Claude version: ${claude_version:-unknown}"

      if [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
        log "Claude auth hint: ANTHROPIC_API_KEY is set (API probe enabled)"
      else
        log "Claude auth hint: ANTHROPIC_API_KEY is not set"
      fi

      if [[ -n "$auth_json" ]]; then
        # Output that is not valid JSON yields an empty summary instead of a
        # Python traceback on the terminal.
        auth_summary=$(
          AUTH_JSON="$auth_json" python3 - <<'PY' 2>/dev/null || true
import json
import os

data = json.loads(os.environ["AUTH_JSON"])
print(f"authMethod={data.get('authMethod')} subscriptionType={data.get('subscriptionType')} apiKeySource={data.get('apiKeySource')}")
PY
        )
        log "Claude analysis auth: ${auth_summary:-unavailable}"
      fi
      ;;
  esac
  return 0
}
|
||||||
|
|
||||||
if [[ ! -f "$SPEC_FILE" ]]; then
|
if [[ ! -f "$SPEC_FILE" ]]; then
|
||||||
error "Missing $SPEC_FILE — create your project spec first."
|
error "Missing $SPEC_FILE — create your project spec first."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ ! -f "$AGENT_FILE" ]]; then
|
if [[ ! -f "$AGENT_FILE" ]]; then
|
||||||
warn "No $AGENT_FILE found. Using default agent instructions."
|
warn "No $AGENT_FILE found. Using default agent instructions."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Tier 1: ask the Anthropic Messages API when the output-token rate limit
# resets, by sending a minimal request and reading the response headers.
#
# Globals:  ANTHROPIC_API_KEY (read)
# Outputs:  "<reset_epoch>|<remaining>" on stdout, where <remaining> is the
#           anthropic-ratelimit-output-tokens-remaining header value or
#           "unknown".
# Returns:  0 on success; 1 when no API key is set, the request fails, the
#           reset header is absent, or its value cannot be converted to an
#           epoch.
#
# NOTE(review): the API key is passed on curl's command line, so it is visible
# in the process list while the request is in flight.
probe_rate_limit() {
  if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then
    return 1
  fi

  local headers
  headers=$(curl -s -D - -o /dev/null \
    --max-time 10 \
    -X POST "https://api.anthropic.com/v1/messages" \
    -H "x-api-key: $ANTHROPIC_API_KEY" \
    -H "anthropic-version: 2023-06-01" \
    -H "content-type: application/json" \
    -d '{"model":"claude-haiku-4-5-20251001","max_tokens":1,"messages":[{"role":"user","content":"hi"}]}' \
    2>/dev/null) || return 1

  # Header names are case-insensitive; keep the first occurrence only and
  # strip the CR that terminates HTTP header lines.
  local reset_str remaining
  reset_str=$(printf '%s\n' "$headers" \
    | awk '!seen && tolower($1) == "anthropic-ratelimit-output-tokens-reset:" { print $2; seen = 1 }' \
    | tr -d '\r\n') || true
  remaining=$(printf '%s\n' "$headers" \
    | awk '!seen && tolower($1) == "anthropic-ratelimit-output-tokens-remaining:" { print $2; seen = 1 }' \
    | tr -d '\r\n') || true

  if [[ -z "$reset_str" ]]; then
    return 1
  fi

  # GNU date first; the Python fallback covers platforms whose date lacks -d.
  # The fallback exits non-zero when no format matches so an unparseable
  # timestamp is reported as a failure rather than as an empty epoch.
  local reset_epoch
  reset_epoch=$(date -d "$reset_str" +%s 2>/dev/null) \
    || reset_epoch=$(python3 -c "
from datetime import datetime, timezone
import sys
s = sys.argv[1].strip()
for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00', '%Y-%m-%dT%H:%M:%S%z'):
    try:
        dt = datetime.strptime(s, fmt)
    except ValueError:
        continue
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    print(int(dt.timestamp()))
    break
else:
    sys.exit(1)
" "$reset_str" 2>/dev/null) || return 1

  # Callers compare the epoch numerically; never hand them anything else.
  if [[ ! "$reset_epoch" =~ ^[0-9]+$ ]]; then
    return 1
  fi

  printf '%s|%s\n' "$reset_epoch" "${remaining:-unknown}"
}
|
||||||
|
|
||||||
|
# Convert a timestamp string to Unix epoch seconds.
#
# Arguments: $1 - timestamp, e.g. 2026-04-09T16:00:00, "2026-04-09 16:00:00",
#                 2026-04-09T16:00:00Z or 2026-04-09T16:00:00+02:00
# Outputs:   the epoch on stdout; nothing when the value cannot be parsed
# Returns:   0 in every case (callers test for empty output)
#
# A timestamp without a zone designator is interpreted as LOCAL time on both
# code paths. GNU `date -d` already does that; the Python fallback (used where
# `date -d` is unsupported) previously assumed UTC, so the same input produced
# different instants depending on the platform.
parse_epoch() {
  local ts="$1"

  # GNU `date -d ""` succeeds and prints today's midnight; an empty input is
  # simply "not parseable".
  if [[ -z "$ts" ]]; then
    return 0
  fi

  date -d "$ts" +%s 2>/dev/null \
    || python3 -c "
from datetime import datetime, timezone
import sys
s = sys.argv[1]
# (format, force_utc): formats carrying a literal UTC marker are pinned to UTC;
# zone-less formats stay naive so timestamp() applies the local zone; %z
# produces an aware datetime on its own.
formats = (
    ('%Y-%m-%dT%H:%M:%S', False),
    ('%Y-%m-%d %H:%M:%S', False),
    ('%Y-%m-%dT%H:%M:%SZ', True),
    ('%Y-%m-%dT%H:%M:%S+00:00', True),
    ('%Y-%m-%dT%H:%M:%S%z', False),
)
for fmt, force_utc in formats:
    try:
        dt = datetime.strptime(s, fmt)
    except ValueError:
        continue
    if force_utc:
        dt = dt.replace(tzinfo=timezone.utc)
    print(int(dt.timestamp()))
    break
" "$ts" 2>/dev/null || true
}
|
||||||
|
|
||||||
|
# Render epoch seconds as a zone-less local timestamp (YYYY-MM-DDTHH:MM:SS).
#
# Arguments: $1 - Unix epoch seconds
# Outputs:   the formatted time, or an empty line when neither date dialect
#            accepts the value
# Returns:   0
format_session_end() {
  local epoch="$1"
  local layout='+%Y-%m-%dT%H:%M:%S'

  # `date -d @N` is tried first, then `date -r N`.
  if date -d "@$epoch" "$layout" 2>/dev/null; then
    return 0
  fi
  if date -r "$epoch" "$layout" 2>/dev/null; then
    return 0
  fi
  echo ""
}
|
||||||
|
|
||||||
|
# Tier 2: derive the reset instant from a message such as
# "resets 11am (America/New_York)" found in an agent log.
#
# Arguments: $1 - path to the agent log file
# Outputs:   epoch seconds of the reset on stdout; nothing when the file is
#            missing, contains no such message, or the zone cannot be resolved
# Returns:   0 in every case (failures are deliberately silent; the caller
#            falls through to the next tier on empty output)
#
# The message only carries a wall-clock time, so the day has to be inferred.
# It is inferred relative to the log file's modification time — i.e. when the
# message was written — not relative to "now". Otherwise a reset that passed
# moments before this function ran would be pushed out by a full day. The
# returned epoch may therefore already be in the past.
infer_reset_epoch_from_log() {
  local logfile="$1"

  python3 - "$logfile" <<'PY' 2>/dev/null || true
from datetime import datetime, timedelta, timezone
from pathlib import Path
import re
import sys

try:
    from zoneinfo import ZoneInfo
except Exception:
    ZoneInfo = None

logfile = Path(sys.argv[1])
if not logfile.exists():
    raise SystemExit(0)

text = logfile.read_text(encoding="utf-8", errors="ignore")
matches = list(re.finditer(r"resets\s+(\d{1,2})(?::(\d{2}))?\s*(am|pm)\s*\(([^)]+)\)", text, re.IGNORECASE))
if not matches:
    raise SystemExit(0)

# The last occurrence is the most recent message in the log.
match = matches[-1]
hour = int(match.group(1))
minute = int(match.group(2) or "0")
ampm = match.group(3).lower()
tz_name = match.group(4).strip()

# 12-hour to 24-hour: 12am -> 0, 12pm -> 12, 1pm -> 13.
if hour == 12:
    hour = 0
if ampm == "pm":
    hour += 12

# UTC/GMT need no tz database; every other name requires zoneinfo.
if tz_name.upper() in ("UTC", "GMT"):
    tz = timezone.utc
elif ZoneInfo is None:
    raise SystemExit(0)
else:
    tz = ZoneInfo(tz_name)

reference = datetime.fromtimestamp(logfile.stat().st_mtime, tz)
candidate = reference.replace(hour=hour, minute=minute, second=0, microsecond=0)
if candidate <= reference:
    candidate += timedelta(days=1)

print(int(candidate.timestamp()))
PY
}
|
||||||
|
|
||||||
|
# Block until a target time, redrawing a single-line countdown.
#
# Globals:   YELLOW, NC (read; colour escape sequences)
# Arguments: $1 - target time as Unix epoch seconds
#            $2 - label shown in the countdown (default: "token reset")
# Outputs:   a carriage-return-refreshed status line, then a newline
# Returns:   0 once the target has been reached (immediately if it is past)
countdown_sleep() {
  local target_epoch=$1
  local label="${2:-token reset}"
  local now remaining h m s nap

  while true; do
    now=$(date +%s)
    remaining=$(( target_epoch - now ))
    if [[ $remaining -le 0 ]]; then
      break
    fi

    h=$(( remaining / 3600 ))
    m=$(( (remaining % 3600) / 60 ))
    s=$(( remaining % 60 ))
    # The colour variables hold backslash escapes, so they must be part of the
    # printf format string to be interpreted.
    printf "\r${YELLOW}[ralph]${NC} Waiting for %s... %02dh%02dm%02ds remaining " "$label" "$h" "$m" "$s"

    # Refresh every 5 seconds, but never sleep past the target.
    nap=$(( remaining < 5 ? remaining : 5 ))
    sleep "$nap"
  done
  echo ""
}
|
||||||
|
|
||||||
|
# Print an epoch as a human-readable date, falling back to the raw number when
# neither `date -d @N` nor `date -r N` accepts it.
_ralph_describe_epoch() {
  date -d "@$1" 2>/dev/null || date -r "$1" 2>/dev/null || echo "$1"
}

# Decide how long to wait after a rate-limit / token-exhaustion signal, then
# sleep until that moment.
#
# The wake-up time comes from the first tier that yields a usable value:
#   Tier 1 - probe_rate_limit (Anthropic API response headers)
#   Tier 2 - reset time parsed from the agent log (+60s buffer); also
#            refreshes SESSION_ENDS so later waits can reuse it
#   Tier 3 - the seeded SESSION_ENDS value (+60s buffer), if still in the future
#   Tier 4 - a fixed sleep of RATE_LIMIT_WAIT seconds
#
# Globals:   SESSION_ENDS (read, written), RATE_LIMIT_WAIT (read)
# Arguments: $1 - path to the agent log of the failed iteration (optional)
# Returns:   0 after the wait, or immediately when Tier 1 reports a reset
#            time that is already in the past
#
# NOTE(review): Tier 1 measures the limits of ANTHROPIC_API_KEY. Whether those
# coincide with the limits the agent actually hit depends on how the agent is
# authenticated — confirm before relying on it.
wait_for_tokens() {
  local logfile="${1:-}"
  warn "Rate limit / token exhaustion detected."
  echo ""

  local wake_epoch="" wake_source="" now

  info "Tier 1 — probing Anthropic API for exact reset time..."
  local probe_result probe_epoch probe_remaining
  if probe_result=$(probe_rate_limit); then
    probe_epoch="${probe_result%%|*}"
    probe_remaining="${probe_result##*|}"
    now=$(date +%s)
    if [[ ! "$probe_epoch" =~ ^[0-9]+$ ]]; then
      # An empty or malformed epoch is "no answer", not "already reset";
      # treating it as the latter would retry immediately without waiting.
      warn "Tier 1 unavailable (no ANTHROPIC_API_KEY or probe failed)."
    elif [[ "$probe_epoch" -gt "$now" ]]; then
      wake_epoch=$probe_epoch
      wake_source="API probe"
      info "Tokens remaining: ${probe_remaining}. Reset at: $(_ralph_describe_epoch "$probe_epoch")"
    else
      info "Probe succeeded but reset time is already past — tokens may have reset. Retrying immediately."
      return 0
    fi
  else
    warn "Tier 1 unavailable (no ANTHROPIC_API_KEY or probe failed)."
  fi

  if [[ -z "$wake_epoch" && -n "$logfile" ]]; then
    info "Tier 2 — parsing reset time from agent output..."
    local log_epoch
    log_epoch=$(infer_reset_epoch_from_log "$logfile") || true
    if [[ "$log_epoch" =~ ^[0-9]+$ ]]; then
      wake_epoch=$(( log_epoch + 60 ))
      wake_source="agent output"
      SESSION_ENDS=$(format_session_end "$log_epoch")
      info "Detected reset at: $(_ralph_describe_epoch "$log_epoch")"
      if [[ -n "$SESSION_ENDS" ]]; then
        info "Updated --session-ends seed to $SESSION_ENDS"
      fi
    else
      warn "Could not extract a reset time from $logfile."
    fi
  fi

  if [[ -z "$wake_epoch" && -n "$SESSION_ENDS" ]]; then
    info "Tier 3 — using --session-ends $SESSION_ENDS..."
    local seed_epoch
    seed_epoch=$(parse_epoch "$SESSION_ENDS") || true
    if [[ "$seed_epoch" =~ ^[0-9]+$ ]]; then
      now=$(date +%s)
      if [[ "$seed_epoch" -gt "$now" ]]; then
        wake_epoch=$(( seed_epoch + 60 ))
        wake_source="session seed (--session-ends)"
        info "Will wake at: $(_ralph_describe_epoch "$wake_epoch") (+60s buffer)"
      else
        warn "--session-ends is stale (already past). Ignoring it for this retry."
      fi
    else
      warn "Could not parse --session-ends value: '$SESSION_ENDS'"
    fi
  fi

  if [[ -z "$wake_epoch" ]]; then
    warn "Tier 4 — no reset time available. Sleeping ${RATE_LIMIT_WAIT}s ($(( RATE_LIMIT_WAIT / 60 )) min)."
    warn "Tip: set ANTHROPIC_API_KEY or pass --session-ends for a smarter wake-up."
    wake_epoch=$(( $(date +%s) + RATE_LIMIT_WAIT ))
    wake_source="fixed wait"
  fi

  info "Strategy: $wake_source. Press Ctrl+C to cancel."
  countdown_sleep "$wake_epoch" "token reset"
  log "Wake-up time reached. Retrying..."
}
|
||||||
|
|
||||||
run_agent() {
|
run_agent() {
|
||||||
local iteration=$1
|
local iteration=$1
|
||||||
local mode=$2
|
local mode=$2
|
||||||
|
|
@ -86,12 +360,23 @@ run_agent() {
|
||||||
|
|
||||||
log "Iteration $iteration ($mode mode) — starting fresh agent..."
|
log "Iteration $iteration ($mode mode) — starting fresh agent..."
|
||||||
|
|
||||||
# Disable pipefail around the agent call so a non-zero claude exit doesn't
|
if [[ "$AGENT" == "claude" && "$REQUIRE_PRO" == "1" ]]; then
|
||||||
# kill the script. We inspect the log content instead.
|
if ! verify_claude_pro_auth >/tmp/ralph-auth-check.out 2>/tmp/ralph-auth-check.err; then
|
||||||
|
error "Claude analysis auth is not using Pro. Refusing to run."
|
||||||
|
if [[ -s /tmp/ralph-auth-check.out ]]; then
|
||||||
|
error "Auth details: $(tail -n 1 /tmp/ralph-auth-check.out)"
|
||||||
|
fi
|
||||||
|
if [[ -s /tmp/ralph-auth-check.err ]]; then
|
||||||
|
error "Auth check stderr: $(tail -n 1 /tmp/ralph-auth-check.err)"
|
||||||
|
fi
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
set +e
|
set +e
|
||||||
case "$AGENT" in
|
case "$AGENT" in
|
||||||
claude)
|
claude)
|
||||||
echo "$prompt" | claude -p --output-format text 2>&1 | tee "$logfile"
|
echo "$prompt" | env -u ANTHROPIC_API_KEY claude -p --dangerously-skip-permissions --output-format text 2>&1 | tee "$logfile"
|
||||||
;;
|
;;
|
||||||
codex)
|
codex)
|
||||||
echo "$prompt" | codex 2>&1 | tee "$logfile"
|
echo "$prompt" | codex 2>&1 | tee "$logfile"
|
||||||
|
|
@ -107,87 +392,72 @@ run_agent() {
|
||||||
./custom-agent.sh "$prompt" 2>&1 | tee "$logfile"
|
./custom-agent.sh "$prompt" 2>&1 | tee "$logfile"
|
||||||
else
|
else
|
||||||
error "Custom agent selected but ./custom-agent.sh not found or not executable"
|
error "Custom agent selected but ./custom-agent.sh not found or not executable"
|
||||||
|
set -e
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
error "Unknown agent: $AGENT"
|
error "Unknown agent: $AGENT. Supported: claude, codex, aider, gemini, custom"
|
||||||
error "Supported agents: claude, codex, aider, gemini, custom"
|
set -e
|
||||||
exit 1
|
exit 1
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
AGENT_EXIT_CODE=$?
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
# Probe whether claude is available by sending a trivial request.
|
|
||||||
# Returns 0 if available, 1 if still rate-limited or erroring.
|
|
||||||
probe_session() {
|
|
||||||
local probe_log="$LOG_DIR/probe.log"
|
|
||||||
set +e
|
|
||||||
echo "Reply with the single word OK and nothing else." \
|
|
||||||
| claude -p --output-format text > "$probe_log" 2>&1
|
|
||||||
local rc=$?
|
|
||||||
set -e
|
|
||||||
|
|
||||||
if [[ $rc -ne 0 ]]; then
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
# Also check the output doesn't contain a limit message
|
|
||||||
if grep -qi 'usage limit\|rate limit\|limit reached\|exceeded.*limit' "$probe_log" 2>/dev/null; then
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
check_output() {
|
check_output() {
|
||||||
local logfile="$1"
|
local logfile="$1"
|
||||||
|
|
||||||
# Session / usage limit — must check BEFORE generic promise checks
|
|
||||||
if grep -qi 'usage limit\|rate limit\|limit reached\|exceeded.*limit\|Claude AI usage' "$logfile" 2>/dev/null; then
|
|
||||||
return 4 # Rate limited
|
|
||||||
fi
|
|
||||||
|
|
||||||
if grep -q '<promise>DONE</promise>' "$logfile" 2>/dev/null; then
|
if grep -q '<promise>DONE</promise>' "$logfile" 2>/dev/null; then
|
||||||
return 0 # Done
|
|
||||||
elif grep -q '<promise>STUCK</promise>' "$logfile" 2>/dev/null; then
|
|
||||||
return 2 # Stuck — needs human intervention
|
|
||||||
elif grep -q '<promise>ERROR</promise>' "$logfile" 2>/dev/null; then
|
|
||||||
return 3 # Unrecoverable error
|
|
||||||
else
|
|
||||||
return 1 # Normal iteration — continue
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
wait_for_session_reset() {
|
|
||||||
local iteration=$1
|
|
||||||
warn "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
|
||||||
warn "Session usage limit hit during iteration $iteration."
|
|
||||||
warn "Will probe every ${SESSION_POLL_INTERVAL}s until session resets."
|
|
||||||
warn "No manual action needed — loop will resume automatically."
|
|
||||||
warn "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
|
||||||
|
|
||||||
local attempt=0
|
|
||||||
while true; do
|
|
||||||
((attempt++))
|
|
||||||
local next_check
|
|
||||||
next_check=$(date -d "+${SESSION_POLL_INTERVAL} seconds" '+%H:%M:%S' 2>/dev/null \
|
|
||||||
|| date -v "+${SESSION_POLL_INTERVAL}S" '+%H:%M:%S' 2>/dev/null \
|
|
||||||
|| echo "soon")
|
|
||||||
info "Probe attempt $attempt — next check at $next_check..."
|
|
||||||
sleep "$SESSION_POLL_INTERVAL"
|
|
||||||
|
|
||||||
if probe_session; then
|
|
||||||
success "Session available! Resuming iteration $iteration..."
|
|
||||||
return 0
|
return 0
|
||||||
|
elif grep -q '<promise>STUCK</promise>' "$logfile" 2>/dev/null; then
|
||||||
|
return 2
|
||||||
|
elif grep -q '<promise>ERROR</promise>' "$logfile" 2>/dev/null; then
|
||||||
|
return 3
|
||||||
|
elif grep -Eqi "rate.limit|rate_limit|too many requests|exceeded.*quota|usage limit|out of tokens|overloaded|you'?ve hit your limit|resets [0-9]{1,2}(:[0-9]{2})?(am|pm)" "$logfile" 2>/dev/null; then
|
||||||
|
return 4
|
||||||
else
|
else
|
||||||
warn "Still rate-limited (attempt $attempt). Waiting another ${SESSION_POLL_INTERVAL}s..."
|
return 1
|
||||||
fi
|
fi
|
||||||
done
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# ─── Main ────────────────────────────────────────────────────────────────────
|
# Report whether the plan file still lists an unchecked task.
#
# Globals:  PLAN_FILE (read)
# Returns:  0 when PLAN_FILE exists and has at least one line starting with
#           "- [ ]"; 1 otherwise
plan_has_remaining_work() {
  [[ -f "$PLAN_FILE" ]] || return 1
  grep -Eq '^- \[ \]' "$PLAN_FILE" 2>/dev/null || return 1
  return 0
}
|
||||||
|
|
||||||
|
# Report whether the execution board still has pending or in-progress rows.
#
# Globals:  BOARD_FILE (read; may be empty when no board was supplied)
# Returns:  0 when BOARD_FILE names an existing file containing a table row
#           with "⬜ Pending" or "🔄 In Progress"; 1 otherwise
board_has_remaining_work() {
  if [[ -n "$BOARD_FILE" && -f "$BOARD_FILE" ]] \
    && grep -Eq '\| .*⬜ Pending .* \||\| .*🔄 In Progress .* \|' "$BOARD_FILE" 2>/dev/null; then
    return 0
  fi
  return 1
}
|
||||||
|
|
||||||
|
# Report whether any tracking artifact still shows outstanding work.
#
# The execution board is consulted first, then the implementation plan.
# Returns:  0 when either reports remaining work; 1 when neither does
has_remaining_work() {
  board_has_remaining_work || plan_has_remaining_work || return 1
  return 0
}
|
||||||
|
|
||||||
if [[ "$MODE" == "plan" ]]; then
|
if [[ "$MODE" == "plan" ]]; then
|
||||||
log "Planning mode — creating implementation plan..."
|
log "Planning mode — creating implementation plan..."
|
||||||
|
|
@ -197,44 +467,75 @@ if [[ "$MODE" == "plan" ]]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "Starting Ralph Wiggum loop (max $MAX_ITERATIONS iterations)"
|
log "Starting Ralph Wiggum loop (max $MAX_ITERATIONS iterations)"
|
||||||
|
log "Runtime model: script-orchestrated"
|
||||||
log "Agent: $AGENT"
|
log "Agent: $AGENT"
|
||||||
log "Spec: $SPEC_FILE"
|
log "Spec: $SPEC_FILE"
|
||||||
log "Plan: $PLAN_FILE"
|
log "Plan: $PLAN_FILE"
|
||||||
log "Poll interval: ${SESSION_POLL_INTERVAL}s (session limit recovery)"
|
if [[ -n "$BOARD_FILE" ]]; then
|
||||||
|
log "Board: $BOARD_FILE"
|
||||||
|
fi
|
||||||
|
if [[ -n "$SESSION_ENDS" ]]; then
|
||||||
|
log "Tier 3 (session seed): $SESSION_ENDS"
|
||||||
|
fi
|
||||||
|
if [[ "$AGENT" == "claude" ]]; then
|
||||||
|
log_agent_runtime
|
||||||
|
if [[ "$REQUIRE_PRO" == "1" ]]; then
|
||||||
|
log "Pro guard: enabled"
|
||||||
|
else
|
||||||
|
warn "Pro guard: disabled (--no-require-pro)"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Main iteration loop: run a fresh agent, classify its output, then stop,
# wait, or continue.
#
# check_output status codes:
#   0 - DONE promise     2 - STUCK promise     3 - ERROR promise
#   4 - rate/token limit 1 - no recognised signal (ordinary iteration)
for (( i = 1; i <= MAX_ITERATIONS; i++ )); do
  run_agent "$i" build
  logfile="$LOG_DIR/iteration-${i}.log"

  # check_output signals through non-zero return codes. Under `set -e` a bare
  # call would terminate the script before $? could be read, so the status is
  # captured on the failure branch and reset on every iteration.
  status=0
  check_output "$logfile" || status=$?

  case $status in
    0)
      if has_remaining_work; then
        warn "Agent reported DONE, but the tracking artifacts still show work remaining."
        warn "Ignoring false DONE and restarting with fresh context."
        echo ""
        sleep 2
      else
        success "All tracked work appears complete after $i iterations."
        exit 0
      fi
      ;;
    2)
      warn "Agent is stuck. Review $logfile and intervene."
      exit 1
      ;;
    3)
      error "Agent encountered an error. Review $logfile."
      exit 1
      ;;
    4)
      warn "Token/rate limit hit on iteration $i."
      wait_for_tokens "$logfile"
      echo ""
      ;;
    1)
      if [[ $AGENT_EXIT_CODE -ne 0 ]]; then
        warn "Agent exited with code $AGENT_EXIT_CODE but did not emit a recognized promise signal."
        if has_remaining_work; then
          warn "Tracked work remains. Restarting fresh."
          echo ""
          sleep 2
        else
          error "No work remains in tracking artifacts, but agent did not finish cleanly."
          error "Review $logfile."
          exit 1
        fi
      else
        log "Iteration $i complete. Restarting with fresh context..."
        echo ""
        sleep 2
      fi
      ;;
  esac
done
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue