fix(ralph-loop): add session limit detection and auto-recovery
When claude -p hits the Pro subscription usage limit, the old loop had no detection — it would find no <promise> signal, treat it as a normal continuation, and immediately retry, burning all --max iterations.

New behaviour:
- check_output() returns status 4 when the log contains any usage/rate limit message (case-insensitive, multiple pattern variants)
- wait_for_session_reset() polls via a trivial probe call every SESSION_POLL_INTERVAL seconds (default: 600s / 10 min) until claude responds cleanly again
- When rate-limited, the same iteration is retried (i is not incremented) so no task is skipped or double-counted
- set -e is temporarily suspended around agent calls so a non-zero claude exit doesn't kill the bash process

Also updated the master template in docs/agent-harness/ralph-loop.sh.

Agent: human
Tests: N/A
Tests-Added: 0
TypeScript: N/A
This commit is contained in:
parent
b1c199d21d
commit
82e10ff810
139
ralph-loop.sh
139
ralph-loop.sh
|
|
@ -7,7 +7,10 @@
|
||||||
# - Agent reads the plan, picks ONE task, implements, tests, commits, exits
|
# - Agent reads the plan, picks ONE task, implements, tests, commits, exits
|
||||||
# - Loop restarts until all tasks are done
|
# - Loop restarts until all tasks are done
|
||||||
#
|
#
|
||||||
# No context compaction. No stale reasoning. Just fresh starts.
|
# Session limit handling:
|
||||||
|
# - Detects Claude Pro usage limit messages in agent output
|
||||||
|
# - Polls every SESSION_POLL_INTERVAL seconds until the session resets
|
||||||
|
# - Resumes the same iteration automatically — no manual intervention needed
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# ./ralph-loop.sh # Build mode (default)
|
# ./ralph-loop.sh # Build mode (default)
|
||||||
|
|
@ -19,22 +22,6 @@
|
||||||
# ./ralph-loop.sh --agent gemini # Use Gemini CLI
|
# ./ralph-loop.sh --agent gemini # Use Gemini CLI
|
||||||
# ./ralph-loop.sh --agent custom # Use custom agent (see below)
|
# ./ralph-loop.sh --agent custom # Use custom agent (see below)
|
||||||
#
|
#
|
||||||
# Extensibility:
|
|
||||||
# To add support for other AI coding agents (aider, cursor, windsurf, etc.):
|
|
||||||
# 1. Add a new case in the run_agent() function's agent selection block
|
|
||||||
# 2. Format the prompt appropriately for that agent's CLI interface
|
|
||||||
# 3. Ensure the agent outputs to the logfile for promise detection
|
|
||||||
#
|
|
||||||
# Example for Aider:
|
|
||||||
# aider)
|
|
||||||
# aider --message "$prompt" --yes 2>&1 | tee "$logfile"
|
|
||||||
# ;;
|
|
||||||
#
|
|
||||||
# Example for custom script:
|
|
||||||
# custom)
|
|
||||||
# ./my-agent-wrapper.sh "$prompt" 2>&1 | tee "$logfile"
|
|
||||||
# ;;
|
|
||||||
#
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
MODE="${1:-build}"
|
MODE="${1:-build}"
|
||||||
|
|
@ -45,6 +32,10 @@ SPEC_FILE="PROJECT-SPEC.md"
|
||||||
AGENT_FILE="AGENT.md"
|
AGENT_FILE="AGENT.md"
|
||||||
LOG_DIR=".ralph-logs"
|
LOG_DIR=".ralph-logs"
|
||||||
|
|
||||||
|
# How often (in seconds) to probe whether the session has reset.
|
||||||
|
# Default: 10 minutes. Adjust down if you want faster recovery.
|
||||||
|
SESSION_POLL_INTERVAL="${SESSION_POLL_INTERVAL:-600}"
|
||||||
|
|
||||||
# Parse arguments
|
# Parse arguments
|
||||||
shift 2>/dev/null || true
|
shift 2>/dev/null || true
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
|
|
@ -62,12 +53,14 @@ GREEN='\033[0;32m'
|
||||||
YELLOW='\033[1;33m'
|
YELLOW='\033[1;33m'
|
||||||
RED='\033[0;31m'
|
RED='\033[0;31m'
|
||||||
BLUE='\033[0;34m'
|
BLUE='\033[0;34m'
|
||||||
|
CYAN='\033[0;36m'
|
||||||
NC='\033[0m'
|
NC='\033[0m'
|
||||||
|
|
||||||
# Console loggers. Each prints "[ralph] <message>" with the tag wrapped in a
# level-specific colour code and followed by the NC reset code.
#   $1 - message text (backslash escapes are interpreted by `echo -e`)

# Shared emitter: $1 is the colour code for the tag, $2 the message.
_ralph_emit() {
  echo -e "${1}[ralph]${NC} $2"
}

log() { _ralph_emit "$BLUE" "$1"; }
success() { _ralph_emit "$GREEN" "$1"; }
warn() { _ralph_emit "$YELLOW" "$1"; }
error() { _ralph_emit "$RED" "$1"; }
info() { _ralph_emit "$CYAN" "$1"; }
|
||||||
|
|
||||||
# Check prerequisites
|
# Check prerequisites
|
||||||
if [[ ! -f "$SPEC_FILE" ]]; then
|
if [[ ! -f "$SPEC_FILE" ]]; then
|
||||||
|
|
@ -93,8 +86,9 @@ run_agent() {
|
||||||
|
|
||||||
log "Iteration $iteration ($mode mode) — starting fresh agent..."
|
log "Iteration $iteration ($mode mode) — starting fresh agent..."
|
||||||
|
|
||||||
# Agent selection block
|
# Suspend errexit (set -e) around the agent call so a non-zero claude exit doesn't
|
||||||
# Extend this case statement to support additional agents
|
# kill the script. We inspect the log content instead.
|
||||||
|
set +e
|
||||||
case "$AGENT" in
|
case "$AGENT" in
|
||||||
claude)
|
claude)
|
||||||
echo "$prompt" | claude -p --output-format text 2>&1 | tee "$logfile"
|
echo "$prompt" | claude -p --output-format text 2>&1 | tee "$logfile"
|
||||||
|
|
@ -103,23 +97,12 @@ run_agent() {
|
||||||
echo "$prompt" | codex 2>&1 | tee "$logfile"
|
echo "$prompt" | codex 2>&1 | tee "$logfile"
|
||||||
;;
|
;;
|
||||||
aider)
|
aider)
|
||||||
# Aider: AI pair programming in your terminal
|
|
||||||
# https://aider.chat
|
|
||||||
aider --message "$prompt" --yes 2>&1 | tee "$logfile"
|
aider --message "$prompt" --yes 2>&1 | tee "$logfile"
|
||||||
;;
|
;;
|
||||||
gemini)
|
gemini)
|
||||||
# Google Gemini CLI (if available)
|
|
||||||
# Adjust command based on actual Gemini CLI interface
|
|
||||||
echo "$prompt" | gemini-cli 2>&1 | tee "$logfile"
|
echo "$prompt" | gemini-cli 2>&1 | tee "$logfile"
|
||||||
;;
|
;;
|
||||||
custom)
|
custom)
|
||||||
# Custom agent integration
|
|
||||||
# Replace this with your own agent wrapper script
|
|
||||||
# The script should:
|
|
||||||
# 1. Accept prompt as first argument or via stdin
|
|
||||||
# 2. Perform the requested work (read files, write code, run tests, commit)
|
|
||||||
# 3. Output promise signals: <promise>PLANNED|DONE|STUCK|ERROR</promise>
|
|
||||||
# 4. Exit with appropriate code
|
|
||||||
if [[ -x "./custom-agent.sh" ]]; then
|
if [[ -x "./custom-agent.sh" ]]; then
|
||||||
./custom-agent.sh "$prompt" 2>&1 | tee "$logfile"
|
./custom-agent.sh "$prompt" 2>&1 | tee "$logfile"
|
||||||
else
|
else
|
||||||
|
|
@ -130,29 +113,82 @@ run_agent() {
|
||||||
*)
|
*)
|
||||||
error "Unknown agent: $AGENT"
|
error "Unknown agent: $AGENT"
|
||||||
error "Supported agents: claude, codex, aider, gemini, custom"
|
error "Supported agents: claude, codex, aider, gemini, custom"
|
||||||
error "To add support for other agents, edit the run_agent() function in this script"
|
|
||||||
exit 1
|
exit 1
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
set -e
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Probe whether claude is available again by sending it a trivial request.
#
# Globals:  LOG_DIR (read) - directory that receives probe.log
# Outputs:  overwrites $LOG_DIR/probe.log with the probe's stdout and stderr
# Returns:  0 if claude exited cleanly and printed no limit message,
#           1 if it exited non-zero or its output still looks rate-limited
probe_session() {
  local probe_log="$LOG_DIR/probe.log"
  local rc=0

  # Capture the exit status with `|| rc=$?` instead of toggling `set +e` /
  # `set -e`, so the caller's errexit setting is left exactly as it was.
  printf '%s\n' "Reply with the single word OK and nothing else." \
    | claude -p --output-format text > "$probe_log" 2>&1 || rc=$?

  if [[ $rc -ne 0 ]]; then
    return 1
  fi

  # A clean exit is not sufficient: also reject output that contains a limit
  # message. The pattern list matches the one used by check_output(), and
  # `grep -E` is used because `\|` alternation is not part of POSIX basic
  # regular expressions.
  if grep -Eqi 'usage limit|rate limit|limit reached|exceeded.*limit|Claude AI usage' \
    "$probe_log" 2>/dev/null; then
    return 1
  fi

  return 0
}
|
||||||
|
|
||||||
# Classify an iteration log by the signals it contains.
#
# Arguments: $1 - path to the iteration log file
# Returns:   0 - log contains <promise>DONE</promise>
#            1 - no recognised signal (normal iteration; also the result when
#                the log file cannot be read, since grep errors are silenced)
#            2 - log contains <promise>STUCK</promise>
#            3 - log contains <promise>ERROR</promise>
#            4 - log contains a usage/rate limit message
check_output() {
  local logfile="$1"

  # Session / usage limit is checked BEFORE the promise signals, so a limit
  # message wins over any promise in the same log.
  # `grep -E` is used because `\|` alternation is not part of POSIX basic
  # regular expressions.
  # NOTE(review): the match is case-insensitive and applies to the whole log,
  # so agent output that merely mentions e.g. "rate limit" is classified as
  # rate-limited too — confirm this precedence is intended.
  if grep -Eqi 'usage limit|rate limit|limit reached|exceeded.*limit|Claude AI usage' \
    "$logfile" 2>/dev/null; then
    return 4 # Rate limited
  fi

  if grep -q '<promise>DONE</promise>' "$logfile" 2>/dev/null; then
    return 0 # Done
  elif grep -q '<promise>STUCK</promise>' "$logfile" 2>/dev/null; then
    return 2 # Stuck — needs human intervention
  elif grep -q '<promise>ERROR</promise>' "$logfile" 2>/dev/null; then
    return 3 # Unrecoverable error
  else
    return 1 # Normal iteration — continue
  fi
}
|
||||||
|
|
||||||
# Main loop
|
# Block until probe_session reports that the session is usable again.
#
# Globals:   SESSION_POLL_INTERVAL (read) - seconds to sleep between probes
# Arguments: $1 - iteration number, used only in the log messages
# Outputs:   progress messages via warn / info / success
# Returns:   0 once a probe succeeds; loops indefinitely until then
wait_for_session_reset() {
  local iteration=$1
  local attempt=0
  local next_check

  warn "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  warn "Session usage limit hit during iteration $iteration."
  warn "Will probe every ${SESSION_POLL_INTERVAL}s until session resets."
  warn "No manual action needed — loop will resume automatically."
  warn "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  while true; do
    # Plain assignment rather than ((attempt++)): the post-increment of 0
    # evaluates to 0, which makes (( )) return status 1 and would terminate
    # the script under `set -e` on the very first attempt.
    attempt=$((attempt + 1))

    # Try GNU date syntax first, then BSD syntax, then fall back to a
    # placeholder so the message is still printed.
    next_check=$(date -d "+${SESSION_POLL_INTERVAL} seconds" '+%H:%M:%S' 2>/dev/null \
      || date -v "+${SESSION_POLL_INTERVAL}S" '+%H:%M:%S' 2>/dev/null \
      || echo "soon")
    info "Probe attempt $attempt — next check at $next_check..."
    sleep "$SESSION_POLL_INTERVAL"

    if probe_session; then
      success "Session available! Resuming iteration $iteration..."
      return 0
    else
      warn "Still rate-limited (attempt $attempt). Waiting another ${SESSION_POLL_INTERVAL}s..."
    fi
  done
}
|
||||||
|
|
||||||
|
# ─── Main ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
if [[ "$MODE" == "plan" ]]; then
|
if [[ "$MODE" == "plan" ]]; then
|
||||||
log "Planning mode — creating implementation plan..."
|
log "Planning mode — creating implementation plan..."
|
||||||
run_agent 0 plan
|
run_agent 0 plan
|
||||||
|
|
@ -161,35 +197,44 @@ if [[ "$MODE" == "plan" ]]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "Starting Ralph Wiggum loop (max $MAX_ITERATIONS iterations)"
|
log "Starting Ralph Wiggum loop (max $MAX_ITERATIONS iterations)"
|
||||||
log "Agent: $AGENT"
|
log "Agent: $AGENT"
|
||||||
log "Spec: $SPEC_FILE"
|
log "Spec: $SPEC_FILE"
|
||||||
log "Plan: $PLAN_FILE"
|
log "Plan: $PLAN_FILE"
|
||||||
|
log "Poll interval: ${SESSION_POLL_INTERVAL}s (session limit recovery)"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Build loop: run one fresh agent per iteration and act on the signal that
# check_output finds in that iteration's log. The counter only advances after
# a normal iteration (status 1); a rate-limited iteration (status 4) is
# retried with the same number once wait_for_session_reset returns.
i=1
while [[ "$i" -le "$MAX_ITERATIONS" ]]; do
  run_agent "$i" build
  logfile="$LOG_DIR/iteration-${i}.log"

  # Reset status on every pass before capturing the return value. Without
  # the reset, `|| status=$?` leaves the previous iteration's value in place
  # whenever check_output returns 0, so a DONE signal would never be seen
  # after the first non-zero result. The `||` also keeps a non-zero return
  # from tripping set -e.
  status=0
  check_output "$logfile" || status=$?

  case "$status" in
    0)
      success "ALL TASKS COMPLETE after $i iterations!"
      exit 0
      ;;
    2)
      warn "Agent is stuck on iteration $i. Review $logfile and intervene."
      exit 1
      ;;
    3)
      error "Agent encountered an error on iteration $i. Review $logfile."
      exit 1
      ;;
    4)
      # Rate limited — wait for reset, then retry the SAME iteration.
      wait_for_session_reset "$i"
      # Do NOT increment i — retry the same task.
      ;;
    1)
      log "Iteration $i complete. Restarting with fresh context..."
      echo ""
      sleep 2
      # Plain assignment instead of ((i++)): (( )) returns status 1 whenever
      # the expression evaluates to 0, which is fatal under set -e.
      i=$((i + 1))
      ;;
    *)
      # Defensive: an unrecognised status would otherwise loop forever
      # without advancing i.
      error "Unexpected status $status from check_output on iteration $i. Review $logfile."
      exit 1
      ;;
  esac
done
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue