From 7671cf930616aaf09635202ee7f619312a75843b Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Mon, 6 Apr 2026 07:25:59 -0700 Subject: [PATCH 1/3] fix: escalating circuit breaker for doom loops in headless mode When `doom_loop` permission is auto-accepted (headless mode or config `"allow"`), the per-tool repeat counter resets every 30 calls and loops run indefinitely. Observed: 10,943 `apply_patch` calls in one session. Add escalating circuit breaker: - 1st threshold (30 calls): ask permission (existing behavior) - 2nd threshold (60 calls): inject non-synthetic warning the LLM sees, telling it to change approach - 3rd threshold (90 calls): force-stop the session via `blocked = true` Also: - Add `if (blocked) break` after the switch to exit the stream loop immediately on force-stop (not just the switch) - Add `escalation_level` to `doom_loop_detected` telemetry event for distinguishing ask/warn/stop in analytics Closes #657 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../opencode/src/altimate/telemetry/index.ts | 3 + packages/opencode/src/session/processor.ts | 75 ++++++++++++++++++- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/packages/opencode/src/altimate/telemetry/index.ts b/packages/opencode/src/altimate/telemetry/index.ts index 772466cd4..15c3ce050 100644 --- a/packages/opencode/src/altimate/telemetry/index.ts +++ b/packages/opencode/src/altimate/telemetry/index.ts @@ -243,6 +243,9 @@ export namespace Telemetry { session_id: string tool_name: string repeat_count: number + // altimate_change start — escalation level for distinguishing ask/warn/stop in analytics + escalation_level?: number + // altimate_change end } | { type: "environment_census" diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 529f44dc0..4b229123b 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -27,6 +27,15 @@ export namespace SessionProcessor { // 30 catches pathological patterns while avoiding false positives for power users. const TOOL_REPEAT_THRESHOLD = 30 // altimate_change end + // altimate_change start — escalating circuit breaker for doom loops + // When the repeat threshold is hit and auto-accepted (headless, config allow), the + // counter resets and the loop continues indefinitely. Escalation levels: + // 1st hit (30 calls): ask permission (existing behavior) + // 2nd hit (60 calls): ask + inject synthetic warning telling model to change approach + // 3rd hit (90 calls): force-stop the session — the model is stuck + const DOOM_LOOP_WARN_ESCALATION = 2 // hits before injecting warning + const DOOM_LOOP_STOP_ESCALATION = 3 // hits before force-stopping + // altimate_change end const log = Log.create({ service: "session.processor" }) export type Info = Awaited> @@ -42,6 +51,9 @@ export namespace SessionProcessor { // altimate_change start — per-tool call counter for varied-input loop detection const toolCallCounts: Record = {} // altimate_change end + // altimate_change start — escalation counter: how many times each tool has hit TOOL_REPEAT_THRESHOLD + const toolLoopHits: Record = {} + // altimate_change end let snapshot: string | undefined let blocked = false let attempt = 0 @@ -201,20 +213,74 @@ export namespace SessionProcessor { }) } - // altimate_change start — per-tool repeat counter (catches varied-input loops like todowrite 2,080x) + // altimate_change start — per-tool repeat counter with escalating circuit breaker // Counter is scoped to the processor lifetime (create() call), so it accumulates // across multiple process() invocations within a session. This is intentional: // cross-turn accumulation catches slow-burn loops that stay under the threshold // per-turn but add up over the session. toolCallCounts[value.toolName] = (toolCallCounts[value.toolName] ?? 0) + 1 if (toolCallCounts[value.toolName] >= TOOL_REPEAT_THRESHOLD) { + toolLoopHits[value.toolName] = (toolLoopHits[value.toolName] ?? 0) + 1 + const hits = toolLoopHits[value.toolName] + const totalCalls = hits * TOOL_REPEAT_THRESHOLD + Telemetry.track({ type: "doom_loop_detected", timestamp: Date.now(), session_id: input.sessionID, tool_name: value.toolName, - repeat_count: toolCallCounts[value.toolName], + repeat_count: totalCalls, + escalation_level: hits, }) + + // Escalation level 3+: force-stop — the model is irretrievably stuck + if (hits >= DOOM_LOOP_STOP_ESCALATION) { + log.warn("doom loop circuit breaker: force-stopping session", { + tool: value.toolName, + totalCalls, + hits, + sessionID: input.sessionID, + }) + await Session.updatePart({ + id: PartID.ascending(), + messageID: input.assistantMessage.id, + sessionID: input.assistantMessage.sessionID, + type: "text", + synthetic: true, + text: + `⚠️ altimate-code: session stopped — \`${value.toolName}\` was called ${totalCalls}+ times, ` + + `indicating the agent is stuck in a loop. Please start a new session with a revised prompt.`, + time: { start: Date.now(), end: Date.now() }, + }) + blocked = true + toolCallCounts[value.toolName] = 0 + break + } + + // Escalation level 2: warn the model via synthetic message + if (hits >= DOOM_LOOP_WARN_ESCALATION) { + log.warn("doom loop escalation: injecting warning", { + tool: value.toolName, + totalCalls, + hits, + sessionID: input.sessionID, + }) + await Session.updatePart({ + id: PartID.ascending(), + messageID: input.assistantMessage.id, + sessionID: input.assistantMessage.sessionID, + type: "text", + // synthetic: false so the LLM actually sees this warning and can course-correct + text: + `⚠️ altimate-code: \`${value.toolName}\` has been called ${totalCalls}+ times this session. ` + + `You appear to be stuck in a loop. Stop repeating the same approach. ` + + `Either try a fundamentally different strategy or explain to the user what is blocking you. ` + + `The session will be force-stopped if this continues.`, + time: { start: Date.now(), end: Date.now() }, + }) + } + + // Escalation level 1: ask permission (existing behavior) const agent = await Agent.get(input.assistantMessage.agent) await PermissionNext.ask({ permission: "doom_loop", @@ -223,7 +289,7 @@ export namespace SessionProcessor { metadata: { tool: value.toolName, input: value.input, - repeat_count: toolCallCounts[value.toolName], + repeat_count: totalCalls, }, always: [value.toolName], ruleset: agent.permission, @@ -478,6 +544,9 @@ export namespace SessionProcessor { continue } if (needsCompaction) break + // altimate_change start — exit stream loop immediately on doom loop force-stop + if (blocked) break + // altimate_change end } } catch (e: any) { log.error("process", { From 558b9fa2efe1f16782ebcf8a1fe633109f1d6b8b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 6 Apr 2026 21:28:27 +0000 Subject: [PATCH 2/3] fix: address code review findings Auto-fixed verified findings from centralized code review. Co-Authored-By: Claude Sonnet 4.6 --- packages/opencode/src/session/processor.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 4b229123b..eb1ec05b2 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -248,7 +248,7 @@ export namespace SessionProcessor { type: "text", synthetic: true, text: - `⚠️ altimate-code: session stopped — \`${value.toolName}\` was called ${totalCalls}+ times, ` + + `⚠️ altimate-code: session stopped — \`${value.toolName}\` was called ${totalCalls} times, ` + `indicating the agent is stuck in a loop. Please start a new session with a revised prompt.`, time: { start: Date.now(), end: Date.now() }, }) @@ -270,9 +270,9 @@ export namespace SessionProcessor { messageID: input.assistantMessage.id, sessionID: input.assistantMessage.sessionID, type: "text", - // synthetic: false so the LLM actually sees this warning and can course-correct + synthetic: false, text: - `⚠️ altimate-code: \`${value.toolName}\` has been called ${totalCalls}+ times this session. ` + + `⚠️ altimate-code: \`${value.toolName}\` has been called ${totalCalls} times this session. ` + `You appear to be stuck in a loop. Stop repeating the same approach. ` + `Either try a fundamentally different strategy or explain to the user what is blocking you. ` + `The session will be force-stopped if this continues.`, @@ -281,6 +281,8 @@ export namespace SessionProcessor { } // Escalation level 1: ask permission (existing behavior) + // Reset before ask so denial/exception doesn't leave count at threshold + toolCallCounts[value.toolName] = 0 const agent = await Agent.get(input.assistantMessage.agent) await PermissionNext.ask({ permission: "doom_loop", @@ -294,7 +296,6 @@ export namespace SessionProcessor { always: [value.toolName], ruleset: agent.permission, }) - toolCallCounts[value.toolName] = 0 } // altimate_change end } From ac01755e50aedf0ab3c3cdcca8184ed90b3da459 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 6 Apr 2026 21:57:22 +0000 Subject: [PATCH 3/3] fix: address code review findings Auto-fixed verified findings from centralized code review. Co-Authored-By: Claude Sonnet 4.6 --- packages/opencode/src/session/processor.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index eb1ec05b2..bfdd958b8 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -254,6 +254,7 @@ export namespace SessionProcessor { }) blocked = true toolCallCounts[value.toolName] = 0 + toolLoopHits[value.toolName] = 0 break } @@ -270,7 +271,7 @@ export namespace SessionProcessor { messageID: input.assistantMessage.id, sessionID: input.assistantMessage.sessionID, type: "text", - synthetic: false, + synthetic: true, text: `⚠️ altimate-code: \`${value.toolName}\` has been called ${totalCalls} times this session. ` + `You appear to be stuck in a loop. Stop repeating the same approach. ` +