diff --git a/src/__tests__/bugfixes.test.ts b/src/__tests__/bugfixes.test.ts new file mode 100644 index 00000000..c028bdd9 --- /dev/null +++ b/src/__tests__/bugfixes.test.ts @@ -0,0 +1,282 @@ +/** + * Tests for Bug Fixes applied to openclaude. + * + * Covers: + * 1. Gemini `store: false` rejection fix + * 2. Session timeout / 500 error fix (stream idle timeout) + * 3. Agent loop continuation nudge + * 4. Web search result count improvements + */ + +import { describe, test, expect } from 'bun:test' +import { resolve } from 'path' + +const SRC = resolve(import.meta.dir, '..') +const file = (relative: string) => Bun.file(resolve(SRC, relative)) + +// --------------------------------------------------------------------------- +// Fix 1: Gemini `store: false` rejection +// --------------------------------------------------------------------------- +describe('Gemini store field fix', () => { + test('isGeminiMode is imported and used in openaiShim', async () => { + const content = await file('services/api/openaiShim.ts').text() + + // Verify the fix: store deletion should check for Gemini mode + expect(content).toContain('isGeminiMode()') + expect(content).toContain("mistral and gemini don't recognize body.store") + // Ensure the delete body.store is guarded for both Mistral and Gemini + expect(content).toMatch(/isMistral\s*\|\|\s*isGeminiMode\(\)/) + }) + + test('store: false is still set by default (OpenAI needs it)', async () => { + const content = await file('services/api/openaiShim.ts').text() + + // The body should still have store: false by default + expect(content).toMatch(/store:\s*false/) + // But it should be deleted for non-OpenAI providers + expect(content).toMatch(/delete body\.store/) + }) +}) + +// --------------------------------------------------------------------------- +// Fix 2: Session timeout — stream idle timeout +// --------------------------------------------------------------------------- +describe('Session timeout fix', () => { + test('openaiShim has idle timeout for SSE streams', async () => { + const content = await file('services/api/openaiShim.ts').text() + + expect(content).toContain('STREAM_IDLE_TIMEOUT_MS') + expect(content).toContain('readWithTimeout') + expect(content).toMatch(/readWithTimeout\(\)/) + }) + + test('codexShim has idle timeout for SSE streams', async () => { + const content = await file('services/api/codexShim.ts').text() + + expect(content).toContain('STREAM_IDLE_TIMEOUT_MS') + expect(content).toContain('readWithTimeout') + expect(content).toMatch(/readWithTimeout\(\)/) + }) + + test('idle timeout is set to a reasonable value (>= 60s)', async () => { + const content = await file('services/api/openaiShim.ts').text() + + // Extract the timeout value (supports numeric separators like 120_000) + const match = content.match(/STREAM_IDLE_TIMEOUT_MS\s*=\s*([\d_]+)/) + expect(match).not.toBeNull() + const timeoutMs = parseInt(match![1].replace(/_/g, ''), 10) + expect(timeoutMs).toBeGreaterThanOrEqual(60_000) + }) +}) + +// --------------------------------------------------------------------------- +// Fix 3: Agent loop continuation nudge +// --------------------------------------------------------------------------- +describe('Agent loop continuation nudge', () => { + test('query.ts has continuation signal detection', async () => { + const content = await file('query.ts').text() + + expect(content).toContain('continuationSignals') + expect(content).toContain('Continuation nudge triggered') + expect(content).toContain('continuation_nudge') + }) + + test('continuation signals include tightened patterns', async () => { + const content = await file('query.ts').text() + + // Should detect tightened patterns requiring explicit action verbs + expect(content).toMatch(/so now \(i\|let me\|we\)/) + expect(content).toContain('completionMarkers') + expect(content).toContain('MAX_CONTINUATION_NUDGES') + // Verify the nudge counter guard exists + expect(content).toMatch(/continuationNudgeCount\s*<\s*MAX_CONTINUATION_NUDGES/) + }) + + test('nudge creates a meta user message to continue', async () => { + const content = await file('query.ts').text() + + expect(content).toContain( + 'Continue with the task. Use the appropriate tools to proceed.', + ) + }) +}) + +// --------------------------------------------------------------------------- +// Fix 4: Web search result count improvements +// --------------------------------------------------------------------------- +describe('Web search result count improvements', () => { + test('Bing provider requests at least 15 results', async () => { + const content = await file( + 'tools/WebSearchTool/providers/bing.ts', + ).text() + + expect(content).toMatch(/count.*['"]15['"]/) + }) + + test('Tavily provider requests at least 15 results', async () => { + const content = await file( + 'tools/WebSearchTool/providers/tavily.ts', + ).text() + + expect(content).toMatch(/max_results:\s*15/) + }) + + test('Exa provider requests at least 15 results', async () => { + const content = await file( + 'tools/WebSearchTool/providers/exa.ts', + ).text() + + expect(content).toMatch(/numResults:\s*15/) + }) + + test('Firecrawl provider requests at least 15 results', async () => { + const content = await file( + 'tools/WebSearchTool/providers/firecrawl.ts', + ).text() + + expect(content).toMatch(/limit:\s*15/) + }) + + test('Mojeek provider requests at least 10 results', async () => { + const content = await file( + 'tools/WebSearchTool/providers/mojeek.ts', + ).text() + + // Mojeek uses 't' param for result count — verify it's set to 10 + expect(content).toMatch(/searchParams\.set\('t',\s*'10'\)/) + }) + + test('You.com provider requests at least 10 results', async () => { + const content = await file( + 'tools/WebSearchTool/providers/you.ts', + ).text() + + expect(content).toMatch(/num_web_results.*['"]10['"]/) + }) + + test('Jina provider requests at least 10 results', async () => { + const content = await file( + 'tools/WebSearchTool/providers/jina.ts', + ).text() + + expect(content).toMatch(/count.*['"]10['"]/) + }) + + test('Native Anthropic web search max_uses increased to 15', async () => { + const content = await file( + 'tools/WebSearchTool/WebSearchTool.ts', + ).text() + + expect(content).toMatch(/max_uses:\s*15/) + }) +}) + +// --------------------------------------------------------------------------- +// Fix 5: MCP tool timeout fix +// --------------------------------------------------------------------------- +describe('MCP tool timeout fix', () => { + test('default MCP tool timeout is reasonable (not 27 hours)', async () => { + const content = await file('services/mcp/client.ts').text() + + // Should NOT have the old ~27.8 hour default + expect(content).not.toContain('100_000_000') + // Should have a reasonable timeout (5 minutes = 300_000ms) + expect(content).toMatch(/DEFAULT_MCP_TOOL_TIMEOUT_MS\s*=\s*300_000/) + }) + + test('MCP tools/list has retry logic', async () => { + const content = await file('services/mcp/client.ts').text() + + expect(content).toContain('tools/list failed (attempt') + expect(content).toContain('Retrying...') + }) + + test('MCP URL elicitation checks abort signal', async () => { + const content = await file('services/mcp/client.ts').text() + + expect(content).toContain('signal.aborted') + expect(content).toContain('Tool call aborted during URL elicitation') + }) + + test('MCP tool error messages include server and tool name in telemetry', async () => { + const content = await file('services/mcp/client.ts').text() + + // Telemetry message should include context like "MCP tool [serverName] toolName: error" + // The human-readable message stays unchanged to avoid breaking error consumers + expect(content).toContain('MCP tool [${name}] ${tool}:') + }) +}) + +// --------------------------------------------------------------------------- +// Cross-cutting: verify no regressions +// --------------------------------------------------------------------------- +describe('Regression checks', () => { + test('store field is still set for OpenAI (not deleted unconditionally)', async () => { + const content = await file('services/api/openaiShim.ts').text() + + // store: false should exist in body construction + expect(content).toMatch(/store:\s*false/) + // But delete body.store should be conditional (guarded by if) + const deleteLines = content.split('\n').filter(l => l.includes('delete body.store')) + expect(deleteLines.length).toBeGreaterThan(0) + // Verify the delete is inside a conditional block by checking surrounding context + for (const line of deleteLines) { + const trimmed = line.trim() + // Should be either inside an if block (indented delete) or a comment + expect( + trimmed.startsWith('delete') && !trimmed.includes('// unconditional'), + ).toBe(true) + } + }) +}) + +// --------------------------------------------------------------------------- +// Fix 6: SendMessageTool race condition guard +// --------------------------------------------------------------------------- +describe('SendMessageTool race condition fix', () => { + test('SendMessageTool has double-check for concurrent resume', async () => { + const content = await file('tools/SendMessageTool/SendMessageTool.ts').text() + + // Should have a second status check before resuming to prevent race + expect(content).toContain('was concurrently resumed') + // The freshTask check should re-read from getAppState + expect(content).toMatch(/const freshTask = context\.getAppState\(\)\.tasks\[agentId\]/) + }) +}) + +// --------------------------------------------------------------------------- +// Fix 7: AgentTool dump state cleanup +// --------------------------------------------------------------------------- +describe('AgentTool cleanup fix', () => { + test('backgrounded agent always cleans up dump state', async () => { + const content = await file('tools/AgentTool/AgentTool.tsx').text() + + // The backgrounded agent's finally block should clean up regardless + // of whether the agent crashed or completed normally + expect(content).toContain('Defensive cleanup: wrap each call so one failure') + // Verify cleanup is wrapped in try/catch for defensive execution + expect(content).toMatch(/try\s*\{\s*clearInvokedSkillsForAgent/) + expect(content).toMatch(/try\s*\{\s*clearDumpState/) + }) +}) + +// --------------------------------------------------------------------------- +// Fix 8: Context overflow 500 error handling +// --------------------------------------------------------------------------- +describe('Context overflow 500 fix', () => { + test('errors.ts has handler for context overflow 500 errors', async () => { + const content = await file('services/api/errors.ts').text() + + expect(content).toContain('500 errors caused by context overflow') + expect(content).toContain('too many tokens') + expect(content).toContain('The conversation has grown too large') + }) + + test('query.ts has circuit breaker safety net for oversized context', async () => { + const content = await file('query.ts').text() + + expect(content).toContain('Safety net: when auto-compact') + expect(content).toContain('circuit breaker has tripped') + expect(content).toContain('automatic compaction has failed') + }) +}) diff --git a/src/__tests__/providerCounts.test.ts b/src/__tests__/providerCounts.test.ts new file mode 100644 index 00000000..933faa86 --- /dev/null +++ b/src/__tests__/providerCounts.test.ts @@ -0,0 +1,55 @@ +/** + * Tests for Web Search Provider result count configurations. + */ + +import { describe, test, expect } from 'bun:test' +import { resolve } from 'path' + +const SRC = resolve(import.meta.dir, '..', 'tools', 'WebSearchTool', 'providers') +const file = (name: string) => Bun.file(resolve(SRC, name)) + +describe('Provider result counts', () => { + const providers = [ + 'bing.ts', + 'tavily.ts', + 'exa.ts', + 'firecrawl.ts', + 'mojeek.ts', + 'you.ts', + 'jina.ts', + 'duckduckgo.ts', + // linkup.ts excluded — uses depth param, not a result count field + ] + + for (const name of providers) { + test(`${name} exists and is readable`, async () => { + const f = file(name) + expect(await f.exists()).toBe(true) + const content = await f.text() + expect(content.length).toBeGreaterThan(100) + }) + } + + test('No provider hardcodes a limit below 10', async () => { + const suspiciousPatterns = [ + /count['":\s]*['"]([1-9])['"]/i, + /limit['":\s]*([1-9])\b/, + /max_results['":\s]*([1-9])\b/, + /numResults['":\s]*([1-9])\b/, + ] + + for (const name of providers) { + const content = await file(name).text() + for (const pattern of suspiciousPatterns) { + const match = content.match(pattern) + if (match) { + const num = parseInt(match[1], 10) + expect(num).toBeGreaterThanOrEqual( + 10, + `${name} has suspiciously low result count: ${match[0]}`, + ) + } + } + } + }) +}) diff --git a/src/query.ts b/src/query.ts index 0dc2ab01..f36b2610 100644 --- a/src/query.ts +++ b/src/query.ts @@ -160,6 +160,7 @@ function* yieldMissingToolResultBlocks( * rules, ye will be punished with an entire day of debugging and hair pulling. */ const MAX_OUTPUT_TOKENS_RECOVERY_LIMIT = 3 +const MAX_CONTINUATION_NUDGES = 3 /** * Is this a max_output_tokens error message? If so, the streaming loop should @@ -209,6 +210,10 @@ type State = { pendingToolUseSummary: Promise | undefined stopHookActive: boolean | undefined turnCount: number + // Count of consecutive continuation nudges within the current turn. + // Capped at MAX_CONTINUATION_NUDGES to prevent infinite nudge loops + // when the model keeps matching continuation signals without tool calls. + continuationNudgeCount: number // Why the previous iteration continued. Undefined on first iteration. // Lets tests assert recovery paths fired without inspecting message contents. transition: Continue | undefined @@ -272,6 +277,7 @@ async function* queryLoop( maxOutputTokensRecoveryCount: 0, hasAttemptedReactiveCompact: false, turnCount: 1, + continuationNudgeCount: 0, pendingToolUseSummary: undefined, transition: undefined, } @@ -645,6 +651,35 @@ async function* queryLoop( } } + // Safety net: when auto-compact's circuit breaker has tripped (3+ + // consecutive failures), the normal blocking check above is gated on + // reactiveCompact. If reactiveCompact is also enabled but ALSO fails + // (or is disabled), the oversized context goes straight to the API and + // gets a 500. This check catches that gap — if compaction is exhausted + // and context is still over the autocompact threshold, block immediately + // with a clear message instead of burning an API call that will 500. + if ( + tracking?.consecutiveFailures !== undefined && + tracking.consecutiveFailures >= 3 && + isAutoCompactEnabled() + ) { + const model = toolUseContext.options.mainLoopModel + const tokenUsage = tokenCountWithEstimation(messagesForQuery) - snipTokensFreed + const { isAboveAutoCompactThreshold } = calculateTokenWarningState( + tokenUsage, + model, + ) + if (isAboveAutoCompactThreshold) { + yield createAssistantAPIErrorMessage({ + content: + 'The conversation has exceeded the context limit and automatic compaction has failed. ' + + 'Press esc twice to go up a few messages and try again, or start a new session with /new.', + error: 'invalid_request', + }) + return { reason: 'blocking_limit' } + } + } + let attemptWithFallback = true queryCheckpoint('query_api_loop_start') @@ -1102,6 +1137,7 @@ async function* queryLoop( pendingToolUseSummary: undefined, stopHookActive: undefined, turnCount, + continuationNudgeCount: state.continuationNudgeCount, transition: { reason: 'collapse_drain_retry', committed: drained.committed, @@ -1155,6 +1191,7 @@ async function* queryLoop( pendingToolUseSummary: undefined, stopHookActive: undefined, turnCount, + continuationNudgeCount: state.continuationNudgeCount, transition: { reason: 'reactive_compact_retry' }, } state = next @@ -1210,6 +1247,7 @@ async function* queryLoop( pendingToolUseSummary: undefined, stopHookActive: undefined, turnCount, + continuationNudgeCount: state.continuationNudgeCount, transition: { reason: 'max_output_tokens_escalate' }, } state = next @@ -1238,6 +1276,7 @@ async function* queryLoop( pendingToolUseSummary: undefined, stopHookActive: undefined, turnCount, + continuationNudgeCount: state.continuationNudgeCount, transition: { reason: 'max_output_tokens_recovery', attempt: maxOutputTokensRecoveryCount + 1, @@ -1295,6 +1334,7 @@ async function* queryLoop( pendingToolUseSummary: undefined, stopHookActive: true, turnCount, + continuationNudgeCount: state.continuationNudgeCount, transition: { reason: 'stop_hook_blocking' }, } state = next @@ -1331,6 +1371,7 @@ async function* queryLoop( pendingToolUseSummary: undefined, stopHookActive: undefined, turnCount, + continuationNudgeCount: state.continuationNudgeCount, transition: { reason: 'token_budget_continuation' }, } continue @@ -1350,6 +1391,77 @@ async function* queryLoop( } } + // Continuation nudge: detect when the model signals intent to continue + // (e.g., "so now I have to do it", "let me now...", "I'll need to...") + // but returned no tool calls. This prevents premature task completion. + // + // Guard: capped at MAX_CONTINUATION_NUDGES to prevent infinite loops + // when the model keeps matching signals without ever calling tools. + if ( + assistantMessages.length > 0 && + turnCount < (maxTurns ?? Infinity) && + state.continuationNudgeCount < MAX_CONTINUATION_NUDGES + ) { + const lastAssistant = assistantMessages.at(-1) + if (lastAssistant?.type === 'assistant') { + const lastText = lastAssistant.message.content + .filter((b): b is { type: 'text'; text: string } => b.type === 'text') + .map(b => b.text) + .join(' ') + .toLowerCase() + + // Tightened patterns: require explicit action verbs and exclude + // common explanatory phrasing to reduce false positives. + const continuationSignals = [ + // Only match "so now I/let me/we" followed by an action verb + /\bso now (i|let me|we) (need to|have to|should|must|will) (do|create|write|edit|update|fix|implement|add|run|check|make|build|set up)\b/, + // "now I'll" + action (not "now I'll explain" etc.) + /\bnow i('ll| will) (do|create|write|edit|update|fix|implement|add|run|check|make|build|set up|go|proceed)\b/, + // "let me" + action (not "let me think/explain/show") + /\blet me (go ahead and |now )?(do|create|write|edit|update|fix|implement|add|run|check|make|build|set up|proceed)\b/, + // "I'll/I need to/I have to" + action, only if message is short (<80 chars) + ...(lastText.length < 80 + ? [/\b(i('ll| will| need to| have to| must) (now )?(do|create|write|edit|update|fix|implement|add|run|check|make|build|set up))\b/] + : []), + // "time to" + action + /\btime to (do|create|write|edit|update|fix|implement|add|run|check|make|build|get started|begin)\b/, + // "next, I'll/let me" + action, only if message is short + ...(lastText.length < 80 + ? [/\bnext,?\s+(i('ll| will)|let me|i need to) (do|create|write|edit|update|fix|implement|add|run|check|make|build)\b/] + : []), + ] + + // Don't nudge if the text contains completion markers + const completionMarkers = /\b(done|finished|completed|complete|summary|that's all|that is all|all set|hope this helps|let me know if)\b/ + if (completionMarkers.test(lastText)) { + // Model signaled completion — don't nudge + } else if (continuationSignals.some(re => re.test(lastText))) { + logForDebugging( + `Continuation nudge triggered (${state.continuationNudgeCount + 1}/${MAX_CONTINUATION_NUDGES}): model said "${lastText.slice(-120)}" without tool calls`, + ) + const nudge = createUserMessage({ + content: 'Continue with the task. Use the appropriate tools to proceed.', + isMeta: true, + }) + const next: State = { + messages: [...messagesForQuery, ...assistantMessages, nudge], + toolUseContext, + autoCompactTracking: tracking, + maxOutputTokensRecoveryCount: 0, + hasAttemptedReactiveCompact: false, + maxOutputTokensOverride: undefined, + pendingToolUseSummary: undefined, + stopHookActive: undefined, + turnCount, + continuationNudgeCount: state.continuationNudgeCount + 1, + transition: { reason: 'continuation_nudge' }, + } + state = next + continue + } + } + } + return { reason: 'completed' } } @@ -1715,6 +1827,7 @@ async function* queryLoop( turnCount: nextTurnCount, maxOutputTokensRecoveryCount: 0, hasAttemptedReactiveCompact: false, + continuationNudgeCount: 0, pendingToolUseSummary: nextPendingToolUseSummary, maxOutputTokensOverride: undefined, stopHookActive, diff --git a/src/services/api/codexShim.ts b/src/services/api/codexShim.ts index 4c823a3d..4f3995dc 100644 --- a/src/services/api/codexShim.ts +++ b/src/services/api/codexShim.ts @@ -580,15 +580,55 @@ export async function performCodexRequest(options: { return response } -async function* readSseEvents(response: Response): AsyncGenerator { +async function* readSseEvents(response: Response, signal?: AbortSignal): AsyncGenerator { const reader = response.body?.getReader() if (!reader) return const decoder = new TextDecoder() let buffer = '' + const STREAM_IDLE_TIMEOUT_MS = 120_000 // 2 minutes without data + let lastDataTime = Date.now() + + /** + * Read from the stream with an idle timeout. Respects the caller's + * AbortSignal — clears the idle timer on abort so the AbortError + * surfaces cleanly instead of a spurious idle timeout. + */ + async function readWithTimeout(): Promise> { + return new Promise((resolve, reject) => { + const timeoutId = setTimeout(() => { + const elapsed = Math.round((Date.now() - lastDataTime) / 1000) + reject(new Error( + `Codex SSE stream idle for ${elapsed}s (limit: ${STREAM_IDLE_TIMEOUT_MS / 1000}s). Connection likely dropped.`, + )) + }, STREAM_IDLE_TIMEOUT_MS) + + let abortCleanup: (() => void) | undefined + if (signal) { + abortCleanup = () => { + clearTimeout(timeoutId) + } + signal.addEventListener('abort', abortCleanup, { once: true }) + } + + reader.read().then( + result => { + clearTimeout(timeoutId) + if (signal && abortCleanup) signal.removeEventListener('abort', abortCleanup) + if (result.value) lastDataTime = Date.now() + resolve(result) + }, + err => { + clearTimeout(timeoutId) + if (signal && abortCleanup) signal.removeEventListener('abort', abortCleanup) + reject(err) + }, + ) + }) + } while (true) { - const { done, value } = await reader.read() + const { done, value } = await readWithTimeout() if (done) break buffer += decoder.decode(value, { stream: true }) @@ -649,10 +689,11 @@ function determineStopReason( export async function collectCodexCompletedResponse( response: Response, + signal?: AbortSignal, ): Promise> { let completedResponse: Record | undefined - for await (const event of readSseEvents(response)) { + for await (const event of readSseEvents(response, signal)) { if (event.event === 'response.failed') { const msg = event.data?.response?.error?.message ?? event.data?.error?.message ?? 'Codex response failed' @@ -681,6 +722,7 @@ export async function collectCodexCompletedResponse( export async function* codexStreamToAnthropic( response: Response, model: string, + signal?: AbortSignal, ): AsyncGenerator { const messageId = makeMessageId() const toolBlocksByItemId = new Map< @@ -742,7 +784,7 @@ export async function* codexStreamToAnthropic( }, } - for await (const event of readSseEvents(response)) { + for await (const event of readSseEvents(response, signal)) { const payload = event.data if (event.event === 'response.output_item.added') { diff --git a/src/services/api/errors.ts b/src/services/api/errors.ts index 508213a3..eb8fc1ed 100644 --- a/src/services/api/errors.ts +++ b/src/services/api/errors.ts @@ -924,6 +924,30 @@ export function getAssistantMessageFromError( }) } + // 500 errors caused by context overflow — the API returns 500 instead of 400 + // when the request body (including conversation context) exceeds limits. + // This happens when auto-compact fails or the token estimation undercounts. + // Detect by checking for context-related keywords in 500 responses. + if ( + error instanceof APIError && + error.status >= 500 && + (error.message.toLowerCase().includes('too many tokens') || + error.message.toLowerCase().includes('request too large') || + error.message.toLowerCase().includes('context length') || + error.message.toLowerCase().includes('maximum context') || + error.message.toLowerCase().includes('input length') || + error.message.toLowerCase().includes('payload too large')) + ) { + const rewindInstruction = getIsNonInteractiveSession() + ? '' + : ' Press esc twice to go up a few messages, or run /compact to reduce context.' + return createAssistantAPIErrorMessage({ + content: `The conversation has grown too large for the API to process.${rewindInstruction} Alternatively, start a new session with /new.`, + error: 'invalid_request', + errorDetails: `Context overflow (500): ${error.message}`, + }) + } + // Connection errors (non-timeout) — use formatAPIError for detailed messages if (error instanceof APIConnectionError) { return createAssistantAPIErrorMessage({ diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts index d78f6f6e..8beb115e 100644 --- a/src/services/api/openaiShim.ts +++ b/src/services/api/openaiShim.ts @@ -641,6 +641,7 @@ function repairPossiblyTruncatedObjectJson(raw: string): string | null { async function* openaiStreamToAnthropic( response: Response, model: string, + signal?: AbortSignal, ): AsyncGenerator { const messageId = makeMessageId() let contentBlockIndex = 0 @@ -688,6 +689,51 @@ async function* openaiStreamToAnthropic( const decoder = new TextDecoder() let buffer = '' + const STREAM_IDLE_TIMEOUT_MS = 120_000 // 2 minutes without data = connection likely dead + let lastDataTime = Date.now() + + /** + * Read from the stream with an idle timeout. If no data arrives within + * STREAM_IDLE_TIMEOUT_MS, assume the connection is dead and throw so + * withRetry can reconnect. This prevents indefinite hangs on stale + * SSE connections from OpenAI/Gemini during long-running sessions. + * Respects the caller's AbortSignal — clears the idle timer on abort + * so the rejection reason is AbortError, not a spurious idle timeout. + */ + async function readWithTimeout(): Promise> { + return new Promise((resolve, reject) => { + const timeoutId = setTimeout(() => { + const elapsed = Math.round((Date.now() - lastDataTime) / 1000) + reject(new Error( + `OpenAI/Gemini SSE stream idle for ${elapsed}s (limit: ${STREAM_IDLE_TIMEOUT_MS / 1000}s). Connection likely dropped.`, + )) + }, STREAM_IDLE_TIMEOUT_MS) + + // If the caller aborts, clear the timer so the AbortError surfaces + // cleanly instead of being masked by a spurious idle timeout. + let abortCleanup: (() => void) | undefined + if (signal) { + abortCleanup = () => { + clearTimeout(timeoutId) + } + signal.addEventListener('abort', abortCleanup, { once: true }) + } + + reader.read().then( + result => { + clearTimeout(timeoutId) + if (signal && abortCleanup) signal.removeEventListener('abort', abortCleanup) + if (result.value) lastDataTime = Date.now() + resolve(result) + }, + err => { + clearTimeout(timeoutId) + if (signal && abortCleanup) signal.removeEventListener('abort', abortCleanup) + reject(err) + }, + ) + }) + } const closeActiveContentBlock = async function* () { if (!hasEmittedContentStart) return @@ -715,7 +761,7 @@ async function* openaiStreamToAnthropic( try { while (true) { - const { done, value } = await reader.read() + const { done, value } = await readWithTimeout() if (done) break buffer += decoder.decode(value, { stream: true }) @@ -1075,13 +1121,13 @@ class OpenAIShimMessages { const isResponsesStream = response.url?.includes('/responses') return new OpenAIShimStream( (request.transport === 'codex_responses' || isResponsesStream) - ? codexStreamToAnthropic(response, request.resolvedModel) - : openaiStreamToAnthropic(response, request.resolvedModel), + ? codexStreamToAnthropic(response, request.resolvedModel, options?.signal) + : openaiStreamToAnthropic(response, request.resolvedModel, options?.signal), ) } if (request.transport === 'codex_responses') { - const data = await collectCodexCompletedResponse(response) + const data = await collectCodexCompletedResponse(response, options?.signal) return convertCodexResponseToAnthropicMessage( data, request.resolvedModel, @@ -1271,8 +1317,9 @@ class OpenAIShimMessages { delete body.max_completion_tokens } - // mistral also doesn't recognize body.store - if (isMistral) { + // mistral and gemini don't recognize body.store — Gemini returns 400 + // "Invalid JSON payload received. Unknown name 'store': Cannot find field." + if (isMistral || isGeminiMode()) { delete body.store } diff --git a/src/services/mcp/client.ts b/src/services/mcp/client.ts index 8857b56c..0f2bbf15 100644 --- a/src/services/mcp/client.ts +++ b/src/services/mcp/client.ts @@ -206,9 +206,12 @@ export function isMcpSessionExpiredError(error: Error): boolean { } /** - * Default timeout for MCP tool calls (effectively infinite - ~27.8 hours). + * Default timeout for MCP tool calls (5 minutes — reasonable for most tools). + * Use MCP_TOOL_TIMEOUT env var to override per-server. + * The previous default of ~27.8 hours effectively meant no timeout, causing + * tools to hang indefinitely on unresponsive servers. */ -const DEFAULT_MCP_TOOL_TIMEOUT_MS = 100_000_000 +const DEFAULT_MCP_TOOL_TIMEOUT_MS = 300_000 /** * Cap on MCP tool descriptions and server instructions sent to the model. @@ -1764,10 +1767,32 @@ export const fetchToolsForClient = memoizeWithLRU( return [] } - const result = (await client.client.request( - { method: 'tools/list' }, - ListToolsResultSchema, - )) as ListToolsResult + // Retry tool list fetch up to 2 times on transient failures. + // Without retry, a single timeout during tools/list makes all MCP tools + // silently disappear from the model's context until the next reconnect. + let result: ListToolsResult | undefined + let lastError: unknown + for (let attempt = 0; attempt < 3; attempt++) { + try { + result = (await client.client.request( + { method: 'tools/list' }, + ListToolsResultSchema, + )) as ListToolsResult + break + } catch (err) { + lastError = err + if (attempt < 2) { + logMCPDebug( + client.name, + `tools/list failed (attempt ${attempt + 1}/3): ${errorMessage(err)}. Retrying...`, + ) + await sleep(1000 * (attempt + 1)) + } + } + } + if (!result) { + throw lastError ?? new Error('tools/list failed after 3 attempts') + } // Sanitize tool data from MCP server const toolsToProcess = recursivelySanitizeUnicode(result.tools) @@ -2864,6 +2889,11 @@ export async function callMCPToolWithUrlElicitationRetry({ }): Promise { const MAX_URL_ELICITATION_RETRIES = 3 for (let attempt = 0; ; attempt++) { + // Check abort signal before each attempt — without this, a cancelled + // elicitation retry loop continues spinning until MAX retries + if (signal.aborted) { + throw new Error('Tool call aborted during URL elicitation') + } try { return await callToolFn({ client: connectedClient, @@ -3156,9 +3186,12 @@ async function callMCPTool({ errorDetails = String(result.error) } logMCPError(name, errorDetails) + // Include server and tool name in telemetry for debugging, but keep + // the human-readable message unchanged to avoid breaking error consumers + // that parse the message string. throw new McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS( errorDetails, - 'MCP tool returned error', + `MCP tool [${name}] ${tool}: ${errorDetails}`, '_meta' in result && result._meta ? { _meta: result._meta } : undefined, ) } diff --git a/src/tools/AgentTool/AgentTool.tsx b/src/tools/AgentTool/AgentTool.tsx index 16ef0c5f..9a4b2480 100644 --- a/src/tools/AgentTool/AgentTool.tsx +++ b/src/tools/AgentTool/AgentTool.tsx @@ -1042,10 +1042,12 @@ export const AgentTool = buildTool({ }); } finally { stopBackgroundedSummarization?.(); - clearInvokedSkillsForAgent(syncAgentId); - clearDumpState(syncAgentId); - // Note: worktree cleanup is done before enqueueAgentNotification - // in both try and catch paths so we can include worktree info + // Defensive cleanup: wrap each call so one failure doesn't + // prevent the other from running. Without this, if + // clearInvokedSkillsForAgent throws, clearDumpState is + // skipped and dump state leaks. + try { clearInvokedSkillsForAgent(syncAgentId); } catch { /* cleanup best-effort */ } + try { clearDumpState(syncAgentId); } catch { /* cleanup best-effort */ } } }); diff --git a/src/tools/SendMessageTool/SendMessageTool.ts b/src/tools/SendMessageTool/SendMessageTool.ts index 289f6a18..a0c4b9d0 100644 --- a/src/tools/SendMessageTool/SendMessageTool.ts +++ b/src/tools/SendMessageTool/SendMessageTool.ts @@ -819,7 +819,25 @@ export const SendMessageTool: Tool = }, } } - // task exists but stopped — auto-resume + // task exists but stopped — auto-resume. + // Guard against race: two concurrent SendMessage calls to the same + // stopped agent could both trigger resumeAgentBackground(), causing + // duplicate task registration. Check status again after acquiring + // the task reference (the first resume changes status to 'running'). + const freshTask = context.getAppState().tasks[agentId] + if (isLocalAgentTask(freshTask) && freshTask.status === 'running') { + queuePendingMessage( + agentId, + input.message, + context.setAppStateForTasks ?? context.setAppState, + ) + return { + data: { + success: true, + message: `Message queued for delivery to ${input.to} at its next tool round (was concurrently resumed).`, + }, + } + } try { const result = await resumeAgentBackground({ agentId, diff --git a/src/tools/WebSearchTool/WebSearchTool.ts b/src/tools/WebSearchTool/WebSearchTool.ts index 9cb49748..bdec1d13 100644 --- a/src/tools/WebSearchTool/WebSearchTool.ts +++ b/src/tools/WebSearchTool/WebSearchTool.ts @@ -125,7 +125,7 @@ function makeToolSchema(input: Input): BetaWebSearchTool20250305 { name: 'web_search', allowed_domains: input.allowed_domains, blocked_domains: input.blocked_domains, - max_uses: 8, // Hardcoded to 8 searches maximum + max_uses: 15, // Allow up to 15 searches per query for better coverage } } diff --git a/src/tools/WebSearchTool/providers/bing.ts b/src/tools/WebSearchTool/providers/bing.ts index 72a0f92b..b252096b 100644 --- a/src/tools/WebSearchTool/providers/bing.ts +++ b/src/tools/WebSearchTool/providers/bing.ts @@ -19,7 +19,7 @@ export const bingProvider: SearchProvider = { const url = new URL('https://api.bing.microsoft.com/v7.0/search') url.searchParams.set('q', input.query) - url.searchParams.set('count', '10') + url.searchParams.set('count', '15') const res = await fetch(url.toString(), { headers: { 'Ocp-Apim-Subscription-Key': process.env.BING_API_KEY! }, diff --git a/src/tools/WebSearchTool/providers/exa.ts b/src/tools/WebSearchTool/providers/exa.ts index 879bf14a..e242a429 100644 --- a/src/tools/WebSearchTool/providers/exa.ts +++ b/src/tools/WebSearchTool/providers/exa.ts @@ -19,7 +19,7 @@ export const exaProvider: SearchProvider = { const body: Record = { query: input.query, - numResults: 10, + numResults: 15, type: 'auto', } diff --git a/src/tools/WebSearchTool/providers/firecrawl.ts b/src/tools/WebSearchTool/providers/firecrawl.ts index 235aefaf..14a38334 100644 --- a/src/tools/WebSearchTool/providers/firecrawl.ts +++ b/src/tools/WebSearchTool/providers/firecrawl.ts @@ -21,7 +21,7 @@ export const firecrawlProvider: SearchProvider = { query = `${query} ${exclusions}` } - const data = await app.search(query, { limit: 10 }) + const data = await app.search(query, { limit: 15 }) const hits = applyDomainFilters( (data.web ?? []).map((r: { url: string; title?: string; description?: string }) => ({ diff --git a/src/tools/WebSearchTool/providers/jina.ts b/src/tools/WebSearchTool/providers/jina.ts index a9fb3e03..5867aab6 100644 --- a/src/tools/WebSearchTool/providers/jina.ts +++ b/src/tools/WebSearchTool/providers/jina.ts @@ -19,6 +19,7 @@ export const jinaProvider: SearchProvider = { const url = new URL('https://s.jina.ai/') url.searchParams.set('q', input.query) + url.searchParams.set('count', '10') const res = await fetch(url.toString(), { headers: { diff --git a/src/tools/WebSearchTool/providers/linkup.ts b/src/tools/WebSearchTool/providers/linkup.ts index 240f00c3..9be34563 100644 --- a/src/tools/WebSearchTool/providers/linkup.ts +++ b/src/tools/WebSearchTool/providers/linkup.ts @@ -26,6 +26,7 @@ export const linkupProvider: SearchProvider = { body: JSON.stringify({ q: input.query, search_type: 'standard', + depth: 'standard', }), signal, }) diff --git a/src/tools/WebSearchTool/providers/mojeek.ts b/src/tools/WebSearchTool/providers/mojeek.ts index 01376539..73cdee81 100644 --- a/src/tools/WebSearchTool/providers/mojeek.ts +++ b/src/tools/WebSearchTool/providers/mojeek.ts @@ -20,6 +20,7 @@ export const mojeekProvider: SearchProvider = { const url = new URL('https://www.mojeek.com/search') url.searchParams.set('q', input.query) url.searchParams.set('fmt', 'json') + url.searchParams.set('t', '10') const headers: Record = { 'Accept': 'application/json', diff --git a/src/tools/WebSearchTool/providers/tavily.ts b/src/tools/WebSearchTool/providers/tavily.ts index 8d9ccfa2..b7b0db1b 100644 --- a/src/tools/WebSearchTool/providers/tavily.ts +++ b/src/tools/WebSearchTool/providers/tavily.ts @@ -25,7 +25,7 @@ export const tavilyProvider: SearchProvider = { }, body: JSON.stringify({ query: input.query, - max_results: 10, + max_results: 15, include_answer: false, }), signal, diff --git a/src/tools/WebSearchTool/providers/you.ts b/src/tools/WebSearchTool/providers/you.ts index 5bec709e..ad6ae737 100644 --- a/src/tools/WebSearchTool/providers/you.ts +++ b/src/tools/WebSearchTool/providers/you.ts @@ -19,6 +19,7 @@ export const youProvider: SearchProvider = { const url = new URL('https://api.ydc-index.io/v1/search') url.searchParams.set('query', input.query) + url.searchParams.set('num_web_results', '10') const res = await fetch(url.toString(), { headers: { 'X-API-Key': process.env.YOU_API_KEY! },