fix: resolve 12 bugs across API, MCP, agent tools, web search, and context overflow (#674)

* fix: resolve 12 bugs across API, MCP, agent tools, web search, and context overflow

  API fixes:
  - Fix Gemini 400 error: delete 'store: false' field for Gemini endpoints
    (was globally injected, Gemini rejects unknown fields)
  - Fix session timeout 500 errors after ~25min: add 120s idle timeout on SSE
    stream readers in openaiShim and codexShim to detect dead connections and
    trigger withRetry reconnection
  - Fix context overflow 500 errors: add handler in errors.ts for 500 responses
    caused by oversized conversation context (too many tokens), surfacing
    user-friendly message with recovery actions instead of raw 'API Error: 500'

  Agent loop fix:
  - Fix premature task completion: detect continuation signals like
    'so now I have to do it' in assistant text without tool calls and inject a
    meta nudge to force the agent to continue

  Web search improvements:
  - Increase result counts: Bing/Tavily/Exa/Firecrawl from 10→15,
    Mojeek/You/Jina from default→10 (explicit), max_uses 8→15

  MCP fixes:
  - Reduce default tool timeout from ~27.8 hours to 5 minutes (tools no longer
    hang indefinitely on unresponsive servers)
  - Add retry logic (3 attempts) for tools/list fetch failures (prevents all
    MCP tools from silently disappearing on timeout)
  - Add abort signal check in URL elicitation retry loop
  - Improve MCP error messages with server and tool name context

  Agent tool fixes:
  - Fix SendMessage race condition: double-check task status before
    auto-resuming stopped agents to prevent duplicate registration
  - Fix auto-compact circuit breaker gap: when auto-compact fails 3+
    consecutive times, proactively block oversized context BEFORE the API call
    instead of letting it 500. Clear message with recovery instructions
    (/new, /compact, rewind).

  Tests: 850 total, 0 failures (25 new bugfix tests)

* fix: address all 4 review blockers + 6 additional issues from PR #674

  Blockers (from Vasanthdev2004 review):

  1. Continuation nudge infinite loop — no loop guard
     Added continuationNudgeCount to State, capped at MAX_CONTINUATION_NUDGES (3).
     Counter increments on each nudge, resets on tool execution (next_turn).

  2. Continuation signal regexes too broad — high false-positive rate
     Tightened all patterns to require explicit action verbs. Added completion
     marker check (done/finished/completed/summary). Broad patterns only fire
     on messages <80 chars.

  3. BUGFIXES.md in repo root — scope contamination
     Removed. PR description already contains this info.

  4. AgentTool dump state cleanup is comment-only, not a bug fix
     Wrapped clearInvokedSkillsForAgent and clearDumpState in individual
     try/catch blocks so one failure doesn't prevent the other.

  Additional issues:

  5+6. readWithTimeout ignores AbortSignal, timer leak on abort
     Added optional signal param to openaiStreamToAnthropic,
     codexStreamToAnthropic, collectCodexCompletedResponse, readSseEvents.
     Added abort listener that clears idle timer so AbortError surfaces cleanly
     instead of spurious idle timeout.

  7. MCP error format change breaks consumers
     Reverted human-readable message to original errorDetails format. Moved
     server/tool context to telemetryMessage param only.

  10. AgentTool test broken by comment change
     Updated test assertions to match new defensive cleanup text + try/catch.

  12. Mojeek test regex dangerously broad
     Tightened to match searchParams.set('t', '10') specifically.

  14. linkup.ts in providerCounts test — no result count field
     Removed from providers list (uses depth param, not result count).

  15. Error message overlap between errors.ts and query.ts
     Prefixed errorDetails with 'Context overflow (500):' to distinguish.

  Tests: 851 pass, 0 fail

---------

Co-authored-by: openclaude-bot <bot@openclaude.ai>
Co-authored-by: Fix Bot <fix@openclaude.dev>
2026-04-14 16:29:53 +05:30
parent 1741f32cb7
commit 25ce2ca7bf
18 changed files with 647 additions and 27 deletions
--- a/src/tests/bugfixes.test.ts
+++ b/src/tests/bugfixes.test.ts
@@ -0,0 +1,282 @@
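+/**
+ * Tests for Bug Fixes applied to openclaude.
+ *
+ * These are static source-inspection tests: each one reads a source file as
+ * text and asserts that the wording of a fix is present in it.
+ *
+ * Covers:
+ * 1. Gemini `store: false` rejection fix
+ * 2. Session timeout / 500 error fix (stream idle timeout)
+ * 3. Agent loop continuation nudge
+ * 4. Web search result count improvements
+ * 5. MCP tool timeout fix
+ * 6. SendMessageTool race condition guard
+ * 7. AgentTool dump state cleanup
+ * 8. Context overflow 500 error handling
+ *
+ * Plus a cross-cutting regression check on the `store` request field.
+ */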
+
+import { describe, test, expect } from 'bun:test'
+import { resolve } from 'path'
+
+// Source root: the parent of the directory that contains this test file.
+const SRC = resolve(import.meta.dir, '..')
+
+/** Returns a Bun.file handle for a source file addressed relative to SRC. */
+const file = (relative: string) => {
+  const absolutePath = resolve(SRC, relative)
+  return Bun.file(absolutePath)
+}
+
+// ---------------------------------------------------------------------------
+// Fix 1: Gemini `store: false` rejection
+// ---------------------------------------------------------------------------
+describe('Gemini store field fix', () => {
+  // Both tests inspect the text of the same shim; it is re-read in each test.
+  const readOpenaiShim = () => file('services/api/openaiShim.ts').text()
+
+  test('isGeminiMode is imported and used in openaiShim', async () => {
+    const shimSource = await readOpenaiShim()
+
+    // The Gemini-mode check and the explanatory comment must both be present.
+    for (const snippet of [
+      'isGeminiMode()',
+      "mistral and gemini don't recognize body.store",
+    ]) {
+      expect(shimSource).toContain(snippet)
+    }
+    // A single guard expression has to name both Mistral and Gemini.
+    expect(shimSource).toMatch(/isMistral\s*\|\|\s*isGeminiMode\(\)/)
+  })
+
+  test('store: false is still set by default (OpenAI needs it)', async () => {
+    const shimSource = await readOpenaiShim()
+
+    // `store: false` stays in the request body by default...
+    expect(shimSource).toMatch(/store:\s*false/)
+    // ...and a statement removing it still exists for the other providers.
+    expect(shimSource).toMatch(/delete body\.store/)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Fix 2: Session timeout — stream idle timeout
+// ---------------------------------------------------------------------------
+describe('Session timeout fix', () => {
+  // The two streaming shims get identical checks, so one test is generated
+  // per shim instead of spelling both out.
+  for (const shim of ['openaiShim', 'codexShim']) {
+    test(`${shim} has idle timeout for SSE streams`, async () => {
+      const shimSource = await file(`services/api/${shim}.ts`).text()
+
+      for (const identifier of ['STREAM_IDLE_TIMEOUT_MS', 'readWithTimeout']) {
+        expect(shimSource).toContain(identifier)
+      }
+      // Requires the literal text `readWithTimeout()` with an empty argument list.
+      expect(shimSource).toMatch(/readWithTimeout\(\)/)
+    })
+  }
+
+  test('idle timeout is set to a reasonable value (>= 60s)', async () => {
+    const shimSource = await file('services/api/openaiShim.ts').text()
+
+    // Capture the numeric literal assigned to the constant; digits may be
+    // grouped with numeric separators (e.g. 120_000).
+    const declaration = shimSource.match(/STREAM_IDLE_TIMEOUT_MS\s*=\s*([\d_]+)/)
+    expect(declaration).not.toBeNull()
+
+    const digits = declaration![1].replace(/_/g, '')
+    expect(parseInt(digits, 10)).toBeGreaterThanOrEqual(60_000)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Fix 3: Agent loop continuation nudge
+// ---------------------------------------------------------------------------
+describe('Agent loop continuation nudge', () => {
+  // Every test in this group inspects the text of query.ts.
+  const readQuerySource = () => file('query.ts').text()
+
+  test('query.ts has continuation signal detection', async () => {
+    const querySource = await readQuerySource()
+
+    const requiredSnippets = [
+      'continuationSignals',
+      'Continuation nudge triggered',
+      'continuation_nudge',
+    ]
+    for (const snippet of requiredSnippets) {
+      expect(querySource).toContain(snippet)
+    }
+  })
+
+  test('continuation signals include tightened patterns', async () => {
+    const querySource = await readQuerySource()
+
+    // Matches the literal pattern text `so now (i|let me|we)` as it is
+    // written in the source, i.e. the tightened form of the signal.
+    expect(querySource).toMatch(/so now \(i\|let me\|we\)/)
+    expect(querySource).toContain('completionMarkers')
+    expect(querySource).toContain('MAX_CONTINUATION_NUDGES')
+    // Loop guard: the nudge counter must be compared against the cap.
+    expect(querySource).toMatch(/continuationNudgeCount\s*<\s*MAX_CONTINUATION_NUDGES/)
+  })
+
+  test('nudge creates a meta user message to continue', async () => {
+    const querySource = await readQuerySource()
+    const nudgeText = 'Continue with the task. Use the appropriate tools to proceed.'
+
+    expect(querySource).toContain(nudgeText)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Fix 4: Web search result count improvements
+// ---------------------------------------------------------------------------
+describe('Web search result count improvements', () => {
+  // One row per backend: [test title, source path relative to SRC, pattern the
+  // source text must match].
+  // NOTE: the titles say "at least", but each pattern pins the exact value.
+  const expectations: Array<[string, string, RegExp]> = [
+    [
+      'Bing provider requests at least 15 results',
+      'tools/WebSearchTool/providers/bing.ts',
+      /count.*['"]15['"]/,
+    ],
+    [
+      'Tavily provider requests at least 15 results',
+      'tools/WebSearchTool/providers/tavily.ts',
+      /max_results:\s*15/,
+    ],
+    [
+      'Exa provider requests at least 15 results',
+      'tools/WebSearchTool/providers/exa.ts',
+      /numResults:\s*15/,
+    ],
+    [
+      'Firecrawl provider requests at least 15 results',
+      'tools/WebSearchTool/providers/firecrawl.ts',
+      /limit:\s*15/,
+    ],
+    [
+      'Mojeek provider requests at least 10 results',
+      'tools/WebSearchTool/providers/mojeek.ts',
+      // Mojeek uses the 't' query parameter for the result count.
+      /searchParams\.set\('t',\s*'10'\)/,
+    ],
+    [
+      'You.com provider requests at least 10 results',
+      'tools/WebSearchTool/providers/you.ts',
+      /num_web_results.*['"]10['"]/,
+    ],
+    [
+      'Jina provider requests at least 10 results',
+      'tools/WebSearchTool/providers/jina.ts',
+      /count.*['"]10['"]/,
+    ],
+    [
+      'Native Anthropic web search max_uses increased to 15',
+      'tools/WebSearchTool/WebSearchTool.ts',
+      /max_uses:\s*15/,
+    ],
+  ]
+
+  for (const [title, sourcePath, pattern] of expectations) {
+    test(title, async () => {
+      const source = await file(sourcePath).text()
+
+      expect(source).toMatch(pattern)
+    })
+  }
+})
+
+// ---------------------------------------------------------------------------
+// Fix 5: MCP tool timeout fix
+// ---------------------------------------------------------------------------
+describe('MCP tool timeout fix', () => {
+  // All four tests inspect the text of the MCP client module.
+  const readMcpClient = () => file('services/mcp/client.ts').text()
+
+  test('default MCP tool timeout is reasonable (not 27 hours)', async () => {
+    const clientSource = await readMcpClient()
+
+    // The previous default (100_000_000 ms, about 27.8 hours) must be gone...
+    expect(clientSource).not.toContain('100_000_000')
+    // ...replaced by 300_000 ms, i.e. 5 minutes.
+    expect(clientSource).toMatch(/DEFAULT_MCP_TOOL_TIMEOUT_MS\s*=\s*300_000/)
+  })
+
+  test('MCP tools/list has retry logic', async () => {
+    const clientSource = await readMcpClient()
+
+    for (const logFragment of ['tools/list failed (attempt', 'Retrying...']) {
+      expect(clientSource).toContain(logFragment)
+    }
+  })
+
+  test('MCP URL elicitation checks abort signal', async () => {
+    const clientSource = await readMcpClient()
+
+    for (const fragment of [
+      'signal.aborted',
+      'Tool call aborted during URL elicitation',
+    ]) {
+      expect(clientSource).toContain(fragment)
+    }
+  })
+
+  test('MCP tool error messages include server and tool name in telemetry', async () => {
+    const clientSource = await readMcpClient()
+
+    // Only the telemetry message carries the "MCP tool [server] tool: error"
+    // context; the human-readable message is left alone so existing error
+    // consumers keep working. The single quotes are deliberate: this matches
+    // the un-interpolated template text as written in the source.
+    const telemetryTemplate = 'MCP tool [${name}] ${tool}:'
+    expect(clientSource).toContain(telemetryTemplate)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Cross-cutting: verify no regressions
+// ---------------------------------------------------------------------------
+describe('Regression checks', () => {
+  test('store field is still set for OpenAI (not deleted unconditionally)', async () => {
+    const shimSource = await file('services/api/openaiShim.ts').text()
+
+    // `store: false` must still appear where the request body is built.
+    expect(shimSource).toMatch(/store:\s*false/)
+
+    // Collect every line that mentions the deletion, stripped of indentation.
+    const deletions = shimSource
+      .split('\n')
+      .map(line => line.trim())
+      .filter(line => line.includes('delete body.store'))
+    expect(deletions.length).toBeGreaterThan(0)
+
+    // Each such line must be a plain `delete ...` statement that is not tagged
+    // with an `// unconditional` marker.
+    // NOTE(review): this does not inspect the surrounding lines, so it cannot
+    // by itself prove the statement sits inside an `if`; it also rejects a
+    // comment line that merely mentions `delete body.store`.
+    for (const statement of deletions) {
+      const isPlainStatement = statement.startsWith('delete')
+      const flaggedUnconditional = statement.includes('// unconditional')
+      expect(isPlainStatement && !flaggedUnconditional).toBe(true)
+    }
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Fix 6: SendMessageTool race condition guard
+// ---------------------------------------------------------------------------
+describe('SendMessageTool race condition fix', () => {
+  test('SendMessageTool has double-check for concurrent resume', async () => {
+    const toolSource = await file('tools/SendMessageTool/SendMessageTool.ts').text()
+
+    // The task must be re-read from app state right before resuming...
+    const freshTaskLookup = /const freshTask = context\.getAppState\(\)\.tasks\[agentId\]/
+    // ...and the path taken when another caller resumed it first must exist.
+    expect(toolSource).toContain('was concurrently resumed')
+    expect(toolSource).toMatch(freshTaskLookup)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Fix 7: AgentTool dump state cleanup
+// ---------------------------------------------------------------------------
+describe('AgentTool cleanup fix', () => {
+  test('backgrounded agent always cleans up dump state', async () => {
+    const toolSource = await file('tools/AgentTool/AgentTool.tsx').text()
+
+    // The explanatory comment for the defensive cleanup must be present.
+    expect(toolSource).toContain('Defensive cleanup: wrap each call so one failure')
+
+    // Each cleanup call has to open its own `try` block, so that a failure in
+    // one does not skip the other.
+    const guardedCalls = [
+      /try\s*\{\s*clearInvokedSkillsForAgent/,
+      /try\s*\{\s*clearDumpState/,
+    ]
+    for (const guardedCall of guardedCalls) {
+      expect(toolSource).toMatch(guardedCall)
+    }
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Fix 8: Context overflow 500 error handling
+// ---------------------------------------------------------------------------
+describe('Context overflow 500 fix', () => {
+  // Asserts that every fragment occurs in the text of the given source file.
+  const expectFragments = async (sourcePath: string, fragments: string[]) => {
+    const source = await file(sourcePath).text()
+    for (const fragment of fragments) {
+      expect(source).toContain(fragment)
+    }
+  }
+
+  test('errors.ts has handler for context overflow 500 errors', async () => {
+    await expectFragments('services/api/errors.ts', [
+      '500 errors caused by context overflow',
+      'too many tokens',
+      'The conversation has grown too large',
+    ])
+  })
+
+  test('query.ts has circuit breaker safety net for oversized context', async () => {
+    await expectFragments('query.ts', [
+      'Safety net: when auto-compact',
+      'circuit breaker has tripped',
+      'automatic compaction has failed',
+    ])
+  })
+})
--- a/src/tests/providerCounts.test.ts
+++ b/src/tests/providerCounts.test.ts
@@ -0,0 +1,55 @@
+/**
+ * Tests for Web Search Provider result count configurations.
+ */
+
+import { describe, test, expect } from 'bun:test'
+import { resolve } from 'path'
+
+// Directory that holds the web search provider modules.
+const SRC = resolve(import.meta.dir, '..', 'tools', 'WebSearchTool', 'providers')
+
+/** Returns a Bun.file handle for the provider module `name` inside SRC. */
+const file = (name: string) => {
+  const providerPath = resolve(SRC, name)
+  return Bun.file(providerPath)
+}
+
+describe('Provider result counts', () => {
+  // Provider modules whose source text is scanned by the tests below.
+  const providers = [
+    'bing.ts',
+    'tavily.ts',
+    'exa.ts',
+    'firecrawl.ts',
+    'mojeek.ts',
+    'you.ts',
+    'jina.ts',
+    'duckduckgo.ts',
+    // linkup.ts excluded — uses depth param, not a result count field
+  ]
+
+  // Sanity check: every listed module exists and is not a near-empty stub.
+  for (const name of providers) {
+    test(`${name} exists and is readable`, async () => {
+      const f = file(name)
+      expect(await f.exists()).toBe(true)
+      const content = await f.text()
+      expect(content.length).toBeGreaterThan(100)
+    })
+  }
+
+  test('No provider hardcodes a limit below 10', async () => {
+    // Each pattern captures a lone digit 1-9 used as a result-count value, so
+    // any match is by construction a count below 10.
+    const suspiciousPatterns = [
+      /count['":\s]*['"]([1-9])['"]/i,
+      /limit['":\s]*([1-9])\b/,
+      /max_results['":\s]*([1-9])\b/,
+      /numResults['":\s]*([1-9])\b/,
+    ]
+
+    // Collect every offender rather than stopping at the first one. The
+    // description travels in the compared value itself, so it shows up in the
+    // failure output (a second argument to the matcher is not reported).
+    const offenders: string[] = []
+    for (const name of providers) {
+      const content = await file(name).text()
+      for (const pattern of suspiciousPatterns) {
+        const match = content.match(pattern)
+        if (match === null) continue
+
+        const num = parseInt(match[1] ?? '', 10)
+        // Written as a negation so an unparsable value (NaN) is also flagged.
+        if (!(num >= 10)) {
+          offenders.push(`${name} has suspiciously low result count: ${match[0]}`)
+        }
+      }
+    }
+
+    expect(offenders).toEqual([])
+  })
+})