diff --git a/src/__tests__/bugfixes.test.ts b/src/__tests__/bugfixes.test.ts
index c028bdd9..edaa2715 100644
--- a/src/__tests__/bugfixes.test.ts
+++ b/src/__tests__/bugfixes.test.ts
@@ -169,6 +169,14 @@ describe('Web search result count improvements', () => {
     expect(content).toMatch(/max_uses:\s*15/)
   })
+
+  test('codex web search path guarantees a non-empty result body', async () => {
+    const content = await file(
+      'tools/WebSearchTool/WebSearchTool.ts',
+    ).text()
+
+    expect(content).toContain("results.push('No results found.')")
+  })
 })
 
 // ---------------------------------------------------------------------------
diff --git a/src/services/api/codexShim.test.ts b/src/services/api/codexShim.test.ts
index 40fcf79a..7589311a 100644
--- a/src/services/api/codexShim.test.ts
+++ b/src/services/api/codexShim.test.ts
@@ -8,6 +8,7 @@ import {
   convertCodexResponseToAnthropicMessage,
   convertToolsToResponsesTools,
 } from './codexShim.js'
+import { __test as webSearchToolTest } from '../../tools/WebSearchTool/WebSearchTool.js'
 
 const tempDirs: string[] = []
 const originalEnv = {
@@ -609,6 +610,164 @@ describe('Codex request translation', () => {
     ])
   })
 
+  test('recovers Codex web search text and sources from sparse completed response', () => {
+    const output = webSearchToolTest.makeOutputFromCodexWebSearchResponse(
+      {
+        output: [
+          {
+            type: 'web_search_call',
+            sources: [
+              {
+                title: 'OpenClaude repo',
+                url: 'https://github.com/example/openclaude',
+              },
+            ],
+          },
+          {
+            type: 'message',
+            role: 'assistant',
+            content: [
+              {
+                type: 'text',
+                text: 'OpenClaude is available on GitHub.',
+                sources: [
+                  {
+                    title: 'Docs',
+                    url: 'https://docs.example.com/openclaude',
+                  },
+                ],
+              },
+            ],
+          },
+        ],
+      },
+      'OpenClaude GitHub 2026',
+      0.42,
+    )
+
+    expect(output.results).toEqual([
+      'OpenClaude is available on GitHub.',
+      {
+        tool_use_id: 'codex-web-search',
+        content: [
+          {
+            title: 'OpenClaude repo',
+            url: 'https://github.com/example/openclaude',
+          },
+          {
+            title: 'Docs',
+            url: 'https://docs.example.com/openclaude',
+          },
+        ],
+      },
+    ])
+  })
+
+  test('falls back to a non-empty Codex web search result message', () => {
+    const output = webSearchToolTest.makeOutputFromCodexWebSearchResponse(
+      { output: [] },
+      'OpenClaude GitHub 2026',
+      0.11,
+    )
+
+    expect(output.results).toEqual(['No results found.'])
+  })
+
+  test('surfaces Codex web search failure reason with a message', () => {
+    const output = webSearchToolTest.makeOutputFromCodexWebSearchResponse(
+      {
+        output: [
+          {
+            type: 'web_search_call',
+            status: 'failed',
+            error: { message: 'upstream search provider rate-limited' },
+          },
+        ],
+      },
+      'OpenClaude GitHub 2026',
+      0.05,
+    )
+
+    expect(output.results).toEqual([
+      'Web search failed: upstream search provider rate-limited',
+    ])
+  })
+
+  test('surfaces Codex web search failure reason nested under action.error', () => {
+    const output = webSearchToolTest.makeOutputFromCodexWebSearchResponse(
+      {
+        output: [
+          {
+            type: 'web_search_call',
+            status: 'failed',
+            action: { error: { message: 'query blocked' } },
+          },
+        ],
+      },
+      'OpenClaude GitHub 2026',
+      0.05,
+    )
+
+    expect(output.results).toEqual(['Web search failed: query blocked'])
+  })
+
+  test('handles Codex web search failure with no reason attached', () => {
+    const output = webSearchToolTest.makeOutputFromCodexWebSearchResponse(
+      {
+        output: [
+          {
+            type: 'web_search_call',
+            status: 'failed',
+          },
+        ],
+      },
+      'OpenClaude GitHub 2026',
+      0.05,
+    )
+
+    expect(output.results).toEqual(['Web search failed.'])
+  })
+
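+  // Ordering guard: the failure banner must precede any salvaged text and sources.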
+  test('a failure item does not suppress sources from a later message item', () => {
+    const output = webSearchToolTest.makeOutputFromCodexWebSearchResponse(
+      {
+        output: [
+          {
+            type: 'web_search_call',
+            status: 'failed',
+            error: { message: 'partial outage' },
+          },
+          {
+            type: 'message',
+            role: 'assistant',
+            content: [
+              {
+                type: 'output_text',
+                text: 'Partial results below.',
+                sources: [
+                  { title: 'Docs', url: 'https://docs.example.com/openclaude' },
+                ],
+              },
+            ],
+          },
+        ],
+      },
+      'OpenClaude GitHub 2026',
+      0.05,
+    )
+
+    expect(output.results).toEqual([
+      'Web search failed: partial outage',
+      'Partial results below.',
+      {
+        tool_use_id: 'codex-web-search',
+        content: [
+          { title: 'Docs', url: 'https://docs.example.com/openclaude' },
+        ],
+      },
+    ])
+  })
+
   test('translates Codex SSE text stream into Anthropic events', async () => {
     const responseText = [
       'event: response.output_item.added',
diff --git a/src/tools/WebFetchTool/applyPromptFallback.test.ts b/src/tools/WebFetchTool/applyPromptFallback.test.ts
new file mode 100644
index 00000000..f9914a74
--- /dev/null
+++ b/src/tools/WebFetchTool/applyPromptFallback.test.ts
@@ -0,0 +1,87 @@
+import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
+
+// Mock the Anthropic API side before importing the module under test, so
+// queryHaiku resolves into whatever the individual test wants (slow, failing,
+// or successful). We preserve every other export from claude.js so unrelated
+// transitive imports still work.
+const haikuMock = mock()
+
+beforeEach(async () => {
+  haikuMock.mockReset()
+  const actual = await import('../../services/api/claude.js')
+  mock.module('../../services/api/claude.js', () => ({
+    ...actual,
+    queryHaiku: haikuMock,
+  }))
+})
+
+afterEach(() => {
+  mock.restore()
+})
+
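+// Import utils.js with a cache-busting query param so each call evaluates a
+// fresh module copy that sees the queryHaiku mock installed in beforeEach.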
+async function runApply(markdown = 'Hello world.', signal?: AbortSignal): Promise<string> {
+  const nonce = `${Date.now()}-${Math.random()}`
+  const { applyPromptToMarkdown } =
+    await import(`./utils.js?ts=${nonce}`)
+  const ctrl = new AbortController()
+  return applyPromptToMarkdown(
+    'summarize',
+    markdown,
+    signal ?? ctrl.signal,
+    false,
+    false,
+  )
+}
+
+test('returns raw truncated markdown when queryHaiku throws', async () => {
+  haikuMock.mockImplementation(async () => {
+    throw new Error('MiniMax rejected the model name')
+  })
+
+  const output = await runApply('Gitlawb homepage content.')
+  expect(output).toContain('[Secondary-model summarization unavailable')
+  expect(output).toContain('Gitlawb homepage content.')
+})
+
+test('returns raw truncated markdown when queryHaiku simulates a timeout', async () => {
+  // Simulating raceWithTimeout's rejection path directly — we can't actually
+  // wait 45s in a test. The error shape matches what raceWithTimeout produces.
+  haikuMock.mockImplementation(async () => {
+    const err = new Error('Secondary-model summarization timed out after 45000ms')
+    ;(err as NodeJS.ErrnoException).code = 'SECONDARY_MODEL_TIMEOUT'
+    throw err
+  })
+
+  const output = await runApply('Slow provider content.')
+  expect(output).toContain('[Secondary-model summarization unavailable')
+  expect(output).toContain('Slow provider content.')
+})
+
+test('returns the model response when queryHaiku succeeds', async () => {
+  haikuMock.mockImplementation(async () => ({
+    message: {
+      content: [{ type: 'text', text: 'This page is about GitLawb, an AI legal platform.' }],
+    },
+  }))
+
+  const output = await runApply('some page content')
+  expect(output).toBe('This page is about GitLawb, an AI legal platform.')
+})
+
+test('returns fallback when queryHaiku resolves with empty content', async () => {
+  haikuMock.mockImplementation(async () => ({ message: { content: [] } }))
+
+  const output = await runApply('some page content')
+  expect(output).toContain('[Secondary-model summarization unavailable')
+  expect(output).toContain('some page content')
+})
+
+test('propagates AbortError from the caller signal', async () => {
+  const ctrl = new AbortController()
+  haikuMock.mockImplementation(async () => {
+    ctrl.abort()
+    return new Promise(() => {})
+  })
+
+  await expect(runApply('content', ctrl.signal)).rejects.toThrow()
+})
diff --git a/src/tools/WebFetchTool/domainCheck.test.ts b/src/tools/WebFetchTool/domainCheck.test.ts
index 15d3bc4c..243fedce 100644
--- a/src/tools/WebFetchTool/domainCheck.test.ts
+++ b/src/tools/WebFetchTool/domainCheck.test.ts
@@ -20,8 +20,11 @@ afterEach(() => {
 describe('checkDomainBlocklist', () => {
   test('returns allowed without API call in OpenAI mode', async () => {
     process.env.CLAUDE_CODE_USE_OPENAI = '1'
+    const actual = await import('../../utils/model/providers.js')
     mock.module('../../utils/model/providers.js', () => ({
+      ...actual,
       getAPIProvider: () => 'openai',
+      isFirstPartyAnthropicBaseUrl: () => false,
     }))
     const getSpy = mock(() =>
       Promise.resolve({ status: 200, data: { can_fetch: true } }),
@@ -37,8 +40,11 @@ describe('checkDomainBlocklist', () => {
   test('returns allowed without API call in Gemini mode', async () => {
     process.env.CLAUDE_CODE_USE_GEMINI = '1'
+    const actual = await import('../../utils/model/providers.js')
     mock.module('../../utils/model/providers.js', () => ({
+      ...actual,
       getAPIProvider: () => 'gemini',
+      isFirstPartyAnthropicBaseUrl: () => false,
     }))
     const getSpy = mock(() =>
       Promise.resolve({ status: 200, data: { can_fetch: true } }),
@@ -57,8 +63,11 @@
     delete process.env.CLAUDE_CODE_USE_GEMINI
     delete process.env.CLAUDE_CODE_USE_GITHUB
 
+    const actual = await import('../../utils/model/providers.js')
     mock.module('../../utils/model/providers.js', () => ({
+      ...actual,
       getAPIProvider: () => 'firstParty',
+      isFirstPartyAnthropicBaseUrl: () => true,
     }))
     const getSpy = mock(() =>
       Promise.resolve({ status: 200, data: { can_fetch: true } }),
diff --git a/src/tools/WebFetchTool/utils.ts b/src/tools/WebFetchTool/utils.ts
index eea52960..70b4b88e 100644
--- a/src/tools/WebFetchTool/utils.ts
+++ b/src/tools/WebFetchTool/utils.ts
@@ -275,20 +275,76 @@ export async function getWithPermittedRedirects(
   if (depth > MAX_REDIRECTS) {
     throw new Error(`Too many redirects (exceeded ${MAX_REDIRECTS})`)
   }
+
+  const axiosConfig = {
+    signal,
+    timeout: FETCH_TIMEOUT_MS,
+    maxRedirects: 0,
+    responseType: 'arraybuffer' as const,
+    maxContentLength: MAX_HTTP_CONTENT_LENGTH,
+    lookup: ssrfGuardedLookup,
+    headers: {
+      Accept: 'text/markdown, text/html, */*',
+      'User-Agent': getWebFetchUserAgent(),
+    },
+  }
+
   try {
-    return await axios.get(url, {
-      signal,
-      timeout: FETCH_TIMEOUT_MS,
-      maxRedirects: 0,
-      responseType: 'arraybuffer',
-      maxContentLength: MAX_HTTP_CONTENT_LENGTH,
-      lookup: ssrfGuardedLookup,
-      headers: {
-        Accept: 'text/markdown, text/html, */*',
-        'User-Agent': getWebFetchUserAgent(),
-      },
-    })
+    return await axios.get(url, axiosConfig)
   } catch (error) {
+    // Try native fetch as a fallback for timeout / network errors
+    // (Bun/Node bundled contexts occasionally hang with axios + custom lookup.)
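+    // Note: unlike the axios path, this fallback does not route DNS through
+    // ssrfGuardedLookup; redirects are still vetted by redirectChecker below.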
+    const isTimeoutLike =
+      axios.isAxiosError(error) &&
+      (!error.response &&
+        (error.code === 'ECONNABORTED' ||
+          error.code === 'ETIMEDOUT' ||
+          error.message?.toLowerCase().includes('timeout')))
+    if (isTimeoutLike && !signal.aborted) {
+      try {
+        const fetchResponse = await fetch(url, {
+          signal,
+          redirect: 'manual',
+          headers: axiosConfig.headers,
+        })
+        // Handle redirects manually
+        if ([301, 302, 307, 308].includes(fetchResponse.status)) {
+          const redirectLocation = fetchResponse.headers.get('location')
+          if (!redirectLocation) {
+            throw new Error('Redirect missing Location header')
+          }
+          const redirectUrl = new URL(redirectLocation, url).toString()
+          if (redirectChecker(url, redirectUrl)) {
+            return getWithPermittedRedirects(
+              redirectUrl,
+              signal,
+              redirectChecker,
+              depth + 1,
+            )
+          } else {
+            return {
+              type: 'redirect' as const,
+              originalUrl: url,
+              redirectUrl,
+              statusCode: fetchResponse.status,
+            }
+          }
+        }
+        const arrayBuffer = await fetchResponse.arrayBuffer()
+        // Build an AxiosResponse-like shape so downstream code stays happy
+        return {
+          data: new Uint8Array(arrayBuffer),
+          status: fetchResponse.status,
+          statusText: fetchResponse.statusText,
+          headers: Object.fromEntries(fetchResponse.headers.entries()),
+          config: axiosConfig,
+          request: undefined,
+        } as unknown as AxiosResponse
+      } catch {
+        // Fall through to original error handling
+      }
+    }
     if (
       axios.isAxiosError(error) &&
       error.response &&
@@ -489,6 +545,58 @@ export async function getURLMarkdownContent(
   return entry
 }
 
+// Budget for the secondary-model summarization after fetch. If the small-fast
+// model is slow (e.g. a 200k-context third-party running a reasoning pass over
+// ~100KB of markdown), we'd rather fall back to raw truncated markdown than
+// hang the tool. Also keeps the worst-case WebFetch bounded to
+// FETCH_TIMEOUT_MS + SECONDARY_MODEL_TIMEOUT_MS regardless of provider.
+const SECONDARY_MODEL_TIMEOUT_MS = 45_000
+
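+// Settles with the wrapped promise, rejects with code SECONDARY_MODEL_TIMEOUT
+// once timeoutMs elapses, or rejects with AbortError if the signal fires first.
+// Losing the race only stops the wait; the timer does not cancel the
+// underlying request.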
+function raceWithTimeout<T>(
+  promise: Promise<T>,
+  timeoutMs: number,
+  signal: AbortSignal,
+): Promise<T> {
+  return new Promise<T>((resolve, reject) => {
+    const timer = setTimeout(() => {
+      const err = new Error(`Secondary-model summarization timed out after ${timeoutMs}ms`)
+      ;(err as NodeJS.ErrnoException).code = 'SECONDARY_MODEL_TIMEOUT'
+      reject(err)
+    }, timeoutMs)
+    const onAbort = () => {
+      clearTimeout(timer)
+      reject(new AbortError())
+    }
+    if (signal.aborted) {
+      clearTimeout(timer)
+      reject(new AbortError())
+      return
+    }
+    signal.addEventListener('abort', onAbort, { once: true })
+    promise.then(
+      value => {
+        clearTimeout(timer)
+        signal.removeEventListener('abort', onAbort)
+        resolve(value)
+      },
+      err => {
+        clearTimeout(timer)
+        signal.removeEventListener('abort', onAbort)
+        reject(err)
+      },
+    )
+  })
+}
+
+function buildFallbackMarkdownSummary(truncatedContent: string): string {
+  return [
+    '[Secondary-model summarization unavailable — returning raw fetched content.',
+    'This typically means the configured small-fast model took too long or errored.]',
+    '',
+    truncatedContent,
+  ].join('\n')
+}
+
 export async function applyPromptToMarkdown(
   prompt: string,
   markdownContent: string,
@@ -508,18 +616,35 @@
     prompt,
     isPreapprovedDomain,
   )
-  const assistantMessage = await queryHaiku({
-    systemPrompt: asSystemPrompt([]),
-    userPrompt: modelPrompt,
-    signal,
-    options: {
-      querySource: 'web_fetch_apply',
-      agents: [],
-      isNonInteractiveSession,
-      hasAppendSystemPrompt: false,
-      mcpTools: [],
-    },
-  })
+  let assistantMessage
+  try {
+    assistantMessage = await raceWithTimeout(
+      queryHaiku({
+        systemPrompt: asSystemPrompt([]),
+        userPrompt: modelPrompt,
+        signal,
+        options: {
+          querySource: 'web_fetch_apply',
+          agents: [],
+          isNonInteractiveSession,
+          hasAppendSystemPrompt: false,
+          mcpTools: [],
+        },
+      }),
+      SECONDARY_MODEL_TIMEOUT_MS,
+      signal,
+    )
+  } catch (err) {
+    // User interrupts and SIGINTs still propagate. Everything else (timeout,
+    // provider-side error, unsupported model on a third-party endpoint) falls
+    // back to raw markdown so the user still gets usable content rather than
+    // a hang. Log so it's visible in debug traces.
+    if (err instanceof AbortError || (err as Error)?.name === 'AbortError') {
+      throw err
+    }
+    logError(err)
+    return buildFallbackMarkdownSummary(truncatedContent)
+  }
 
   // We need to bubble this up, so that the tool call throws, causing us to return
   // an is_error tool_use block to the server, and render a red dot in the UI.
@@ -534,5 +659,5 @@ export async function applyPromptToMarkdown(
       return contentBlock.text
     }
   }
-  return 'No response from model'
+  return buildFallbackMarkdownSummary(truncatedContent)
 }
diff --git a/src/tools/WebSearchTool/WebSearchTool.ts b/src/tools/WebSearchTool/WebSearchTool.ts
index 6a17510f..254af60d 100644
--- a/src/tools/WebSearchTool/WebSearchTool.ts
+++ b/src/tools/WebSearchTool/WebSearchTool.ts
@@ -203,6 +203,61 @@ function buildCodexWebSearchInstructions(): string {
   ].join(' ')
 }
 
+function pushCodexTextResult(
+  results: (SearchResult | string)[],
+  value: unknown,
+): void {
+  if (typeof value !== 'string') return
+  const trimmed = value.trim()
+  if (trimmed) {
+    results.push(trimmed)
+  }
+}
+
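+// Dedupes by URL via the Map key: a repeated URL keeps its first-seen
+// position, while the latest occurrence's title (or URL fallback) wins.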
+function addCodexSource(
+  sourceMap: Map<string, { title: string; url: string }>,
+  source: any,
+): void {
+  if (typeof source?.url !== 'string' || !source.url) return
+  sourceMap.set(source.url, {
+    title:
+      typeof source.title === 'string' && source.title
+        ? source.title
+        : source.url,
+    url: source.url,
+  })
+}
+
+function getCodexSources(item: Record<string, any>): unknown[] {
+  if (Array.isArray(item.action?.sources)) {
+    return item.action.sources
+  }
+  if (Array.isArray(item.sources)) {
+    return item.sources
+  }
+  if (Array.isArray(item.result?.sources)) {
+    return item.result.sources
+  }
+  return []
+}
+
+function extractCodexWebSearchFailure(
+  item: Record<string, any>,
+): string | undefined {
+  // Codex web_search_call items can carry a status field. When the tool
+  // call fails (rate limit, upstream error, model-side guardrail), the
+  // parser should surface a meaningful error rather than the generic
+  // "No results found." fallback. Shapes observed across recent payloads:
+  //   { type: 'web_search_call', status: 'failed', error: { message?: string } }
+  //   { type: 'web_search_call', status: 'failed', action: { error?: { message?: string } } }
+  if (item?.status !== 'failed') return undefined
+  const reason =
+    (typeof item.error?.message === 'string' && item.error.message) ||
+    (typeof item.action?.error?.message === 'string' &&
+      item.action.error.message) ||
+    (typeof item.error === 'string' && item.error) ||
+    undefined
+  return reason ? `Web search failed: ${reason}` : 'Web search failed.'
+}
+
 function makeOutputFromCodexWebSearchResponse(
   response: Record<string, any>,
   query: string,
@@ -214,18 +269,12 @@
 
   for (const item of output) {
     if (item?.type === 'web_search_call') {
-      const sources = Array.isArray(item.action?.sources)
-        ? item.action.sources
-        : []
-      for (const source of sources) {
-        if (typeof source?.url !== 'string' || !source.url) continue
-        sourceMap.set(source.url, {
-          title:
-            typeof source.title === 'string' && source.title
-              ? source.title
-              : source.url,
-          url: source.url,
-        })
+      const failure = extractCodexWebSearchFailure(item)
+      if (failure) {
+        results.push(failure)
+      }
+      for (const source of getCodexSources(item)) {
+        addCodexSource(sourceMap, source)
       }
       continue
     }
@@ -235,11 +284,12 @@
     }
 
     for (const part of item.content) {
-      if (part?.type === 'output_text' && typeof part.text === 'string') {
-        const trimmed = part.text.trim()
-        if (trimmed) {
-          results.push(trimmed)
-        }
+      if (part?.type === 'output_text' || part?.type === 'text') {
+        pushCodexTextResult(results, part.text)
+      }
+
+      for (const source of getCodexSources(part)) {
+        addCodexSource(sourceMap, source)
       }
 
       const annotations = Array.isArray(part?.annotations)
         ? part.annotations
         : []
       for (const annotation of annotations) {
         if (annotation?.type !== 'url_citation') continue
-        if (typeof annotation.url !== 'string' || !annotation.url) continue
-        sourceMap.set(annotation.url, {
-          title:
-            typeof annotation.title === 'string' && annotation.title
-              ? annotation.title
-              : annotation.url,
-          url: annotation.url,
-        })
+        addCodexSource(sourceMap, annotation)
       }
     }
   }
 
-  if (results.length === 0 && typeof response.output_text === 'string') {
-    const trimmed = response.output_text.trim()
-    if (trimmed) {
-      results.push(trimmed)
-    }
+  if (results.length === 0) {
+    pushCodexTextResult(results, response.output_text)
   }
 
   if (sourceMap.size > 0) {
@@ -273,6 +313,10 @@
     })
   }
 
+  if (results.length === 0) {
+    results.push('No results found.')
+  }
+
   return {
     query,
     results,
@@ -280,6 +324,10 @@
   }
 }
 
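+// Exposed for unit tests (see codexShim.test.ts); not part of the tool's
+// public surface.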
+export const __test = {
+  makeOutputFromCodexWebSearchResponse,
+}
+
 async function runCodexWebSearch(
   input: Input,
   signal: AbortSignal,
@@ -457,6 +505,19 @@ function shouldUseAdapterProvider(): boolean {
   return getAvailableProviders().length > 0
 }
 
+/**
+ * Returns true when the current provider has a working native or Codex
+ * web-search fallback after an adapter failure. OpenAI shim providers
+ * (moonshot, minimax, nvidia-nim, openai, github, etc.) do NOT support
+ * Anthropic's web_search_20250305 tool, so falling through to the native
+ * path silently produces "Did 0 searches".
+ */
+function hasNativeSearchFallback(): boolean {
+  if (isCodexResponsesWebSearchEnabled()) return true
+  const provider = getAPIProvider()
+  return provider === 'firstParty' || provider === 'vertex' || provider === 'foundry'
+}
+
 // ---------------------------------------------------------------------------
 // Tool export
 // ---------------------------------------------------------------------------
@@ -609,6 +670,17 @@ export const WebSearchTool = buildTool({
       // Auto mode: only fall through on transient errors (network, timeout, 5xx).
       // Config / guardrail errors (SSRF, HTTPS, bad URL, etc.) must surface.
       if (!isTransientError(err)) throw err
+      // No viable fallback for this provider — surface the adapter error
+      // instead of falling through to a broken native path.
+      if (!hasNativeSearchFallback()) {
+        const provider = getAPIProvider()
+        const errMsg = err instanceof Error ? err.message : String(err)
+        throw new Error(
+          `Web search is unavailable for provider "${provider}". ` +
+            `The search adapter failed (${errMsg}). ` +
+            `Try switching to a provider with built-in web search (e.g. Anthropic, Codex) or try again later.`,
+        )
+      }
       console.error(
         `[web-search] Adapter failed, falling through to native: ${err}`,
       )
diff --git a/src/tools/WebSearchTool/providers/duckduckgo.ts b/src/tools/WebSearchTool/providers/duckduckgo.ts
index a21dbe5f..d3d9ead4 100644
--- a/src/tools/WebSearchTool/providers/duckduckgo.ts
+++ b/src/tools/WebSearchTool/providers/duckduckgo.ts
@@ -12,12 +12,33 @@ const DDG_ANOMALY_HINT =
   'JINA_API_KEY, BING_API_KEY, MOJEEK_API_KEY, LINKUP_API_KEY — ' +
   'or use an Anthropic / Vertex / Foundry provider for native web search.'
 
+const MAX_RETRIES = 3
+const INITIAL_BACKOFF_MS = 1000
+
 function isAnomalyError(message: string): boolean {
   return /anomaly in the request|likely making requests too quickly/i.test(
     message,
   )
 }
 
+function isRetryableDDGError(err: unknown): boolean {
+  if (!(err instanceof Error)) return false
+  const msg = err.message.toLowerCase()
+  return (
+    msg.includes('anomaly') ||
+    msg.includes('too quickly') ||
+    msg.includes('rate limit') ||
+    msg.includes('timeout') ||
+    msg.includes('econnreset') ||
+    msg.includes('etimedout') ||
+    msg.includes('econnaborted')
+  )
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise(r => setTimeout(r, ms))
+}
+
 export const duckduckgoProvider: SearchProvider = {
   name: 'duckduckgo',
 
@@ -36,31 +57,44 @@
       throw new Error('duck-duck-scrape package not installed. Run: npm install duck-duck-scrape')
     }
     if (signal?.aborted) throw new DOMException('Aborted', 'AbortError')
-    // TODO: duck-duck-scrape doesn't accept AbortSignal — can't cancel in-flight searches
-    let response: Awaited<ReturnType<typeof search>>
-    try {
-      response = await search(input.query, { safeSearch: SafeSearchType.STRICT })
-    } catch (err) {
-      const msg = err instanceof Error ? err.message : String(err)
-      if (isAnomalyError(msg)) {
-        throw new Error(DDG_ANOMALY_HINT)
+
+    let lastErr: unknown
+    for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
+      if (signal?.aborted) throw new DOMException('Aborted', 'AbortError')
+      try {
+        // TODO: duck-duck-scrape doesn't accept AbortSignal — can't cancel in-flight searches
+        const response = await search(input.query, { safeSearch: SafeSearchType.STRICT })
+
+        const hits = applyDomainFilters(
+          response.results.map(r => ({
+            title: r.title || r.url,
+            url: r.url,
+            description: r.description ?? undefined,
+          })),
+          input,
+        )
+
+        return {
+          hits,
+          providerName: 'duckduckgo',
+          durationSeconds: (performance.now() - start) / 1000,
+        }
+      } catch (err) {
+        lastErr = err
+        const msg = err instanceof Error ? err.message : String(err)
+        if (isAnomalyError(msg)) {
+          throw new Error(DDG_ANOMALY_HINT)
+        }
+        if (!isRetryableDDGError(err) || attempt === MAX_RETRIES - 1) {
+          throw err
+        }
+        // Exponential backoff with jitter: 1s, 2s, 4s +/- 20%
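+        // Only the 1s and 2s delays are ever slept; the final attempt rethrows.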
+        const baseDelay = INITIAL_BACKOFF_MS * Math.pow(2, attempt)
+        const jitter = baseDelay * 0.2 * (Math.random() * 2 - 1)
+        await sleep(baseDelay + jitter)
       }
-      throw err
     }
-    const hits = applyDomainFilters(
-      response.results.map(r => ({
-        title: r.title || r.url,
-        url: r.url,
-        description: r.description ?? undefined,
-      })),
-      input,
-    )
-
-    return {
-      hits,
-      providerName: 'duckduckgo',
-      durationSeconds: (performance.now() - start) / 1000,
-    }
+    throw lastErr
   },
 }
diff --git a/src/utils/model/model.openai-shim-providers.test.ts b/src/utils/model/model.openai-shim-providers.test.ts
index 9b6c2ae4..ac793c47 100644
--- a/src/utils/model/model.openai-shim-providers.test.ts
+++ b/src/utils/model/model.openai-shim-providers.test.ts
@@ -1,7 +1,13 @@
-import { afterEach, beforeEach, expect, test } from 'bun:test'
+import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
 
 import { saveGlobalConfig } from '../config.js'
-import { getUserSpecifiedModelSetting } from './model.js'
+import {
+  getDefaultHaikuModel,
+  getDefaultOpusModel,
+  getDefaultSonnetModel,
+  getSmallFastModel,
+  getUserSpecifiedModelSetting,
+} from './model.js'
 
 const SAVED_ENV = {
   CLAUDE_CODE_USE_OPENAI: process.env.CLAUDE_CODE_USE_OPENAI,
@@ -28,6 +34,11 @@ function restoreEnv(key: keyof typeof SAVED_ENV): void {
 }
 
 beforeEach(() => {
+  // Other test files (notably modelOptions.github.test.ts) install a
+  // persistent mock.module for './providers.js' that overrides getAPIProvider
+  // globally. Without mock.restore() here, those overrides bleed into this
+  // suite and the provider-kind branches we're testing become unreachable.
+  mock.restore()
   delete process.env.CLAUDE_CODE_USE_OPENAI
   delete process.env.CLAUDE_CODE_USE_GEMINI
   delete process.env.CLAUDE_CODE_USE_GITHUB
@@ -113,3 +124,76 @@
 test('github provider still reads OPENAI_MODEL (regression guard)', () => {
   expect(model).toBe('github:copilot')
 })
+
+// ---------------------------------------------------------------------------
+// Default model helpers — must not fall through to claude-haiku-4-5 etc. for
+// OpenAI-shim providers whose endpoints don't speak Anthropic model names.
+// Hitting that fallthrough caused WebFetch to hang for 60s on MiniMax/Codex
+// because queryHaiku() shipped an unknown model id to the shim endpoint.
+// ---------------------------------------------------------------------------
+
+test('getSmallFastModel returns OPENAI_MODEL for MiniMax (regression: WebFetch hang)', () => {
+  process.env.MINIMAX_API_KEY = 'minimax-test'
+  process.env.OPENAI_MODEL = 'MiniMax-M2.5-highspeed'
+
+  expect(getSmallFastModel()).toBe('MiniMax-M2.5-highspeed')
+})
+
+test('getSmallFastModel returns OPENAI_MODEL for Codex (regression)', () => {
+  process.env.CLAUDE_CODE_USE_OPENAI = '1'
+  process.env.OPENAI_BASE_URL = 'https://chatgpt.com/backend-api/codex'
+  process.env.OPENAI_MODEL = 'codexspark'
+  process.env.CODEX_API_KEY = 'codex-test'
+  process.env.CHATGPT_ACCOUNT_ID = 'acct_test'
+
+  expect(getSmallFastModel()).toBe('codexspark')
+})
+
+test('getSmallFastModel returns OPENAI_MODEL for NVIDIA NIM (regression)', () => {
+  process.env.NVIDIA_NIM = '1'
+  process.env.CLAUDE_CODE_USE_OPENAI = '1'
+  process.env.OPENAI_MODEL = 'nvidia/llama-3.1-nemotron-70b-instruct'
+
+  expect(getSmallFastModel()).toBe('nvidia/llama-3.1-nemotron-70b-instruct')
+})
+
+test('getDefaultOpusModel returns OPENAI_MODEL for MiniMax', () => {
+  process.env.MINIMAX_API_KEY = 'minimax-test'
+  process.env.OPENAI_MODEL = 'MiniMax-M2.7'
+
+  expect(getDefaultOpusModel()).toBe('MiniMax-M2.7')
+})
+
+test('getDefaultSonnetModel returns OPENAI_MODEL for NVIDIA NIM', () => {
+  process.env.NVIDIA_NIM = '1'
+  process.env.CLAUDE_CODE_USE_OPENAI = '1'
+  process.env.OPENAI_MODEL = 'nvidia/llama-3.1-nemotron-70b-instruct'
+
+  expect(getDefaultSonnetModel()).toBe('nvidia/llama-3.1-nemotron-70b-instruct')
+})
+
+test('getDefaultHaikuModel returns OPENAI_MODEL for MiniMax', () => {
+  process.env.MINIMAX_API_KEY = 'minimax-test'
+  process.env.OPENAI_MODEL = 'MiniMax-M2.5-highspeed'
+
+  expect(getDefaultHaikuModel()).toBe('MiniMax-M2.5-highspeed')
+})
+
+test('default helpers do not leak claude-* names to shim providers', () => {
+  // Umbrella guard: for each OpenAI-shim provider, none of the default-model
+  // helpers may return an Anthropic-branded model name. That was the source
+  // of the WebFetch 60s hang — MiniMax received "claude-haiku-4-5" and sat
+  // on the connection.
+  process.env.MINIMAX_API_KEY = 'minimax-test'
+  process.env.OPENAI_MODEL = 'MiniMax-M2.7'
+
+  for (const fn of [
+    getSmallFastModel,
+    getDefaultOpusModel,
+    getDefaultSonnetModel,
+    getDefaultHaikuModel,
+  ]) {
+    const model = fn()
+    expect(model.toLowerCase()).not.toContain('claude')
+  }
+})
+
diff --git a/src/utils/model/model.ts b/src/utils/model/model.ts
index 9ff9a0c3..f5a7607b 100644
--- a/src/utils/model/model.ts
+++ b/src/utils/model/model.ts
@@ -52,10 +52,25 @@ export function getSmallFastModel(): ModelName {
   if (getAPIProvider() === 'openai') {
     return process.env.OPENAI_MODEL || 'gpt-4o-mini'
   }
+  // Codex provider — OPENAI_MODEL is always set for Codex profiles; only fall
+  // back to a codex-spark alias when an override env strips it.
+  if (getAPIProvider() === 'codex') {
+    return process.env.OPENAI_MODEL || 'codexspark'
+  }
   // For GitHub Copilot provider
   if (getAPIProvider() === 'github') {
     return process.env.OPENAI_MODEL || 'github:copilot'
   }
+  // NVIDIA NIM — OPENAI_MODEL carries the user's active NIM model; use a
+  // small Meta Llama variant as the conservative fallback.
+  if (getAPIProvider() === 'nvidia-nim') {
+    return process.env.OPENAI_MODEL || 'meta/llama-3.1-8b-instruct'
+  }
+  // MiniMax — OPENAI_MODEL carries the active MiniMax model; fall back to
+  // the fastest tier (M2.5-highspeed) when missing.
+  if (getAPIProvider() === 'minimax') {
+    return process.env.OPENAI_MODEL || 'MiniMax-M2.5-highspeed'
+  }
   return getDefaultHaikuModel()
 }
 
@@ -171,6 +186,14 @@ export function getDefaultOpusModel(): ModelName {
   if (getAPIProvider() === 'github') {
     return process.env.OPENAI_MODEL || 'github:copilot'
   }
+  // NVIDIA NIM
+  if (getAPIProvider() === 'nvidia-nim') {
+    return process.env.OPENAI_MODEL || 'nvidia/llama-3.1-nemotron-70b-instruct'
+  }
+  // MiniMax — flagship tier for "opus"-equivalent.
+  if (getAPIProvider() === 'minimax') {
+    return process.env.OPENAI_MODEL || 'MiniMax-M2.7'
+  }
   // 3P providers (Bedrock, Vertex, Foundry) — kept as a separate branch
   // even when values match, since 3P availability lags firstParty and
   // these will diverge again at the next model launch.
@@ -205,6 +228,14 @@ export function getDefaultSonnetModel(): ModelName {
   if (getAPIProvider() === 'github') {
     return process.env.OPENAI_MODEL || 'github:copilot'
   }
+  // NVIDIA NIM
+  if (getAPIProvider() === 'nvidia-nim') {
+    return process.env.OPENAI_MODEL || 'nvidia/llama-3.1-nemotron-70b-instruct'
+  }
+  // MiniMax — mid tier for "sonnet"-equivalent.
+  if (getAPIProvider() === 'minimax') {
+    return process.env.OPENAI_MODEL || 'MiniMax-M2.5'
+  }
   // Default to Sonnet 4.5 for 3P since they may not have 4.6 yet
   if (getAPIProvider() !== 'firstParty') {
     return getModelStrings().sonnet45
@@ -237,6 +268,14 @@ export function getDefaultHaikuModel(): ModelName {
   if (getAPIProvider() === 'gemini') {
     return process.env.GEMINI_MODEL || 'gemini-2.0-flash-lite'
  }
+  // NVIDIA NIM
+  if (getAPIProvider() === 'nvidia-nim') {
+    return process.env.OPENAI_MODEL || 'meta/llama-3.1-8b-instruct'
+  }
+  // MiniMax — fastest tier for "haiku"-equivalent.
+  if (getAPIProvider() === 'minimax') {
+    return process.env.OPENAI_MODEL || 'MiniMax-M2.5-highspeed'
+  }
   // Haiku 4.5 is available on all platforms (first-party, Foundry, Bedrock, Vertex)
   return getModelStrings().haiku45
 
diff --git a/src/utils/model/providers.ts b/src/utils/model/providers.ts
index aed15e55..06f5bc61 100644
--- a/src/utils/model/providers.ts
+++ b/src/utils/model/providers.ts
@@ -19,7 +19,12 @@ export function getAPIProvider(): APIProvider {
   if (isEnvTruthy(process.env.NVIDIA_NIM)) {
     return 'nvidia-nim'
   }
-  if (isEnvTruthy(process.env.MINIMAX_API_KEY)) {
+  // MiniMax is signalled by a real API key, not a '1'/'true' flag. Using
+  // isEnvTruthy() here silently treated every MiniMax user as 'firstParty'
+  // (or 'openai' once they set CLAUDE_CODE_USE_OPENAI via the profile),
+  // making every provider-kind-specific branch for 'minimax' elsewhere in
+  // the codebase unreachable. A presence check is the correct signal.
+  if (
+    typeof process.env.MINIMAX_API_KEY === 'string' &&
+    process.env.MINIMAX_API_KEY.trim() !== ''
+  ) {
     return 'minimax'
   }
   return isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI)