From 4cb963e660dbd6ee438c04042700db05a9d32c59 Mon Sep 17 00:00:00 2001 From: nehan <86887033+muhnehh@users.noreply.github.com> Date: Mon, 20 Apr 2026 12:24:02 +0400 Subject: [PATCH] feat(api): improve local provider reliability with readiness and self-healing (#738) * feat(api): classify openai-compatible provider failures * Update src/services/api/providerConfig.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat(api): harden openai-compatible diagnostics and env fallback * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix openaiShim duplicate requests and diagnostics * remove unused url from http failure classifier * dedupe env diagnostic warnings * Remove hardcoded URLs from OpenAI error tests Removed hardcoded URLs from network failure classification tests. * Update providerConfig.envDiagnostics.test.ts * fix(openai-shim): return successful responses and restore localhost classifier tests * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat(provider): add truthful local generation readiness checks Implement Phase 2 provider readiness behavior by adding structured Ollama generation probes, wiring setup flows to readiness states, extending system-check with generation readiness output, and updating focused tests. * feat(api): add local self-healing fallback retries Implement Phase 3 self-healing behavior for local OpenAI-compatible providers: retry base URL fallbacks for localhost resolution and endpoint mismatches, plus capability-gated toolless retry for tool-incompatible local models; include diagnostics and focused tests. * fix(api): address review blockers for local provider reliability * Update src/utils/providerDiscovery.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix: harden readiness probes and cross-platform test stability * fix: refresh toolless retry payload and stabilize osc clipboard test * fix: harden Ollama readiness parsing and redact provider URLs --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/system-check.test.ts | 17 ++ scripts/system-check.ts | 108 +++++++- src/commands/provider/provider.tsx | 86 +++++-- src/components/ProviderManager.test.tsx | 63 +++-- src/components/ProviderManager.tsx | 54 ++-- .../memory/memoryFileSelectorPaths.test.ts | 13 +- src/ink/termio/osc.test.ts | 24 +- src/services/api/openaiErrorClassification.ts | 5 +- .../api/openaiShim.diagnostics.test.ts | 167 ++++++++++++ src/services/api/openaiShim.test.ts | 206 ++++++++++++++- src/services/api/openaiShim.ts | 179 ++++++++++--- src/services/api/providerConfig.local.test.ts | 41 +++ src/services/api/providerConfig.ts | 98 +++++++ src/services/autoFix/autoFixRunner.test.ts | 2 +- src/services/autoFix/autoFixRunner.ts | 25 +- src/services/wiki/init.test.ts | 8 +- src/utils/localInstaller.ts | 5 +- src/utils/providerDiscovery.test.ts | 228 ++++++++++++++++- src/utils/providerDiscovery.ts | 241 ++++++++++++++---- src/utils/settings/settings.ts | 4 +- src/utils/urlRedaction.test.ts | 38 +++ src/utils/urlRedaction.ts | 48 ++++ 22 files changed, 1452 insertions(+), 208 deletions(-) create mode 100644 src/utils/urlRedaction.test.ts create mode 100644 src/utils/urlRedaction.ts diff --git a/scripts/system-check.test.ts b/scripts/system-check.test.ts index 6937414c..7290ba63 100644 --- a/scripts/system-check.test.ts +++ b/scripts/system-check.test.ts @@ -20,6 +20,23 @@ describe('formatReachabilityFailureDetail', () => { ) }) + test('redacts credentials and sensitive query parameters in endpoint details', () => { + const detail = formatReachabilityFailureDetail( + 'http://user:pass@localhost:11434/v1/models?token=abc123&mode=test', + 502, + 'bad gateway', + { + transport: 'chat_completions', + requestedModel: 'llama3.1:8b', + resolvedModel: 'llama3.1:8b', + }, + ) + + expect(detail).toBe( + 'Unexpected status 502 from http://redacted:redacted@localhost:11434/v1/models?token=redacted&mode=test. Body: bad gateway', + ) + }) + test('adds alias/entitlement hint for codex model support 400s', () => { const detail = formatReachabilityFailureDetail( 'https://chatgpt.com/backend-api/codex/responses', diff --git a/scripts/system-check.ts b/scripts/system-check.ts index 80e9ef26..8f87b59e 100644 --- a/scripts/system-check.ts +++ b/scripts/system-check.ts @@ -7,6 +7,11 @@ import { resolveProviderRequest, isLocalProviderUrl as isProviderLocalUrl, } from '../src/services/api/providerConfig.js' +import { + getLocalOpenAICompatibleProviderLabel, + probeOllamaGenerationReadiness, +} from '../src/utils/providerDiscovery.js' +import { redactUrlForDisplay } from '../src/utils/urlRedaction.js' type CheckResult = { ok: boolean @@ -69,7 +74,7 @@ export function formatReachabilityFailureDetail( }, ): string { const compactBody = responseBody.trim().replace(/\s+/g, ' ').slice(0, 240) - const base = `Unexpected status ${status} from ${endpoint}.` + const base = `Unexpected status ${status} from ${redactUrlForDisplay(endpoint)}.` const bodySuffix = compactBody ? ` Body: ${compactBody}` : '' if (request.transport !== 'codex_responses' || status !== 400) { @@ -255,7 +260,7 @@ function checkOpenAIEnv(): CheckResult[] { results.push(pass('OPENAI_MODEL', process.env.OPENAI_MODEL)) } - results.push(pass('OPENAI_BASE_URL', request.baseUrl)) + results.push(pass('OPENAI_BASE_URL', redactUrlForDisplay(request.baseUrl))) if (request.transport === 'codex_responses') { const credentials = resolveCodexApiCredentials(process.env) @@ -308,7 +313,7 @@ async function checkBaseUrlReachability(): Promise { return pass('Provider reachability', 'Skipped (OpenAI-compatible mode disabled).') } - if (useGithub) { + if (useGithub && !useOpenAI) { return pass( 'Provider reachability', 'Skipped for GitHub Models (inference endpoint differs from OpenAI /models probe).', @@ -326,6 +331,7 @@ async function checkBaseUrlReachability(): Promise { const endpoint = request.transport === 'codex_responses' ? `${request.baseUrl}/responses` : `${request.baseUrl}/models` + const redactedEndpoint = redactUrlForDisplay(endpoint) const controller = new AbortController() const timeout = setTimeout(() => controller.abort(), 4000) @@ -375,7 +381,10 @@ async function checkBaseUrlReachability(): Promise { }) if (response.status === 200 || response.status === 401 || response.status === 403) { - return pass('Provider reachability', `Reached ${endpoint} (status ${response.status}).`) + return pass( + 'Provider reachability', + `Reached ${redactedEndpoint} (status ${response.status}).`, + ) } const responseBody = await response.text().catch(() => '') @@ -391,12 +400,100 @@ async function checkBaseUrlReachability(): Promise { ) } catch (error) { const message = error instanceof Error ? error.message : String(error) - return fail('Provider reachability', `Failed to reach ${endpoint}: ${message}`) + return fail( + 'Provider reachability', + `Failed to reach ${redactedEndpoint}: ${message}`, + ) } finally { clearTimeout(timeout) } } +async function checkProviderGenerationReadiness(): Promise { + const useGemini = isTruthy(process.env.CLAUDE_CODE_USE_GEMINI) + const useOpenAI = isTruthy(process.env.CLAUDE_CODE_USE_OPENAI) + const useGithub = isTruthy(process.env.CLAUDE_CODE_USE_GITHUB) + const useMistral = isTruthy(process.env.CLAUDE_CODE_USE_MISTRAL) + + if (!useGemini && !useOpenAI && !useGithub && !useMistral) { + return pass('Provider generation readiness', 'Skipped (OpenAI-compatible mode disabled).') + } + + if (useGithub && !useOpenAI) { + return pass( + 'Provider generation readiness', + 'Skipped for GitHub Models (runtime generation uses a different endpoint flow).', + ) + } + + if (useGemini || useMistral) { + return pass( + 'Provider generation readiness', + 'Skipped for managed provider mode.', + ) + } + + if (!useOpenAI) { + return pass('Provider generation readiness', 'Skipped (OpenAI-compatible mode disabled).') + } + + const request = resolveProviderRequest({ + model: process.env.OPENAI_MODEL, + baseUrl: process.env.OPENAI_BASE_URL, + }) + + if (request.transport === 'codex_responses') { + return pass( + 'Provider generation readiness', + 'Skipped for Codex responses (reachability probe already performs a lightweight generation request).', + ) + } + + if (!isLocalBaseUrl(request.baseUrl)) { + return pass('Provider generation readiness', 'Skipped for non-local provider URL.') + } + + const localProviderLabel = getLocalOpenAICompatibleProviderLabel(request.baseUrl) + if (localProviderLabel !== 'Ollama') { + return pass( + 'Provider generation readiness', + `Skipped for ${localProviderLabel} (no provider-specific generation probe).`, + ) + } + + const readiness = await probeOllamaGenerationReadiness({ + baseUrl: request.baseUrl, + model: request.requestedModel, + }) + + if (readiness.state === 'ready') { + return pass( + 'Provider generation readiness', + `Generated a test response with ${readiness.probeModel ?? request.requestedModel}.`, + ) + } + + if (readiness.state === 'unreachable') { + return fail( + 'Provider generation readiness', + `Could not reach Ollama at ${redactUrlForDisplay(request.baseUrl)}.`, + ) + } + + if (readiness.state === 'no_models') { + return fail( + 'Provider generation readiness', + 'Ollama is reachable, but no installed models were found. Pull a model first (for example: ollama pull qwen2.5-coder:7b).', + ) + } + + const detailSuffix = readiness.detail ? ` Detail: ${readiness.detail}.` : '' + return fail( + 'Provider generation readiness', + `Ollama is reachable, but generation failed for ${readiness.probeModel ?? request.requestedModel}.${detailSuffix}`, + ) +} + function isAtomicChatUrl(baseUrl: string): boolean { try { const parsed = new URL(baseUrl) @@ -567,6 +664,7 @@ async function main(): Promise { results.push(checkBuildArtifacts()) results.push(...checkOpenAIEnv()) results.push(await checkBaseUrlReachability()) + results.push(await checkProviderGenerationReadiness()) results.push(checkOllamaProcessorMode()) if (!options.json) { diff --git a/src/commands/provider/provider.tsx b/src/commands/provider/provider.tsx index 0b026f7e..6d954d2a 100644 --- a/src/commands/provider/provider.tsx +++ b/src/commands/provider/provider.tsx @@ -66,10 +66,44 @@ import { import { getOllamaChatBaseUrl, getLocalOpenAICompatibleProviderLabel, - hasLocalOllama, - listOllamaModels, + probeOllamaGenerationReadiness, + type OllamaGenerationReadiness, } from '../../utils/providerDiscovery.js' +function describeOllamaReadinessIssue( + readiness: OllamaGenerationReadiness, + options?: { + baseUrl?: string + allowManualFallback?: boolean + }, +): string { + const endpoint = options?.baseUrl ?? 'http://localhost:11434' + + if (readiness.state === 'unreachable') { + return `Could not reach Ollama at ${endpoint}. Start Ollama first, then run /provider again.` + } + + if (readiness.state === 'no_models') { + const manualSuffix = options?.allowManualFallback + ? ', or enter details manually' + : '' + return `Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first${manualSuffix}.` + } + + if (readiness.state === 'generation_failed') { + const modelHint = readiness.probeModel ?? 'the selected model' + const detailSuffix = readiness.detail + ? ` Details: ${readiness.detail}.` + : '' + const manualSuffix = options?.allowManualFallback + ? ' You can also enter details manually.' + : '' + return `Ollama is reachable and models are installed, but a generation probe failed for ${modelHint}.${detailSuffix} Run "ollama run ${modelHint}" once and retry.${manualSuffix}` + } + + return '' +} + type ProviderChoice = 'auto' | ProviderProfile | 'codex-oauth' | 'clear' type Step = @@ -715,6 +749,7 @@ function AutoRecommendationStep({ | { state: 'openai' defaultModel: string + reason: string } | { state: 'error' @@ -728,19 +763,27 @@ function AutoRecommendationStep({ void (async () => { const defaultModel = getGoalDefaultOpenAIModel(goal) try { - const ollamaAvailable = await hasLocalOllama() - if (!ollamaAvailable) { + const readiness = await probeOllamaGenerationReadiness() + if (readiness.state !== 'ready') { if (!cancelled) { - setStatus({ state: 'openai', defaultModel }) + setStatus({ + state: 'openai', + defaultModel, + reason: describeOllamaReadinessIssue(readiness), + }) } return } - const models = await listOllamaModels() - const recommended = recommendOllamaModel(models, goal) + const recommended = recommendOllamaModel(readiness.models, goal) if (!recommended) { if (!cancelled) { - setStatus({ state: 'openai', defaultModel }) + setStatus({ + state: 'openai', + defaultModel, + reason: + 'Ollama responded to a generation probe, but no recommended chat model matched this goal.', + }) } return } @@ -796,10 +839,10 @@ function AutoRecommendationStep({ - No viable local Ollama chat model was detected. Auto setup can - continue into OpenAI-compatible setup with a default model of{' '} + Auto setup can continue into OpenAI-compatible setup with a default model of{' '} {status.defaultModel}. + {status.reason}