feat(api): improve local provider reliability with readiness and self-healing (#738)

* feat(api): classify openai-compatible provider failures

* Update src/services/api/providerConfig.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/errors.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* feat(api): harden openai-compatible diagnostics and env fallback

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/errors.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/errors.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix openaiShim duplicate requests and diagnostics

* remove unused url from http failure classifier

* dedupe env diagnostic warnings

* Remove hardcoded URLs from OpenAI error tests

Removed hardcoded URLs from network failure classification tests.

* Update providerConfig.envDiagnostics.test.ts

* fix(openai-shim): return successful responses and restore localhost classifier tests

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* feat(provider): add truthful local generation readiness checks

Implement Phase 2 provider readiness behavior by adding structured Ollama generation probes, wiring setup flows to readiness states, extending system-check with generation readiness output, and updating focused tests.

* feat(api): add local self-healing fallback retries

Implement Phase 3 self-healing behavior for local OpenAI-compatible providers: retry base URL fallbacks for localhost resolution and endpoint mismatches, plus capability-gated toolless retry for tool-incompatible local models; include diagnostics and focused tests.

* fix(api): address review blockers for local provider reliability

* Update src/utils/providerDiscovery.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix: harden readiness probes and cross-platform test stability

* fix: refresh toolless retry payload and stabilize osc clipboard test

* fix: harden Ollama readiness parsing and redact provider URLs

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
nehan authored 2026-04-20 12:24:02 +04:00, committed by GitHub
parent b09972f223
commit 4cb963e660
22 changed files with 1452 additions and 208 deletions
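The Phase 2 and Phase 3 behaviors summarized in the commit message are only partially excerpted below (src/services/api/openaiShim.ts itself is not shown). As a rough, hypothetical sketch of the Phase 3 base-URL fallback retry — every name and URL here is illustrative, not the actual openaiShim.ts API:

// Hypothetical sketch of the "retry base URL fallbacks" behavior described
// above; the real logic lives in src/services/api/openaiShim.ts, which is
// not part of this excerpt.
const LOCALHOST_FALLBACK_BASE_URLS = [
  'http://127.0.0.1:11434/v1', // localhost sometimes resolves to the wrong IP stack
  'http://[::1]:11434/v1',
]

async function postChatWithLocalFallbacks(
  baseUrl: string,
  payload: Record<string, unknown>,
): Promise<Response> {
  const candidates = [
    baseUrl,
    ...LOCALHOST_FALLBACK_BASE_URLS.filter(url => url !== baseUrl),
  ]
  let lastFailure: unknown
  for (const candidate of candidates) {
    try {
      const response = await fetch(
        `${candidate.replace(/\/+$/, '')}/chat/completions`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(payload),
        },
      )
      if (response.ok) {
        return response
      }
      lastFailure = new Error(`status ${response.status} from ${candidate}`)
    } catch (error) {
      lastFailure = error // e.g. ECONNREFUSED on an IPv4/IPv6 mismatch
    }
  }
  throw lastFailure
}

The commit also gates a toolless retry on model capabilities; that path is likewise only summarized here, not excerpted.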

View File

@@ -44,9 +44,10 @@ function getCandidateLocalBinaryPaths(localInstallDir: string): string[] {
}

export function isManagedLocalInstallationPath(execPath: string): boolean {
+  const normalizedExecPath = execPath.replace(/\\+/g, '/')
  return (
-    execPath.includes('/.openclaude/local/node_modules/') ||
-    execPath.includes('/.claude/local/node_modules/')
+    normalizedExecPath.includes('/.openclaude/local/node_modules/') ||
+    normalizedExecPath.includes('/.claude/local/node_modules/')
  )
}
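The normalization matters for Windows executable paths, which arrive with backslashes; a quick illustration (the path is made up):

// Before this change the raw backslash path never matched the '/'-delimited
// patterns; after normalization it does.
isManagedLocalInstallationPath('C:\\Users\\dev\\.openclaude\\local\\node_modules\\.bin\\claude')
// → true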

View File

@@ -1,9 +1,9 @@
import { afterEach, expect, mock, test } from 'bun:test'
-import {
-  getLocalOpenAICompatibleProviderLabel,
-  listOpenAICompatibleModels,
-} from './providerDiscovery.js'
+
+async function loadProviderDiscoveryModule() {
+  // @ts-expect-error cache-busting query string for Bun module mocks
+  return import(`./providerDiscovery.js?ts=${Date.now()}-${Math.random()}`)
+}

const originalFetch = globalThis.fetch
const originalEnv = {
@@ -16,6 +16,8 @@ afterEach(() => {
})

test('lists models from a local openai-compatible /models endpoint', async () => {
+  const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
+
  globalThis.fetch = mock((input, init) => {
    const url = typeof input === 'string' ? input : input.url
    expect(url).toBe('http://localhost:1234/v1/models')
@@ -47,6 +49,8 @@ test('lists models from a local openai-compatible /models endpoint', async () =>
})

test('returns null when a local openai-compatible /models request fails', async () => {
+  const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
+
  globalThis.fetch = mock(() =>
    Promise.resolve(new Response('not available', { status: 503 })),
  ) as typeof globalThis.fetch
@@ -56,13 +60,19 @@ test('returns null when a local openai-compatible /models request fails', async
  ).resolves.toBeNull()
})

-test('detects LM Studio from the default localhost port', () => {
+test('detects LM Studio from the default localhost port', async () => {
+  const { getLocalOpenAICompatibleProviderLabel } =
+    await loadProviderDiscoveryModule()
+
  expect(getLocalOpenAICompatibleProviderLabel('http://localhost:1234/v1')).toBe(
    'LM Studio',
  )
})

-test('detects common local openai-compatible providers by hostname', () => {
+test('detects common local openai-compatible providers by hostname', async () => {
+  const { getLocalOpenAICompatibleProviderLabel } =
+    await loadProviderDiscoveryModule()
+
  expect(
    getLocalOpenAICompatibleProviderLabel('http://localai.local:8080/v1'),
  ).toBe('LocalAI')
@@ -71,8 +81,212 @@ test('detects common local openai-compatible providers by hostname', () => {
  ).toBe('vLLM')
})

-test('falls back to a generic local openai-compatible label', () => {
+test('falls back to a generic local openai-compatible label', async () => {
+  const { getLocalOpenAICompatibleProviderLabel } =
+    await loadProviderDiscoveryModule()
+
  expect(
    getLocalOpenAICompatibleProviderLabel('http://127.0.0.1:8080/v1'),
  ).toBe('Local OpenAI-compatible')
})
+
+test('ollama generation readiness reports unreachable when tags endpoint is down', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  const calledUrls: string[] = []
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    calledUrls.push(url)
+    return Promise.resolve(new Response('not available', { status: 503 }))
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'unreachable',
+    models: [],
+  })
+  expect(calledUrls).toEqual([
+    'http://localhost:11434/api/tags',
+  ])
+})
+
+test('ollama generation readiness reports no models when server is reachable', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  const calledUrls: string[] = []
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    calledUrls.push(url)
+    return Promise.resolve(
+      new Response(JSON.stringify({ models: [] }), {
+        status: 200,
+        headers: { 'Content-Type': 'application/json' },
+      }),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'no_models',
+    models: [],
+  })
+  expect(calledUrls).toEqual([
+    'http://localhost:11434/api/tags',
+  ])
+})
+
+test('ollama generation readiness reports generation_failed when requested model is missing', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  const calledUrls: string[] = []
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    calledUrls.push(url)
+    return Promise.resolve(
+      new Response(
+        JSON.stringify({
+          models: [{ name: 'llama3.1:8b', size: 1024 }],
+        }),
+        {
+          status: 200,
+          headers: { 'Content-Type': 'application/json' },
+        },
+      ),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+      model: 'qwen2.5-coder:7b',
+    }),
+  ).resolves.toMatchObject({
+    state: 'generation_failed',
+    probeModel: 'qwen2.5-coder:7b',
+    detail: 'requested model not installed: qwen2.5-coder:7b',
+  })
+  expect(calledUrls).toEqual(['http://localhost:11434/api/tags'])
+})
+
+test('ollama generation readiness reports generation failures when chat probe fails', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.endsWith('/api/tags')) {
+      return Promise.resolve(
+        new Response(
+          JSON.stringify({
+            models: [{ name: 'qwen2.5-coder:7b', size: 42 }],
+          }),
+          {
+            status: 200,
+            headers: { 'Content-Type': 'application/json' },
+          },
+        ),
+      )
+    }
+    return Promise.resolve(new Response('model not found', { status: 404 }))
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+      model: 'qwen2.5-coder:7b',
+    }),
+  ).resolves.toMatchObject({
+    state: 'generation_failed',
+    probeModel: 'qwen2.5-coder:7b',
+  })
+})
+
+test('ollama generation readiness reports generation_failed when chat probe returns invalid JSON', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.endsWith('/api/tags')) {
+      return Promise.resolve(
+        new Response(
+          JSON.stringify({
+            models: [{ name: 'llama3.1:8b', size: 1024 }],
+          }),
+          {
+            status: 200,
+            headers: { 'Content-Type': 'application/json' },
+          },
+        ),
+      )
+    }
+    return Promise.resolve(
+      new Response('<html>proxy error</html>', {
+        status: 200,
+        headers: { 'Content-Type': 'text/html' },
+      }),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'generation_failed',
+    probeModel: 'llama3.1:8b',
+    detail: 'invalid JSON response',
+  })
+})
+
+test('ollama generation readiness reports ready when chat probe succeeds', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.endsWith('/api/tags')) {
+      return Promise.resolve(
+        new Response(
+          JSON.stringify({
+            models: [{ name: 'llama3.1:8b', size: 1024 }],
+          }),
+          {
+            status: 200,
+            headers: { 'Content-Type': 'application/json' },
+          },
+        ),
+      )
+    }
+    return Promise.resolve(
+      new Response(
+        JSON.stringify({
+          message: { role: 'assistant', content: 'OK' },
+          done: true,
+        }),
+        {
+          status: 200,
+          headers: { 'Content-Type': 'application/json' },
+        },
+      ),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'ready',
+    probeModel: 'llama3.1:8b',
+  })
+})
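The `loadProviderDiscoveryModule` helper above re-imports the module under a unique query string so each test observes fresh module state under Bun's `mock()`. A minimal, generalized version of that pattern (the helper name is ours, not the repo's):

// Re-import a module with a cache-busting query string so Bun's module
// cache cannot hand back state mutated by an earlier test.
async function loadFreshModule<T>(specifier: string): Promise<T> {
  // @ts-expect-error query strings are not part of the module specifier's types
  return import(`${specifier}?ts=${Date.now()}-${Math.random()}`) as Promise<T>
}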

View File

@@ -4,6 +4,13 @@ import { DEFAULT_OPENAI_BASE_URL } from '../services/api/providerConfig.js'
export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434'
export const DEFAULT_ATOMIC_CHAT_BASE_URL = 'http://127.0.0.1:1337'

+export type OllamaGenerationReadiness = {
+  state: 'ready' | 'unreachable' | 'no_models' | 'generation_failed'
+  models: OllamaModelDescriptor[]
+  probeModel?: string
+  detail?: string
+}
+
function withTimeoutSignal(timeoutMs: number): {
  signal: AbortSignal
  clear: () => void
@@ -20,6 +27,83 @@ function trimTrailingSlash(value: string): string {
  return value.replace(/\/+$/, '')
}

+function compactDetail(value: string, maxLength = 180): string {
+  const compact = value.trim().replace(/\s+/g, ' ')
+  if (!compact) {
+    return ''
+  }
+  if (compact.length <= maxLength) {
+    return compact
+  }
+  return `${compact.slice(0, maxLength)}...`
+}
+
+type OllamaTagsPayload = {
+  models?: Array<{
+    name?: string
+    size?: number
+    details?: {
+      family?: string
+      families?: string[]
+      parameter_size?: string
+      quantization_level?: string
+    }
+  }>
+}
+
+function normalizeOllamaModels(
+  payload: OllamaTagsPayload,
+): OllamaModelDescriptor[] {
+  return (payload.models ?? [])
+    .filter(model => Boolean(model.name))
+    .map(model => ({
+      name: model.name!,
+      sizeBytes: typeof model.size === 'number' ? model.size : null,
+      family: model.details?.family ?? null,
+      families: model.details?.families ?? [],
+      parameterSize: model.details?.parameter_size ?? null,
+      quantizationLevel: model.details?.quantization_level ?? null,
+    }))
+}
+
+async function fetchOllamaModelsProbe(
+  baseUrl?: string,
+  timeoutMs = 5000,
+): Promise<{
+  reachable: boolean
+  models: OllamaModelDescriptor[]
+}> {
+  const { signal, clear } = withTimeoutSignal(timeoutMs)
+  try {
+    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
+      method: 'GET',
+      signal,
+    })
+    if (!response.ok) {
+      return {
+        reachable: false,
+        models: [],
+      }
+    }
+    const payload = (await response.json().catch(() => ({}))) as OllamaTagsPayload
+    return {
+      reachable: true,
+      models: normalizeOllamaModels(payload),
+    }
+  } catch {
+    return {
+      reachable: false,
+      models: [],
+    }
+  } finally {
+    clear()
+  }
+}
+
export function getOllamaApiBaseUrl(baseUrl?: string): string {
  const parsed = new URL(
    baseUrl || process.env.OLLAMA_BASE_URL || DEFAULT_OLLAMA_BASE_URL,
@@ -121,61 +205,15 @@ export function getLocalOpenAICompatibleProviderLabel(baseUrl?: string): string
}

export async function hasLocalOllama(baseUrl?: string): Promise<boolean> {
-  const { signal, clear } = withTimeoutSignal(1200)
-  try {
-    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
-      method: 'GET',
-      signal,
-    })
-    return response.ok
-  } catch {
-    return false
-  } finally {
-    clear()
-  }
+  const { reachable } = await fetchOllamaModelsProbe(baseUrl, 1200)
+  return reachable
}

export async function listOllamaModels(
  baseUrl?: string,
): Promise<OllamaModelDescriptor[]> {
-  const { signal, clear } = withTimeoutSignal(5000)
-  try {
-    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
-      method: 'GET',
-      signal,
-    })
-    if (!response.ok) {
-      return []
-    }
-    const data = (await response.json()) as {
-      models?: Array<{
-        name?: string
-        size?: number
-        details?: {
-          family?: string
-          families?: string[]
-          parameter_size?: string
-          quantization_level?: string
-        }
-      }>
-    }
-    return (data.models ?? [])
-      .filter(model => Boolean(model.name))
-      .map(model => ({
-        name: model.name!,
-        sizeBytes: typeof model.size === 'number' ? model.size : null,
-        family: model.details?.family ?? null,
-        families: model.details?.families ?? [],
-        parameterSize: model.details?.parameter_size ?? null,
-        quantizationLevel: model.details?.quantization_level ?? null,
-      }))
-  } catch {
-    return []
-  } finally {
-    clear()
-  }
+  const { models } = await fetchOllamaModelsProbe(baseUrl, 5000)
+  return models
}

export async function listOpenAICompatibleModels(options?: {
@@ -294,3 +332,106 @@ export async function benchmarkOllamaModel(
    clear()
  }
}
+
+export async function probeOllamaGenerationReadiness(options?: {
+  baseUrl?: string
+  model?: string
+  timeoutMs?: number
+}): Promise<OllamaGenerationReadiness> {
+  const timeoutMs = options?.timeoutMs ?? 8000
+  const { reachable, models } = await fetchOllamaModelsProbe(
+    options?.baseUrl,
+    timeoutMs,
+  )
+  if (!reachable) {
+    return {
+      state: 'unreachable',
+      models: [],
+    }
+  }
+  if (models.length === 0) {
+    return {
+      state: 'no_models',
+      models: [],
+    }
+  }
+
+  const requestedModel = options?.model?.trim() || undefined
+  if (requestedModel && !models.some(model => model.name === requestedModel)) {
+    return {
+      state: 'generation_failed',
+      models,
+      probeModel: requestedModel,
+      detail: `requested model not installed: ${requestedModel}`,
+    }
+  }
+
+  const probeModel = requestedModel ?? models[0]!.name
+  const { signal, clear } = withTimeoutSignal(timeoutMs)
+  try {
+    const response = await fetch(`${getOllamaApiBaseUrl(options?.baseUrl)}/api/chat`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      signal,
+      body: JSON.stringify({
+        model: probeModel,
+        stream: false,
+        messages: [{ role: 'user', content: 'Reply with OK.' }],
+        options: {
+          temperature: 0,
+          num_predict: 8,
+        },
+      }),
+    })
+    if (!response.ok) {
+      const responseBody = await response.text().catch(() => '')
+      const detailSuffix = compactDetail(responseBody)
+      return {
+        state: 'generation_failed',
+        models,
+        probeModel,
+        detail: detailSuffix
+          ? `status ${response.status}: ${detailSuffix}`
+          : `status ${response.status}`,
+      }
+    }
+    try {
+      await response.json()
+    } catch {
+      return {
+        state: 'generation_failed',
+        models,
+        probeModel,
+        detail: 'invalid JSON response',
+      }
+    }
+    return {
+      state: 'ready',
+      models,
+      probeModel,
+    }
+  } catch (error) {
+    const detail =
+      error instanceof Error
+        ? error.name === 'AbortError'
+          ? 'request timed out'
+          : error.message
+        : String(error)
+    return {
+      state: 'generation_failed',
+      models,
+      probeModel,
+      detail,
+    }
+  } finally {
+    clear()
+  }
+}
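A plausible call site for the new probe — the switch below is our illustration, not code from this commit, though the function signature and result shape are exactly as defined above:

const readiness = await probeOllamaGenerationReadiness({
  baseUrl: 'http://localhost:11434',
  model: 'llama3.1:8b', // optional; the probe defaults to the first installed model
})

switch (readiness.state) {
  case 'ready':
    // safe to route generation traffic to readiness.probeModel
    break
  case 'unreachable':
  case 'no_models':
  case 'generation_failed':
    // surface readiness.detail (when present) in setup or system-check output
    break
}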

View File

@@ -300,9 +300,9 @@ export function getRelativeSettingsFilePathForSource(
): string {
  switch (source) {
    case 'projectSettings':
-      return join('.openclaude', 'settings.json')
+      return '.openclaude/settings.json'
    case 'localSettings':
-      return join('.openclaude', 'settings.local.json')
+      return '.openclaude/settings.local.json'
  }
}

View File

@@ -0,0 +1,38 @@
import { describe, expect, test } from 'bun:test'
import { redactUrlForDisplay } from './urlRedaction.ts'

describe('redactUrlForDisplay', () => {
  test('redacts credentials and sensitive query params for valid URLs', () => {
    const redacted = redactUrlForDisplay(
      'http://user:pass@localhost:11434/v1?api_key=secret&foo=bar',
    )
    expect(redacted).toBe(
      'http://redacted:redacted@localhost:11434/v1?api_key=redacted&foo=bar',
    )
  })

  test('redacts token-like query parameter names', () => {
    const redacted = redactUrlForDisplay(
      'https://example.com/v1?x_access_token=abc123&model=qwen2.5-coder',
    )
    expect(redacted).toBe(
      'https://example.com/v1?x_access_token=redacted&model=qwen2.5-coder',
    )
  })

  test('falls back to regex redaction for malformed URLs', () => {
    const redacted = redactUrlForDisplay(
      '//user:pass@localhost:11434?token=abc&mode=test',
    )
    expect(redacted).toBe('//redacted@localhost:11434?token=redacted&mode=test')
  })

  test('keeps non-sensitive URLs unchanged', () => {
    const url = 'http://localhost:11434/v1?model=llama3.1:8b'
    expect(redactUrlForDisplay(url)).toBe(url)
  })
})

src/utils/urlRedaction.ts (new file, 48 lines)
View File

@@ -0,0 +1,48 @@
const SENSITIVE_URL_QUERY_PARAM_TOKENS = [
  'api_key',
  'apikey',
  'key',
  'token',
  'access_token',
  'refresh_token',
  'signature',
  'sig',
  'secret',
  'password',
  'passwd',
  'pwd',
  'auth',
  'authorization',
]

function shouldRedactUrlQueryParam(name: string): boolean {
  const lower = name.toLowerCase()
  return SENSITIVE_URL_QUERY_PARAM_TOKENS.some(token => lower.includes(token))
}

export function redactUrlForDisplay(rawUrl: string): string {
  try {
    const parsed = new URL(rawUrl)
    if (parsed.username) {
      parsed.username = 'redacted'
    }
    if (parsed.password) {
      parsed.password = 'redacted'
    }
    for (const key of parsed.searchParams.keys()) {
      if (shouldRedactUrlQueryParam(key)) {
        parsed.searchParams.set(key, 'redacted')
      }
    }
    return parsed.toString()
  } catch {
    return rawUrl
      .replace(/\/\/[^/@\s]+(?::[^/@\s]*)?@/g, '//redacted@')
      .replace(
        /([?&](?:token|access_token|refresh_token|api_key|apikey|key|password|passwd|pwd|auth|authorization|signature|sig|secret)=)[^&#]*/gi,
        '$1redacted',
      )
  }
}
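A representative call site (the log message itself is illustrative) shows how provider URLs stay safe to print:

const unsafeUrl = 'http://user:pass@localhost:11434/v1?api_key=abc123'
console.warn(`provider unreachable: ${redactUrlForDisplay(unsafeUrl)}`)
// → provider unreachable: http://redacted:redacted@localhost:11434/v1?api_key=redacted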