feat(api): improve local provider reliability with readiness and self-healing (#738)
* feat(api): classify openai-compatible provider failures

* Update src/services/api/providerConfig.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/errors.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* feat(api): harden openai-compatible diagnostics and env fallback

* Update src/services/api/openaiShim.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/errors.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/errors.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Apply suggestion from @Copilot
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix openaiShim duplicate requests and diagnostics

* remove unused url from http failure classifier

* dedupe env diagnostic warnings

* Remove hardcoded URLs from OpenAI error tests

  Removed hardcoded URLs from network failure classification tests.

* Update providerConfig.envDiagnostics.test.ts

* fix(openai-shim): return successful responses and restore localhost classifier tests

* Update src/services/api/openaiShim.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* feat(provider): add truthful local generation readiness checks

  Implement Phase 2 provider readiness behavior by adding structured Ollama
  generation probes, wiring setup flows to readiness states, extending
  system-check with generation readiness output, and updating focused tests.

* feat(api): add local self-healing fallback retries

  Implement Phase 3 self-healing behavior for local OpenAI-compatible
  providers: retry base URL fallbacks for localhost resolution and endpoint
  mismatches, plus capability-gated toolless retry for tool-incompatible
  local models; include diagnostics and focused tests.

* fix(api): address review blockers for local provider reliability

* Update src/utils/providerDiscovery.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix: harden readiness probes and cross-platform test stability

* fix: refresh toolless retry payload and stabilize osc clipboard test

* fix: harden Ollama readiness parsing and redact provider URLs

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
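For orientation before the diff: the self-healing behavior hinges on deriving fallback base URLs for local providers. The sketch below is illustrative only — the shipped implementation is `getLocalProviderRetryBaseUrls` in `src/services/api/providerConfig.ts` — but it reproduces the candidate order pinned by the new providerConfig tests at the end of this diff: normalize the path to end in `/v1`, then swap `localhost` for the `127.0.0.1` loopback address.

```ts
// Illustrative only: reproduces the candidate order pinned by the new
// providerConfig tests; the shipped getLocalProviderRetryBaseUrls may differ.
function sketchLocalRetryBaseUrls(baseUrl: string): string[] {
  let parsed: URL
  try {
    parsed = new URL(baseUrl)
  } catch {
    return []
  }

  // Self-healing is gated to local providers only.
  if (!['localhost', '127.0.0.1', '[::1]'].includes(parsed.hostname)) {
    return []
  }

  const stripTrailingSlash = (value: string): string => value.replace(/\/+$/, '')

  const withV1 = (url: URL): string => {
    const next = new URL(url.toString())
    const trimmed = stripTrailingSlash(next.pathname)
    next.pathname = trimmed.toLowerCase().endsWith('/v1') ? trimmed : `${trimmed}/v1`
    return stripTrailingSlash(next.toString())
  }

  const candidates: string[] = []
  const push = (candidate: string): void => {
    if (candidate !== baseUrl && !candidates.includes(candidate)) {
      candidates.push(candidate)
    }
  }

  push(withV1(parsed)) // same host, path normalized to end in /v1
  if (parsed.hostname === 'localhost') {
    const loopback = new URL(parsed.toString())
    loopback.hostname = '127.0.0.1' // sidesteps IPv6-only localhost resolution
    push(stripTrailingSlash(loopback.toString()))
    push(withV1(loopback))
  }

  return candidates
}

// sketchLocalRetryBaseUrls('http://localhost:11434') →
//   ['http://localhost:11434/v1', 'http://127.0.0.1:11434', 'http://127.0.0.1:11434/v1']
// sketchLocalRetryBaseUrls('https://api.openai.com/v1') → []
```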
@@ -20,6 +20,23 @@ describe('formatReachabilityFailureDetail', () => {
     )
   })
 
+  test('redacts credentials and sensitive query parameters in endpoint details', () => {
+    const detail = formatReachabilityFailureDetail(
+      'http://user:pass@localhost:11434/v1/models?token=abc123&mode=test',
+      502,
+      'bad gateway',
+      {
+        transport: 'chat_completions',
+        requestedModel: 'llama3.1:8b',
+        resolvedModel: 'llama3.1:8b',
+      },
+    )
+
+    expect(detail).toBe(
+      'Unexpected status 502 from http://redacted:redacted@localhost:11434/v1/models?token=redacted&mode=test. Body: bad gateway',
+    )
+  })
+
   test('adds alias/entitlement hint for codex model support 400s', () => {
     const detail = formatReachabilityFailureDetail(
       'https://chatgpt.com/backend-api/codex/responses',

@@ -7,6 +7,11 @@ import {
   resolveProviderRequest,
   isLocalProviderUrl as isProviderLocalUrl,
 } from '../src/services/api/providerConfig.js'
+import {
+  getLocalOpenAICompatibleProviderLabel,
+  probeOllamaGenerationReadiness,
+} from '../src/utils/providerDiscovery.js'
+import { redactUrlForDisplay } from '../src/utils/urlRedaction.js'
 
 type CheckResult = {
   ok: boolean
@@ -69,7 +74,7 @@ export function formatReachabilityFailureDetail(
   },
 ): string {
   const compactBody = responseBody.trim().replace(/\s+/g, ' ').slice(0, 240)
-  const base = `Unexpected status ${status} from ${endpoint}.`
+  const base = `Unexpected status ${status} from ${redactUrlForDisplay(endpoint)}.`
   const bodySuffix = compactBody ? ` Body: ${compactBody}` : ''
 
   if (request.transport !== 'codex_responses' || status !== 400) {
@@ -255,7 +260,7 @@ function checkOpenAIEnv(): CheckResult[] {
     results.push(pass('OPENAI_MODEL', process.env.OPENAI_MODEL))
   }
 
-  results.push(pass('OPENAI_BASE_URL', request.baseUrl))
+  results.push(pass('OPENAI_BASE_URL', redactUrlForDisplay(request.baseUrl)))
 
   if (request.transport === 'codex_responses') {
     const credentials = resolveCodexApiCredentials(process.env)
@@ -308,7 +313,7 @@ async function checkBaseUrlReachability(): Promise<CheckResult> {
     return pass('Provider reachability', 'Skipped (OpenAI-compatible mode disabled).')
   }
 
-  if (useGithub) {
+  if (useGithub && !useOpenAI) {
     return pass(
       'Provider reachability',
       'Skipped for GitHub Models (inference endpoint differs from OpenAI /models probe).',
@@ -326,6 +331,7 @@ async function checkBaseUrlReachability(): Promise<CheckResult> {
   const endpoint = request.transport === 'codex_responses'
     ? `${request.baseUrl}/responses`
     : `${request.baseUrl}/models`
+  const redactedEndpoint = redactUrlForDisplay(endpoint)
 
   const controller = new AbortController()
   const timeout = setTimeout(() => controller.abort(), 4000)
@@ -375,7 +381,10 @@ async function checkBaseUrlReachability(): Promise<CheckResult> {
     })
 
     if (response.status === 200 || response.status === 401 || response.status === 403) {
-      return pass('Provider reachability', `Reached ${endpoint} (status ${response.status}).`)
+      return pass(
+        'Provider reachability',
+        `Reached ${redactedEndpoint} (status ${response.status}).`,
+      )
     }
 
     const responseBody = await response.text().catch(() => '')
@@ -391,12 +400,100 @@ async function checkBaseUrlReachability(): Promise<CheckResult> {
     )
   } catch (error) {
     const message = error instanceof Error ? error.message : String(error)
-    return fail('Provider reachability', `Failed to reach ${endpoint}: ${message}`)
+    return fail(
+      'Provider reachability',
+      `Failed to reach ${redactedEndpoint}: ${message}`,
+    )
   } finally {
     clearTimeout(timeout)
   }
 }
 
+async function checkProviderGenerationReadiness(): Promise<CheckResult> {
+  const useGemini = isTruthy(process.env.CLAUDE_CODE_USE_GEMINI)
+  const useOpenAI = isTruthy(process.env.CLAUDE_CODE_USE_OPENAI)
+  const useGithub = isTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
+  const useMistral = isTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)
+
+  if (!useGemini && !useOpenAI && !useGithub && !useMistral) {
+    return pass('Provider generation readiness', 'Skipped (OpenAI-compatible mode disabled).')
+  }
+
+  if (useGithub && !useOpenAI) {
+    return pass(
+      'Provider generation readiness',
+      'Skipped for GitHub Models (runtime generation uses a different endpoint flow).',
+    )
+  }
+
+  if (useGemini || useMistral) {
+    return pass(
+      'Provider generation readiness',
+      'Skipped for managed provider mode.',
+    )
+  }
+
+  if (!useOpenAI) {
+    return pass('Provider generation readiness', 'Skipped (OpenAI-compatible mode disabled).')
+  }
+
+  const request = resolveProviderRequest({
+    model: process.env.OPENAI_MODEL,
+    baseUrl: process.env.OPENAI_BASE_URL,
+  })
+
+  if (request.transport === 'codex_responses') {
+    return pass(
+      'Provider generation readiness',
+      'Skipped for Codex responses (reachability probe already performs a lightweight generation request).',
+    )
+  }
+
+  if (!isLocalBaseUrl(request.baseUrl)) {
+    return pass('Provider generation readiness', 'Skipped for non-local provider URL.')
+  }
+
+  const localProviderLabel = getLocalOpenAICompatibleProviderLabel(request.baseUrl)
+  if (localProviderLabel !== 'Ollama') {
+    return pass(
+      'Provider generation readiness',
+      `Skipped for ${localProviderLabel} (no provider-specific generation probe).`,
+    )
+  }
+
+  const readiness = await probeOllamaGenerationReadiness({
+    baseUrl: request.baseUrl,
+    model: request.requestedModel,
+  })
+
+  if (readiness.state === 'ready') {
+    return pass(
+      'Provider generation readiness',
+      `Generated a test response with ${readiness.probeModel ?? request.requestedModel}.`,
+    )
+  }
+
+  if (readiness.state === 'unreachable') {
+    return fail(
+      'Provider generation readiness',
+      `Could not reach Ollama at ${redactUrlForDisplay(request.baseUrl)}.`,
+    )
+  }
+
+  if (readiness.state === 'no_models') {
+    return fail(
+      'Provider generation readiness',
+      'Ollama is reachable, but no installed models were found. Pull a model first (for example: ollama pull qwen2.5-coder:7b).',
+    )
+  }
+
+  const detailSuffix = readiness.detail ? ` Detail: ${readiness.detail}.` : ''
+  return fail(
+    'Provider generation readiness',
+    `Ollama is reachable, but generation failed for ${readiness.probeModel ?? request.requestedModel}.${detailSuffix}`,
+  )
+}
+
 function isAtomicChatUrl(baseUrl: string): boolean {
   try {
     const parsed = new URL(baseUrl)
@@ -567,6 +664,7 @@ async function main(): Promise<void> {
   results.push(checkBuildArtifacts())
   results.push(...checkOpenAIEnv())
   results.push(await checkBaseUrlReachability())
+  results.push(await checkProviderGenerationReadiness())
   results.push(checkOllamaProcessorMode())
 
   if (!options.json) {

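The readiness probe wired in above reports one of four states. A hypothetical caller might consume them as sketched here; the state names and optional fields mirror the mock type used in the ProviderManager tests further down this diff, so treat the exact shape as an assumption rather than the documented API of providerDiscovery.ts.

```ts
// Hypothetical consumer of the probe; shape assumed from the test mocks.
import { probeOllamaGenerationReadiness } from '../src/utils/providerDiscovery.js'

async function summarizeOllamaReadiness(baseUrl: string, model?: string): Promise<string> {
  const readiness = await probeOllamaGenerationReadiness({ baseUrl, model })

  switch (readiness.state) {
    case 'ready':
      return `ready (probed ${readiness.probeModel ?? model ?? 'a default model'})`
    case 'unreachable':
      return 'unreachable: start Ollama, then re-run the check'
    case 'no_models':
      return 'reachable, but no installed models: pull a model first'
    case 'generation_failed':
      return `generation failed${readiness.detail ? `: ${readiness.detail}` : ''}`
    default:
      return String(readiness.state)
  }
}
```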
@@ -66,10 +66,44 @@ import {
 import {
   getOllamaChatBaseUrl,
   getLocalOpenAICompatibleProviderLabel,
-  hasLocalOllama,
-  listOllamaModels,
+  probeOllamaGenerationReadiness,
+  type OllamaGenerationReadiness,
 } from '../../utils/providerDiscovery.js'
 
+function describeOllamaReadinessIssue(
+  readiness: OllamaGenerationReadiness,
+  options?: {
+    baseUrl?: string
+    allowManualFallback?: boolean
+  },
+): string {
+  const endpoint = options?.baseUrl ?? 'http://localhost:11434'
+
+  if (readiness.state === 'unreachable') {
+    return `Could not reach Ollama at ${endpoint}. Start Ollama first, then run /provider again.`
+  }
+
+  if (readiness.state === 'no_models') {
+    const manualSuffix = options?.allowManualFallback
+      ? ', or enter details manually'
+      : ''
+    return `Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first${manualSuffix}.`
+  }
+
+  if (readiness.state === 'generation_failed') {
+    const modelHint = readiness.probeModel ?? 'the selected model'
+    const detailSuffix = readiness.detail
+      ? ` Details: ${readiness.detail}.`
+      : ''
+    const manualSuffix = options?.allowManualFallback
+      ? ' You can also enter details manually.'
+      : ''
+    return `Ollama is reachable and models are installed, but a generation probe failed for ${modelHint}.${detailSuffix} Run "ollama run ${modelHint}" once and retry.${manualSuffix}`
+  }
+
+  return ''
+}
+
 type ProviderChoice = 'auto' | ProviderProfile | 'codex-oauth' | 'clear'
 
 type Step =
@@ -715,6 +749,7 @@ function AutoRecommendationStep({
   | {
       state: 'openai'
       defaultModel: string
+      reason: string
     }
   | {
       state: 'error'
@@ -728,19 +763,27 @@ function AutoRecommendationStep({
     void (async () => {
       const defaultModel = getGoalDefaultOpenAIModel(goal)
       try {
-        const ollamaAvailable = await hasLocalOllama()
-        if (!ollamaAvailable) {
+        const readiness = await probeOllamaGenerationReadiness()
+        if (readiness.state !== 'ready') {
           if (!cancelled) {
-            setStatus({ state: 'openai', defaultModel })
+            setStatus({
+              state: 'openai',
+              defaultModel,
+              reason: describeOllamaReadinessIssue(readiness),
+            })
           }
           return
         }
 
-        const models = await listOllamaModels()
-        const recommended = recommendOllamaModel(models, goal)
+        const recommended = recommendOllamaModel(readiness.models, goal)
         if (!recommended) {
           if (!cancelled) {
-            setStatus({ state: 'openai', defaultModel })
+            setStatus({
+              state: 'openai',
+              defaultModel,
+              reason:
+                'Ollama responded to a generation probe, but no recommended chat model matched this goal.',
+            })
           }
           return
         }
@@ -796,10 +839,10 @@ function AutoRecommendationStep({
     <Dialog title="Auto setup fallback" onCancel={onCancel}>
       <Box flexDirection="column" gap={1}>
         <Text>
-          No viable local Ollama chat model was detected. Auto setup can
-          continue into OpenAI-compatible setup with a default model of{' '}
+          Auto setup can continue into OpenAI-compatible setup with a default model of{' '}
          {status.defaultModel}.
         </Text>
+        <Text dimColor>{status.reason}</Text>
         <Select
           options={[
             { label: 'Continue to OpenAI-compatible setup', value: 'continue' },
@@ -883,32 +926,19 @@ function OllamaModelStep({
     let cancelled = false
 
     void (async () => {
-      const available = await hasLocalOllama()
-      if (!available) {
+      const readiness = await probeOllamaGenerationReadiness()
+      if (readiness.state !== 'ready') {
         if (!cancelled) {
           setStatus({
             state: 'unavailable',
-            message:
-              'Could not reach Ollama at http://localhost:11434. Start Ollama first, then run /provider again.',
+            message: describeOllamaReadinessIssue(readiness),
           })
         }
         return
       }
 
-      const models = await listOllamaModels()
-      if (models.length === 0) {
-        if (!cancelled) {
-          setStatus({
-            state: 'unavailable',
-            message:
-              'Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first.',
-          })
-        }
-        return
-      }
-
-      const ranked = rankOllamaModels(models, 'balanced')
-      const recommended = recommendOllamaModel(models, 'balanced')
+      const ranked = rankOllamaModels(readiness.models, 'balanced')
+      const recommended = recommendOllamaModel(readiness.models, 'balanced')
       if (!cancelled) {
         setStatus({
           state: 'ready',

@@ -149,17 +149,21 @@ function mockProviderManagerDependencies(
   applySavedProfileToCurrentSession?: (...args: unknown[]) => Promise<string | null>
   clearCodexCredentials?: () => { success: boolean; warning?: string }
   getProviderProfiles?: () => unknown[]
-  hasLocalOllama?: () => Promise<boolean>
-  listOllamaModels?: () => Promise<
-    Array<{
-      name: string
-      sizeBytes?: number | null
-      family?: string | null
-      families?: string[]
-      parameterSize?: string | null
-      quantizationLevel?: string | null
-    }>
-  >
+  probeOllamaGenerationReadiness?: () => Promise<{
+    state: 'ready' | 'unreachable' | 'no_models' | 'generation_failed'
+    models: Array<
+      {
+        name: string
+        sizeBytes?: number | null
+        family?: string | null
+        families?: string[]
+        parameterSize?: string | null
+        quantizationLevel?: string | null
+      }
+    >
+    probeModel?: string
+    detail?: string
+  }>
   codexSyncRead?: () => unknown
   codexAsyncRead?: () => Promise<unknown>
   updateProviderProfile?: (...args: unknown[]) => unknown
@@ -189,8 +193,12 @@ function mockProviderManagerDependencies(
   })
 
   mock.module('../utils/providerDiscovery.js', () => ({
-    hasLocalOllama: options?.hasLocalOllama ?? (async () => false),
-    listOllamaModels: options?.listOllamaModels ?? (async () => []),
+    probeOllamaGenerationReadiness:
+      options?.probeOllamaGenerationReadiness ??
+      (async () => ({
+        state: 'unreachable' as const,
+        models: [],
+      })),
   }))
 
   mock.module('../utils/githubModelsCredentials.js', () => ({
@@ -455,19 +463,22 @@ test('ProviderManager first-run Ollama preset auto-detects installed models', as
     async () => undefined,
     {
       addProviderProfile,
-      hasLocalOllama: async () => true,
-      listOllamaModels: async () => [
-        {
-          name: 'gemma4:31b-cloud',
-          family: 'gemma',
-          parameterSize: '31b',
-        },
-        {
-          name: 'kimi-k2.5:cloud',
-          family: 'kimi',
-          parameterSize: '2.5b',
-        },
-      ],
+      probeOllamaGenerationReadiness: async () => ({
+        state: 'ready',
+        models: [
+          {
+            name: 'gemma4:31b-cloud',
+            family: 'gemma',
+            parameterSize: '31b',
+          },
+          {
+            name: 'kimi-k2.5:cloud',
+            family: 'kimi',
+            parameterSize: '2.5b',
+          },
+        ],
+        probeModel: 'gemma4:31b-cloud',
+      }),
     },
   )
 

@@ -37,13 +37,14 @@ import {
   readGithubModelsTokenAsync,
 } from '../utils/githubModelsCredentials.js'
 import {
-  hasLocalOllama,
-  listOllamaModels,
+  probeOllamaGenerationReadiness,
+  type OllamaGenerationReadiness,
 } from '../utils/providerDiscovery.js'
 import {
   rankOllamaModels,
   recommendOllamaModel,
 } from '../utils/providerRecommendation.js'
+import { redactUrlForDisplay } from '../utils/urlRedaction.js'
 import { updateSettingsForSource } from '../utils/settings/settings.js'
 import {
   type OptionWithDescription,
@@ -52,7 +53,6 @@ import {
 import { Pane } from './design-system/Pane.js'
 import TextInput from './TextInput.js'
 import { useCodexOAuthFlow } from './useCodexOAuthFlow.js'
-import { useSetAppState } from '../state/AppState.js'
 
 export type ProviderManagerResult = {
   action: 'saved' | 'cancelled'
@@ -222,6 +222,29 @@ function getGithubProviderSummary(
   return `github-models · ${GITHUB_PROVIDER_DEFAULT_BASE_URL} · ${getGithubProviderModel(processEnv)} · ${credentialSummary}${activeSuffix}`
 }
 
+function describeOllamaSelectionIssue(
+  readiness: OllamaGenerationReadiness,
+  baseUrl: string,
+): string {
+  if (readiness.state === 'unreachable') {
+    return `Could not reach Ollama at ${redactUrlForDisplay(baseUrl)}. Start Ollama first, or enter the endpoint manually.`
+  }
+
+  if (readiness.state === 'no_models') {
+    return 'Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first, or enter details manually.'
+  }
+
+  if (readiness.state === 'generation_failed') {
+    const modelHint = readiness.probeModel ?? 'the selected model'
+    const detailSuffix = readiness.detail
+      ? ` Details: ${readiness.detail}.`
+      : ''
+    return `Ollama is reachable and models are installed, but a generation probe failed for ${modelHint}.${detailSuffix} Run "ollama run ${modelHint}" once and retry, or enter details manually.`
+  }
+
+  return ''
+}
+
 function findCodexOAuthProfile(
   profiles: ProviderProfile[],
   profileId?: string,
@@ -450,32 +473,21 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
     setOllamaSelection({ state: 'loading' })
 
     void (async () => {
-      const available = await hasLocalOllama(draft.baseUrl)
-      if (!available) {
+      const readiness = await probeOllamaGenerationReadiness({
+        baseUrl: draft.baseUrl,
+      })
+      if (readiness.state !== 'ready') {
         if (!cancelled) {
           setOllamaSelection({
             state: 'unavailable',
-            message:
-              'Could not reach Ollama. Start Ollama first, or enter the endpoint manually.',
+            message: describeOllamaSelectionIssue(readiness, draft.baseUrl),
           })
         }
         return
       }
 
-      const models = await listOllamaModels(draft.baseUrl)
-      if (models.length === 0) {
-        if (!cancelled) {
-          setOllamaSelection({
-            state: 'unavailable',
-            message:
-              'Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first, or enter details manually.',
-          })
-        }
-        return
-      }
-
-      const ranked = rankOllamaModels(models, 'balanced')
-      const recommended = recommendOllamaModel(models, 'balanced')
+      const ranked = rankOllamaModels(readiness.models, 'balanced')
+      const recommended = recommendOllamaModel(readiness.models, 'balanced')
       if (!cancelled) {
         setOllamaSelection({
           state: 'ready',

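Condensing the selection flow above: probe once, then rank the models the probe already returned rather than issuing a second `listOllamaModels` call. A rough sketch follows; the `.name` access on the recommendation result is an assumption for illustration — the component itself only checks the result for truthiness.

```ts
// Sketch only; function shapes follow the imports in ProviderManager.
import { probeOllamaGenerationReadiness } from '../utils/providerDiscovery.js'
import { rankOllamaModels, recommendOllamaModel } from '../utils/providerRecommendation.js'

async function pickLocalOllamaModel(baseUrl: string): Promise<string | null> {
  const readiness = await probeOllamaGenerationReadiness({ baseUrl })
  if (readiness.state !== 'ready') {
    // The UI renders describeOllamaSelectionIssue(readiness, baseUrl) here.
    return null
  }

  const ranked = rankOllamaModels(readiness.models, 'balanced')
  const recommended = recommendOllamaModel(readiness.models, 'balanced')
  return recommended?.name ?? ranked[0]?.name ?? null
}
```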
@@ -53,17 +53,20 @@ describe('getProjectMemoryPathForSelector', () => {
   })
 
   test('defaults to a new AGENTS.md in the current cwd when no project file is loaded', () => {
-    expect(getProjectMemoryPathForSelector([], '/repo/packages/app')).toBe(
-      '/repo/packages/app/AGENTS.md',
+    const cwd = join('/repo', 'packages', 'app')
+    expect(getProjectMemoryPathForSelector([], cwd)).toBe(
+      join(cwd, 'AGENTS.md'),
     )
   })
 
   test('ignores loaded project instruction files outside the current cwd ancestry', () => {
+    const outsideRepoPath = join('/other-worktree', 'AGENTS.md')
+    const cwd = join('/repo', 'packages', 'app')
     expect(
       getProjectMemoryPathForSelector(
-        [projectFile('/other-worktree/AGENTS.md')],
-        '/repo/packages/app',
+        [projectFile(outsideRepoPath)],
+        cwd,
       ),
-    ).toBe('/repo/packages/app/AGENTS.md')
+    ).toBe(join(cwd, 'AGENTS.md'))
   })
 })

@@ -11,14 +11,16 @@ const execFileNoThrowMock = mock(
   async () => ({ code: 0, stdout: '', stderr: '' }),
 )
 
-mock.module('../../utils/execFileNoThrow.js', () => ({
-  execFileNoThrow: execFileNoThrowMock,
-  execFileNoThrowWithCwd: execFileNoThrowMock,
-}))
+function installOscMocks(): void {
+  mock.module('../../utils/execFileNoThrow.js', () => ({
+    execFileNoThrow: execFileNoThrowMock,
+    execFileNoThrowWithCwd: execFileNoThrowMock,
+  }))
 
   mock.module('../../utils/tempfile.js', () => ({
     generateTempFilePath: generateTempFilePathMock,
   }))
+}
 
 async function importFreshOscModule() {
   return import(`./osc.ts?ts=${Date.now()}-${Math.random()}`)
@@ -45,6 +47,7 @@ async function waitForExecCall(
 
 describe('Windows clipboard fallback', () => {
   beforeEach(() => {
+    installOscMocks()
     execFileNoThrowMock.mockClear()
     generateTempFilePathMock.mockClear()
     process.env = { ...originalEnv }
@@ -62,14 +65,12 @@ describe('Windows clipboard fallback', () => {
     const { setClipboard } = await importFreshOscModule()
 
     await setClipboard('Привет мир')
-    await flushClipboardCopy()
+    const windowsCall = await waitForExecCall('powershell')
 
     expect(execFileNoThrowMock.mock.calls.some(([cmd]) => cmd === 'clip')).toBe(
       false,
     )
-    expect(
-      execFileNoThrowMock.mock.calls.some(([cmd]) => cmd === 'powershell'),
-    ).toBe(true)
+    expect(windowsCall).toBeDefined()
   })
 
   test('passes Windows clipboard text through a UTF-8 temp file instead of stdin', async () => {
@@ -97,6 +98,7 @@ describe('Windows clipboard fallback', () => {
 
 describe('clipboard path behavior remains stable', () => {
   beforeEach(() => {
+    installOscMocks()
     execFileNoThrowMock.mockClear()
     process.env = { ...originalEnv }
     delete process.env['SSH_CONNECTION']

@@ -320,10 +320,7 @@ export function classifyOpenAIHttpFailure(options: {
     }
   }
 
-  if (
-    (options.status >= 200 && options.status < 300 && isMalformedProviderResponse(body)) ||
-    (options.status >= 400 && isMalformedProviderResponse(body))
-  ) {
+  if (options.status >= 400 && isMalformedProviderResponse(body)) {
     return {
       source: 'http',
       category: 'malformed_provider_response',

@@ -117,3 +117,170 @@ test('redacts credentials in transport diagnostic URL logs', async () => {
   expect(logLine).not.toContain('user:supersecret')
   expect(logLine).not.toContain('supersecret@')
 })
+
+test('logs self-heal localhost fallback with redacted from/to URLs', async () => {
+  const debugSpy = mock(() => {})
+  mock.module('../../utils/debug.js', () => ({
+    logForDebugging: debugSpy,
+  }))
+
+  const nonce = `${Date.now()}-${Math.random()}`
+  const { createOpenAIShimClient } = await import(`./openaiShim.ts?ts=${nonce}`)
+
+  process.env.OPENAI_BASE_URL = 'http://user:supersecret@localhost:11434/v1'
+  process.env.OPENAI_API_KEY = 'supersecret'
+
+  globalThis.fetch = mock(async (input: string | Request) => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.includes('localhost')) {
+      throw Object.assign(new TypeError('fetch failed'), {
+        code: 'ENOTFOUND',
+      })
+    }
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        model: 'qwen2.5-coder:7b',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'ok',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 5,
+          completion_tokens: 2,
+          total_tokens: 7,
+        },
+      }),
+      {
+        status: 200,
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as typeof globalThis.fetch
+
+  const client = createOpenAIShimClient({}) as {
+    beta: {
+      messages: {
+        create: (params: Record<string, unknown>) => Promise<unknown>
+      }
+    }
+  }
+
+  await expect(
+    client.beta.messages.create({
+      model: 'qwen2.5-coder:7b',
+      messages: [{ role: 'user', content: 'hello' }],
+      max_tokens: 64,
+      stream: false,
+    }),
+  ).resolves.toBeDefined()
+
+  const fallbackLog = debugSpy.mock.calls.find(call =>
+    typeof call?.[0] === 'string' &&
+    call[0].includes('self-heal retry reason=localhost_resolution_failed'),
+  )
+
+  expect(fallbackLog).toBeDefined()
+  const logLine = String(fallbackLog?.[0])
+  expect(logLine).toContain('from=http://redacted:redacted@localhost:11434/v1/chat/completions')
+  expect(logLine).toContain('to=http://redacted:redacted@127.0.0.1:11434/v1/chat/completions')
+  expect(logLine).not.toContain('supersecret')
+})
+
+test('logs self-heal toolless retry for local tool-call incompatibility', async () => {
+  const debugSpy = mock(() => {})
+  mock.module('../../utils/debug.js', () => ({
+    logForDebugging: debugSpy,
+  }))
+
+  const nonce = `${Date.now()}-${Math.random()}`
+  const { createOpenAIShimClient } = await import(`./openaiShim.ts?ts=${nonce}`)
+
+  process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1'
+  process.env.OPENAI_API_KEY = 'ollama'
+
+  let callCount = 0
+  globalThis.fetch = mock(async () => {
+    callCount += 1
+    if (callCount === 1) {
+      return new Response('tool_calls are not supported', {
+        status: 400,
+        headers: {
+          'Content-Type': 'text/plain',
+        },
+      })
+    }
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        model: 'qwen2.5-coder:7b',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'ok',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 7,
+          completion_tokens: 3,
+          total_tokens: 10,
+        },
+      }),
+      {
+        status: 200,
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as typeof globalThis.fetch
+
+  const client = createOpenAIShimClient({}) as {
+    beta: {
+      messages: {
+        create: (params: Record<string, unknown>) => Promise<unknown>
+      }
+    }
+  }
+
+  await expect(
+    client.beta.messages.create({
+      model: 'qwen2.5-coder:7b',
+      messages: [{ role: 'user', content: 'hello' }],
+      tools: [
+        {
+          name: 'Read',
+          description: 'Read file',
+          input_schema: {
+            type: 'object',
+            properties: {
+              filePath: { type: 'string' },
+            },
+            required: ['filePath'],
+          },
+        },
+      ],
+      max_tokens: 64,
+      stream: false,
+    }),
+  ).resolves.toBeDefined()
+
+  const fallbackLog = debugSpy.mock.calls.find(call =>
+    typeof call?.[0] === 'string' &&
+    call[0].includes('self-heal retry reason=tool_call_incompatible mode=toolless'),
+  )
+
+  expect(fallbackLog).toBeDefined()
+  expect(fallbackLog?.[1]).toEqual({ level: 'warn' })
+})

@@ -2931,6 +2931,204 @@ test('classifies chat-completions endpoint 404 failures with endpoint_not_found
     }),
   ).rejects.toThrow('openai_category=endpoint_not_found')
 })
+
+test('self-heals localhost resolution failures by retrying local loopback base URL', async () => {
+  process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1'
+
+  const requestUrls: string[] = []
+  globalThis.fetch = (async (input, _init) => {
+    const url = typeof input === 'string' ? input : input.url
+    requestUrls.push(url)
+
+    if (url.includes('localhost')) {
+      const error = Object.assign(new TypeError('fetch failed'), {
+        code: 'ENOTFOUND',
+      })
+      throw error
+    }
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        model: 'qwen2.5-coder:7b',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'hello from loopback',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 4,
+          completion_tokens: 3,
+          total_tokens: 7,
+        },
+      }),
+      {
+        status: 200,
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await expect(
+    client.beta.messages.create({
+      model: 'qwen2.5-coder:7b',
+      messages: [{ role: 'user', content: 'hello' }],
+      max_tokens: 64,
+      stream: false,
+    }),
+  ).resolves.toBeDefined()
+
+  expect(requestUrls[0]).toBe('http://localhost:11434/v1/chat/completions')
+  expect(requestUrls).toContain('http://127.0.0.1:11434/v1/chat/completions')
+})
+
+test('self-heals local endpoint_not_found by retrying with /v1 base URL', async () => {
+  process.env.OPENAI_BASE_URL = 'http://localhost:11434'
+
+  const requestUrls: string[] = []
+  globalThis.fetch = (async (input, _init) => {
+    const url = typeof input === 'string' ? input : input.url
+    requestUrls.push(url)
+
+    if (url === 'http://localhost:11434/chat/completions') {
+      return new Response('Not Found', {
+        status: 404,
+        headers: {
+          'Content-Type': 'text/plain',
+        },
+      })
+    }
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        model: 'qwen2.5-coder:7b',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'hello from /v1',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 5,
+          completion_tokens: 2,
+          total_tokens: 7,
+        },
+      }),
+      {
+        status: 200,
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await expect(
+    client.beta.messages.create({
+      model: 'qwen2.5-coder:7b',
+      messages: [{ role: 'user', content: 'hello' }],
+      max_tokens: 64,
+      stream: false,
+    }),
+  ).resolves.toBeDefined()
+
+  expect(requestUrls).toEqual([
+    'http://localhost:11434/chat/completions',
+    'http://localhost:11434/v1/chat/completions',
+  ])
+})
+
+test('self-heals tool-call incompatibility by retrying local Ollama requests without tools', async () => {
+  process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1'
+
+  const requestBodies: Array<Record<string, unknown>> = []
+  globalThis.fetch = (async (_input, init) => {
+    const requestBody = JSON.parse(String(init?.body)) as Record<string, unknown>
+    requestBodies.push(requestBody)
+
+    if (requestBodies.length === 1) {
+      return new Response('tool_calls are not supported', {
+        status: 400,
+        headers: {
+          'Content-Type': 'text/plain',
+        },
+      })
+    }
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        model: 'qwen2.5-coder:7b',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'fallback without tools',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 8,
+          completion_tokens: 4,
+          total_tokens: 12,
+        },
+      }),
+      {
+        status: 200,
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await expect(
+    client.beta.messages.create({
+      model: 'qwen2.5-coder:7b',
+      messages: [{ role: 'user', content: 'hello' }],
+      tools: [
+        {
+          name: 'Read',
+          description: 'Read a file',
+          input_schema: {
+            type: 'object',
+            properties: {
+              filePath: { type: 'string' },
+            },
+            required: ['filePath'],
+          },
+        },
+      ],
+      max_tokens: 64,
+      stream: false,
+    }),
+  ).resolves.toBeDefined()
+
+  expect(requestBodies).toHaveLength(2)
+  expect(Array.isArray(requestBodies[0]?.tools)).toBe(true)
+  expect(requestBodies[0]?.tool_choice).toBeUndefined()
+  expect(
+    requestBodies[1]?.tools === undefined ||
+      (Array.isArray(requestBodies[1]?.tools) && requestBodies[1]?.tools.length === 0),
+  ).toBe(true)
+  expect(requestBodies[1]?.tool_choice).toBeUndefined()
+})
 
 test('preserves valid tool_result and drops orphan tool_result', async () => {
   let requestBody: Record<string, unknown> | undefined
@@ -2999,7 +3197,7 @@ test('preserves valid tool_result and drops orphan tool_result', async () => {
           {
             role: 'user',
             content: 'What happened?',
-          }
+          },
         ],
       },
     ],
@@ -3008,14 +3206,14 @@ test('preserves valid tool_result and drops orphan tool_result', async () => {
   })
 
   const messages = requestBody?.messages as Array<Record<string, unknown>>
 
   // Should have: system, user, assistant (tool_use), tool (valid_call_1), user
   // Should NOT have: tool (orphan_call_2)
 
   const toolMessages = messages.filter(m => m.role === 'tool')
   expect(toolMessages.length).toBe(1)
   expect(toolMessages[0].tool_call_id).toBe('valid_call_1')
 
   const orphanMessage = toolMessages.find(m => m.tool_call_id === 'orphan_call_2')
   expect(orphanMessage).toBeUndefined()
 })

@@ -48,10 +48,12 @@ import {
 } from './codexShim.js'
 import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
 import {
+  getLocalProviderRetryBaseUrls,
+  getGithubEndpointType,
   isLocalProviderUrl,
   resolveRuntimeCodexCredentials,
   resolveProviderRequest,
-  getGithubEndpointType,
+  shouldAttemptLocalToollessRetry,
 } from './providerConfig.js'
 import {
   buildOpenAICompatibilityErrorMessage,
@@ -1427,48 +1429,95 @@ class OpenAIShimMessages {
      headers['X-GitHub-Api-Version'] = '2022-11-28'
     }
 
-    // Build the chat completions URL
-    // Azure Cognitive Services / Azure OpenAI require a deployment-specific path
-    // and an api-version query parameter.
-    // Standard format: {base}/openai/deployments/{model}/chat/completions?api-version={version}
-    // Non-Azure: {base}/chat/completions
-    let chatCompletionsUrl: string
-    if (isAzure) {
-      const apiVersion = process.env.AZURE_OPENAI_API_VERSION ?? '2024-12-01-preview'
-      const deployment = request.resolvedModel ?? process.env.OPENAI_MODEL ?? 'gpt-4o'
-      // If base URL already contains /deployments/, use it as-is with api-version
-      if (/\/deployments\//i.test(request.baseUrl)) {
-        const base = request.baseUrl.replace(/\/+$/, '')
-        chatCompletionsUrl = `${base}/chat/completions?api-version=${apiVersion}`
-      } else {
-        // Strip trailing /v1 or /openai/v1 if present, then build Azure path
-        const base = request.baseUrl.replace(/\/(openai\/)?v1\/?$/, '').replace(/\/+$/, '')
-        chatCompletionsUrl = `${base}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`
-      }
-    } else {
-      chatCompletionsUrl = `${request.baseUrl}/chat/completions`
-    }
+    const buildChatCompletionsUrl = (baseUrl: string): string => {
+      // Azure Cognitive Services / Azure OpenAI require a deployment-specific
+      // path and an api-version query parameter.
+      if (isAzure) {
+        const apiVersion = process.env.AZURE_OPENAI_API_VERSION ?? '2024-12-01-preview'
+        const deployment = request.resolvedModel ?? process.env.OPENAI_MODEL ?? 'gpt-4o'
+
+        // If base URL already contains /deployments/, use it as-is with api-version.
+        if (/\/deployments\//i.test(baseUrl)) {
+          const normalizedBase = baseUrl.replace(/\/+$/, '')
+          return `${normalizedBase}/chat/completions?api-version=${apiVersion}`
+        }
+
+        // Strip trailing /v1 or /openai/v1 if present, then build Azure path.
+        const normalizedBase = baseUrl
+          .replace(/\/(openai\/)?v1\/?$/, '')
+          .replace(/\/+$/, '')
+        return `${normalizedBase}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`
+      }
+
+      return `${baseUrl}/chat/completions`
+    }
 
-    const fetchInit = {
+    const localRetryBaseUrls = isLocal
+      ? getLocalProviderRetryBaseUrls(request.baseUrl)
+      : []
+
+    let activeBaseUrl = request.baseUrl
+    let chatCompletionsUrl = buildChatCompletionsUrl(activeBaseUrl)
+    const attemptedLocalBaseUrls = new Set<string>([activeBaseUrl])
+    let didRetryWithoutTools = false
+
+    const promoteNextLocalBaseUrl = (
+      reason: 'endpoint_not_found' | 'localhost_resolution_failed',
+    ): boolean => {
+      for (const candidateBaseUrl of localRetryBaseUrls) {
+        if (attemptedLocalBaseUrls.has(candidateBaseUrl)) {
+          continue
+        }
+
+        const previousUrl = chatCompletionsUrl
+        attemptedLocalBaseUrls.add(candidateBaseUrl)
+        activeBaseUrl = candidateBaseUrl
+        chatCompletionsUrl = buildChatCompletionsUrl(activeBaseUrl)
+
+        logForDebugging(
+          `[OpenAIShim] self-heal retry reason=${reason} method=POST from=${redactUrlForDiagnostics(previousUrl)} to=${redactUrlForDiagnostics(chatCompletionsUrl)} model=${request.resolvedModel}`,
+          { level: 'warn' },
+        )
+
+        return true
+      }
+
+      return false
+    }
+
+    let serializedBody = JSON.stringify(body)
+
+    const refreshSerializedBody = (): void => {
+      serializedBody = JSON.stringify(body)
+    }
+
+    const buildFetchInit = () => ({
       method: 'POST' as const,
       headers,
-      body: JSON.stringify(body),
+      body: serializedBody,
       signal: options?.signal,
-    }
+    })
 
-    const maxAttempts = isGithub ? GITHUB_429_MAX_RETRIES : 1
+    const maxSelfHealAttempts = isLocal
+      ? localRetryBaseUrls.length + 1
+      : 0
+    const maxAttempts = (isGithub ? GITHUB_429_MAX_RETRIES : 1) + maxSelfHealAttempts
 
     const throwClassifiedTransportError = (
       error: unknown,
       requestUrl: string,
+      preclassifiedFailure?: ReturnType<typeof classifyOpenAINetworkFailure>,
     ): never => {
       if (options?.signal?.aborted) {
        throw error
      }
 
-      const failure = classifyOpenAINetworkFailure(error, {
-        url: requestUrl,
-      })
+      const failure =
+        preclassifiedFailure ??
+        classifyOpenAINetworkFailure(error, {
+          url: requestUrl,
+        })
       const redactedUrl = redactUrlForDiagnostics(requestUrl)
       const safeMessage =
         redactSecretValueForDisplay(
@@ -1499,11 +1548,14 @@ class OpenAIShimMessages {
       responseHeaders: Headers,
       requestUrl: string,
       rateHint = '',
+      preclassifiedFailure?: ReturnType<typeof classifyOpenAIHttpFailure>,
     ): never => {
-      const failure = classifyOpenAIHttpFailure({
-        status,
-        body: errorBody,
-      })
+      const failure =
+        preclassifiedFailure ??
+        classifyOpenAIHttpFailure({
+          status,
+          body: errorBody,
+        })
       const redactedUrl = redactUrlForDiagnostics(requestUrl)
 
       logForDebugging(
@@ -1525,10 +1577,13 @@ class OpenAIShimMessages {
     let response: Response | undefined
     for (let attempt = 0; attempt < maxAttempts; attempt++) {
       try {
-        response = await fetchWithProxyRetry(chatCompletionsUrl, fetchInit)
+        response = await fetchWithProxyRetry(
+          chatCompletionsUrl,
+          buildFetchInit(),
+        )
       } catch (error) {
         const isAbortError =
-          fetchInit.signal?.aborted === true ||
+          options?.signal?.aborted === true ||
           (typeof DOMException !== 'undefined' &&
             error instanceof DOMException &&
             error.name === 'AbortError') ||
@@ -1541,7 +1596,19 @@ class OpenAIShimMessages {
           throw error
         }
 
-        throwClassifiedTransportError(error, chatCompletionsUrl)
+        const failure = classifyOpenAINetworkFailure(error, {
+          url: chatCompletionsUrl,
+        })
+
+        if (
+          isLocal &&
+          failure.category === 'localhost_resolution_failed' &&
+          promoteNextLocalBaseUrl('localhost_resolution_failed')
+        ) {
+          continue
+        }
+
+        throwClassifiedTransportError(error, chatCompletionsUrl, failure)
       }
 
       if (response.ok) {
@@ -1633,6 +1700,10 @@ class OpenAIShimMessages {
           return responsesResponse
         }
         const responsesErrorBody = await responsesResponse.text().catch(() => 'unknown error')
+        const responsesFailure = classifyOpenAIHttpFailure({
+          status: responsesResponse.status,
+          body: responsesErrorBody,
+        })
         let responsesErrorResponse: object | undefined
         try { responsesErrorResponse = JSON.parse(responsesErrorBody) } catch { /* raw text */ }
         throwClassifiedHttpError(
@@ -1641,10 +1712,49 @@ class OpenAIShimMessages {
           responsesErrorResponse,
           responsesResponse.headers,
           responsesUrl,
+          '',
+          responsesFailure,
         )
        }
      }
 
+      const failure = classifyOpenAIHttpFailure({
+        status: response.status,
+        body: errorBody,
+      })
+
+      if (
+        isLocal &&
+        failure.category === 'endpoint_not_found' &&
+        promoteNextLocalBaseUrl('endpoint_not_found')
+      ) {
+        continue
+      }
+
+      const hasToolsPayload =
+        Array.isArray(body.tools) &&
+        body.tools.length > 0
+
+      if (
+        !didRetryWithoutTools &&
+        failure.category === 'tool_call_incompatible' &&
+        shouldAttemptLocalToollessRetry({
+          baseUrl: activeBaseUrl,
+          hasTools: hasToolsPayload,
+        })
+      ) {
+        didRetryWithoutTools = true
+        delete body.tools
+        delete body.tool_choice
+        refreshSerializedBody()
+
+        logForDebugging(
+          `[OpenAIShim] self-heal retry reason=tool_call_incompatible mode=toolless method=POST url=${redactUrlForDiagnostics(chatCompletionsUrl)} model=${request.resolvedModel}`,
+          { level: 'warn' },
+        )
+        continue
+      }
+
      let errorResponse: object | undefined
      try { errorResponse = JSON.parse(errorBody) } catch { /* raw text */ }
      throwClassifiedHttpError(
@@ -1654,6 +1764,7 @@ class OpenAIShimMessages {
|
|||||||
response.headers as unknown as Headers,
|
response.headers as unknown as Headers,
|
||||||
chatCompletionsUrl,
|
chatCompletionsUrl,
|
||||||
rateHint,
|
rateHint,
|
||||||
|
failure,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
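Note: the request loop above now has three outcomes per attempt: success, promote the next fallback base URL, or spend the single toolless retry before rethrowing the classified error. A condensed sketch of that control flow (names like sendOnce, Failure, and candidates are illustrative, not the shim's real identifiers):

  type Failure = { category: string }

  // One attempt per candidate base URL; at most one toolless retry is spent
  // before the classified error is rethrown to the caller.
  async function sendWithSelfHealing(
    candidates: string[],
    sendOnce: (baseUrl: string, withTools: boolean) => Promise<Response | Failure>,
  ): Promise<Response> {
    let withTools = true
    let index = 0
    while (index < candidates.length) {
      const result = await sendOnce(candidates[index]!, withTools)
      if (result instanceof Response) {
        return result
      }
      if (
        result.category === 'localhost_resolution_failed' ||
        result.category === 'endpoint_not_found'
      ) {
        index += 1 // promote the next fallback base URL and retry
        continue
      }
      if (withTools && result.category === 'tool_call_incompatible') {
        withTools = false // retry the same base URL once without tools
        continue
      }
      throw new Error(`unrecoverable provider failure: ${result.category}`)
    }
    throw new Error('all local base URL candidates exhausted')
  }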
@@ -2,8 +2,10 @@ import { afterEach, expect, test } from 'bun:test'

 import {
   getAdditionalModelOptionsCacheScope,
+  getLocalProviderRetryBaseUrls,
   isLocalProviderUrl,
   resolveProviderRequest,
+  shouldAttemptLocalToollessRetry,
 } from './providerConfig.js'

 const originalEnv = {

@@ -83,3 +85,42 @@ test('skips local model cache scope for remote openai-compatible providers', ()

   expect(getAdditionalModelOptionsCacheScope()).toBeNull()
 })
+
+test('derives local retry base URLs with /v1 and loopback fallback candidates', () => {
+  expect(getLocalProviderRetryBaseUrls('http://localhost:11434')).toEqual([
+    'http://localhost:11434/v1',
+    'http://127.0.0.1:11434',
+    'http://127.0.0.1:11434/v1',
+  ])
+})
+
+test('does not derive local retry base URLs for remote providers', () => {
+  expect(getLocalProviderRetryBaseUrls('https://api.openai.com/v1')).toEqual([])
+})
+
+test('enables local toolless retry for likely Ollama endpoints with tools', () => {
+  expect(
+    shouldAttemptLocalToollessRetry({
+      baseUrl: 'http://localhost:11434/v1',
+      hasTools: true,
+    }),
+  ).toBe(true)
+})
+
+test('disables local toolless retry when no tools are present', () => {
+  expect(
+    shouldAttemptLocalToollessRetry({
+      baseUrl: 'http://localhost:11434/v1',
+      hasTools: false,
+    }),
+  ).toBe(false)
+})
+
+test('disables local toolless retry for non-Ollama local endpoints', () => {
+  expect(
+    shouldAttemptLocalToollessRetry({
+      baseUrl: 'http://localhost:1234/v1',
+      hasTools: true,
+    }),
+  ).toBe(false)
+})

@@ -305,6 +305,101 @@ export function isLocalProviderUrl(baseUrl: string | undefined): boolean {
   }
 }

+function trimTrailingSlash(value: string): string {
+  return value.replace(/\/+$/, '')
+}
+
+function normalizePathWithV1(pathname: string): string {
+  const trimmed = trimTrailingSlash(pathname)
+  if (!trimmed || trimmed === '/') {
+    return '/v1'
+  }
+
+  if (trimmed.toLowerCase().endsWith('/v1')) {
+    return trimmed
+  }
+
+  return `${trimmed}/v1`
+}
+
+function isLikelyOllamaEndpoint(baseUrl: string): boolean {
+  try {
+    const parsed = new URL(baseUrl)
+    const hostname = parsed.hostname.toLowerCase()
+    const pathname = parsed.pathname.toLowerCase()
+
+    if (parsed.port === '11434') {
+      return true
+    }
+
+    return (
+      hostname.includes('ollama') ||
+      pathname.includes('ollama')
+    )
+  } catch {
+    return false
+  }
+}
+
+export function getLocalProviderRetryBaseUrls(baseUrl: string): string[] {
+  if (!isLocalProviderUrl(baseUrl)) {
+    return []
+  }
+
+  try {
+    const parsed = new URL(baseUrl)
+    const original = trimTrailingSlash(parsed.toString())
+    const seen = new Set<string>([original])
+    const candidates: string[] = []
+
+    const addCandidate = (hostname: string, pathname: string): void => {
+      const next = new URL(parsed.toString())
+      next.hostname = hostname
+      next.pathname = pathname
+      next.search = ''
+      next.hash = ''
+
+      const normalized = trimTrailingSlash(next.toString())
+      if (seen.has(normalized)) {
+        return
+      }
+
+      seen.add(normalized)
+      candidates.push(normalized)
+    }
+
+    const v1Pathname = normalizePathWithV1(parsed.pathname)
+    if (v1Pathname !== trimTrailingSlash(parsed.pathname)) {
+      addCandidate(parsed.hostname, v1Pathname)
+    }
+
+    const hostname = parsed.hostname.toLowerCase().replace(/^\[|\]$/g, '')
+    if (hostname === 'localhost' || hostname === '::1') {
+      addCandidate('127.0.0.1', parsed.pathname || '/')
+      addCandidate('127.0.0.1', v1Pathname)
+    }
+
+    return candidates
+  } catch {
+    return []
+  }
+}
+
+export function shouldAttemptLocalToollessRetry(options: {
+  baseUrl: string
+  hasTools: boolean
+}): boolean {
+  if (!options.hasTools) {
+    return false
+  }
+
+  if (!isLocalProviderUrl(options.baseUrl)) {
+    return false
+  }
+
+  return isLikelyOllamaEndpoint(options.baseUrl)
+}
+
 export function isCodexBaseUrl(baseUrl: string | undefined): boolean {
   if (!baseUrl) return false
   try {

@@ -412,6 +507,9 @@ export function resolveProviderRequest(options?: {
       ? normalizedGeminiEnvBaseUrl
       : asNamedEnvUrl(process.env.OPENAI_BASE_URL, 'OPENAI_BASE_URL')

+  // In Mistral mode, a literal "undefined" MISTRAL_BASE_URL is treated as
+  // misconfiguration and falls back to OPENAI_API_BASE, then
+  // DEFAULT_MISTRAL_BASE_URL for a safe default endpoint.
   const fallbackEnvBaseUrl = isMistralMode
     ? (primaryEnvBaseUrl === undefined
       ? asNamedEnvUrl(process.env.OPENAI_API_BASE, 'OPENAI_API_BASE') ?? DEFAULT_MISTRAL_BASE_URL
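Note: for quick reference, the observable behavior of the two new exports, with outputs taken directly from the focused tests above:

  import {
    getLocalProviderRetryBaseUrls,
    shouldAttemptLocalToollessRetry,
  } from './providerConfig.js'

  // Ordered fallback candidates for a bare local Ollama URL.
  getLocalProviderRetryBaseUrls('http://localhost:11434')
  // -> ['http://localhost:11434/v1', 'http://127.0.0.1:11434', 'http://127.0.0.1:11434/v1']

  // Remote providers never get fallback candidates.
  getLocalProviderRetryBaseUrls('https://api.openai.com/v1') // -> []

  // Toolless retry is gated to likely-Ollama endpoints that actually sent tools.
  shouldAttemptLocalToollessRetry({ baseUrl: 'http://localhost:11434/v1', hasTools: true })  // -> true
  shouldAttemptLocalToollessRetry({ baseUrl: 'http://localhost:1234/v1', hasTools: true })   // -> false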
@@ -70,7 +70,7 @@ describe('runAutoFixCheck', () => {

   test('handles timeout gracefully', async () => {
     const result = await runAutoFixCheck({
-      lint: 'sleep 10',
+      lint: 'node -e "setTimeout(() => {}, 10000)"',
       timeout: 100,

       cwd: '/tmp',
@@ -46,14 +46,31 @@ async function runCommand(

   const killTree = () => {
     try {
-      if (!isWindows && proc.pid) {
+      if (isWindows && proc.pid) {
+        // shell=true on Windows can leave child commands running unless we
+        // terminate the full process tree.
+        const killer = spawn('taskkill', ['/pid', String(proc.pid), '/T', '/F'], {
+          windowsHide: true,
+          stdio: 'ignore',
+        })
+        killer.unref()
+        return
+      }
+
+      if (proc.pid) {
         // Kill the entire process group
         process.kill(-proc.pid, 'SIGTERM')
-      } else {
-        proc.kill('SIGTERM')
+        return
       }
+
+      proc.kill('SIGTERM')
     } catch {
-      // Process may have already exited
+      // Process may have already exited; fallback to direct child kill.
+      try {
+        proc.kill('SIGTERM')
+      } catch {
+        // Ignore final fallback errors.
+      }
     }
   }

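Note: the negative-PID kill only reaches a whole process group if the child owns one, so this hunk implicitly assumes the spawn side looks roughly like the sketch below (the spawn call is not shown in this diff; the option names are Node's child_process API, and 'command' is a placeholder):

  import { spawn } from 'node:child_process'

  const isWindows = process.platform === 'win32'
  const command = 'npm run lint' // placeholder; runCommand receives the real command

  const proc = spawn(command, {
    shell: true,
    // On POSIX, detached places the child in its own process group, which is
    // what lets process.kill(-proc.pid, 'SIGTERM') above reach grandchildren
    // spawned by the shell. Windows takes the taskkill branch instead.
    detached: !isWindows,
    stdio: 'pipe',
  })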
@@ -26,10 +26,10 @@ test('initializeWiki creates the expected wiki scaffold', async () => {

   expect(result.alreadyExisted).toBe(false)
   expect(result.createdFiles).toEqual([
-    '.openclaude/wiki/schema.md',
-    '.openclaude/wiki/index.md',
-    '.openclaude/wiki/log.md',
-    '.openclaude/wiki/pages/architecture.md',
+    join('.openclaude', 'wiki', 'schema.md'),
+    join('.openclaude', 'wiki', 'index.md'),
+    join('.openclaude', 'wiki', 'log.md'),
+    join('.openclaude', 'wiki', 'pages', 'architecture.md'),
   ])
   expect(await readFile(paths.schemaFile, 'utf8')).toContain(
     '# OpenClaude Wiki Schema',

@@ -44,9 +44,10 @@ function getCandidateLocalBinaryPaths(localInstallDir: string): string[] {
 }

 export function isManagedLocalInstallationPath(execPath: string): boolean {
+  const normalizedExecPath = execPath.replace(/\\+/g, '/')
   return (
-    execPath.includes('/.openclaude/local/node_modules/') ||
-    execPath.includes('/.claude/local/node_modules/')
+    normalizedExecPath.includes('/.openclaude/local/node_modules/') ||
+    normalizedExecPath.includes('/.claude/local/node_modules/')
   )
 }

@@ -1,9 +1,9 @@
 import { afterEach, expect, mock, test } from 'bun:test'

-import {
-  getLocalOpenAICompatibleProviderLabel,
-  listOpenAICompatibleModels,
-} from './providerDiscovery.js'
+async function loadProviderDiscoveryModule() {
+  // @ts-expect-error cache-busting query string for Bun module mocks
+  return import(`./providerDiscovery.js?ts=${Date.now()}-${Math.random()}`)
+}

 const originalFetch = globalThis.fetch
 const originalEnv = {

@@ -16,6 +16,8 @@ afterEach(() => {
 })

 test('lists models from a local openai-compatible /models endpoint', async () => {
+  const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
+
   globalThis.fetch = mock((input, init) => {
     const url = typeof input === 'string' ? input : input.url
     expect(url).toBe('http://localhost:1234/v1/models')

@@ -47,6 +49,8 @@ test('lists models from a local openai-compatible /models endpoint', async () =>
 })

 test('returns null when a local openai-compatible /models request fails', async () => {
+  const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
+
   globalThis.fetch = mock(() =>
     Promise.resolve(new Response('not available', { status: 503 })),
   ) as typeof globalThis.fetch

@@ -56,13 +60,19 @@ test('returns null when a local openai-compatible /models request fails', async
   ).resolves.toBeNull()
 })

-test('detects LM Studio from the default localhost port', () => {
+test('detects LM Studio from the default localhost port', async () => {
+  const { getLocalOpenAICompatibleProviderLabel } =
+    await loadProviderDiscoveryModule()
+
   expect(getLocalOpenAICompatibleProviderLabel('http://localhost:1234/v1')).toBe(
     'LM Studio',
   )
 })

-test('detects common local openai-compatible providers by hostname', () => {
+test('detects common local openai-compatible providers by hostname', async () => {
+  const { getLocalOpenAICompatibleProviderLabel } =
+    await loadProviderDiscoveryModule()
+
   expect(
     getLocalOpenAICompatibleProviderLabel('http://localai.local:8080/v1'),
   ).toBe('LocalAI')

@@ -71,8 +81,212 @@ test('detects common local openai-compatible providers by hostname', () => {
   ).toBe('vLLM')
 })

-test('falls back to a generic local openai-compatible label', () => {
+test('falls back to a generic local openai-compatible label', async () => {
+  const { getLocalOpenAICompatibleProviderLabel } =
+    await loadProviderDiscoveryModule()
+
   expect(
     getLocalOpenAICompatibleProviderLabel('http://127.0.0.1:8080/v1'),
   ).toBe('Local OpenAI-compatible')
+})
+
+test('ollama generation readiness reports unreachable when tags endpoint is down', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+
+  const calledUrls: string[] = []
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    calledUrls.push(url)
+    return Promise.resolve(new Response('not available', { status: 503 }))
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'unreachable',
+    models: [],
+  })
+
+  expect(calledUrls).toEqual([
+    'http://localhost:11434/api/tags',
+  ])
+})
+
+test('ollama generation readiness reports no models when server is reachable', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+
+  const calledUrls: string[] = []
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    calledUrls.push(url)
+    return Promise.resolve(
+      new Response(JSON.stringify({ models: [] }), {
+        status: 200,
+        headers: { 'Content-Type': 'application/json' },
+      }),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'no_models',
+    models: [],
+  })
+
+  expect(calledUrls).toEqual([
+    'http://localhost:11434/api/tags',
+  ])
+})
+
+test('ollama generation readiness reports generation_failed when requested model is missing', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+
+  const calledUrls: string[] = []
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    calledUrls.push(url)
+    return Promise.resolve(
+      new Response(
+        JSON.stringify({
+          models: [{ name: 'llama3.1:8b', size: 1024 }],
+        }),
+        {
+          status: 200,
+          headers: { 'Content-Type': 'application/json' },
+        },
+      ),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+      model: 'qwen2.5-coder:7b',
+    }),
+  ).resolves.toMatchObject({
+    state: 'generation_failed',
+    probeModel: 'qwen2.5-coder:7b',
+    detail: 'requested model not installed: qwen2.5-coder:7b',
+  })
+
+  expect(calledUrls).toEqual(['http://localhost:11434/api/tags'])
+})
+
+test('ollama generation readiness reports generation failures when chat probe fails', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.endsWith('/api/tags')) {
+      return Promise.resolve(
+        new Response(
+          JSON.stringify({
+            models: [{ name: 'qwen2.5-coder:7b', size: 42 }],
+          }),
+          {
+            status: 200,
+            headers: { 'Content-Type': 'application/json' },
+          },
+        ),
+      )
+    }
+
+    return Promise.resolve(new Response('model not found', { status: 404 }))
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+      model: 'qwen2.5-coder:7b',
+    }),
+  ).resolves.toMatchObject({
+    state: 'generation_failed',
+    probeModel: 'qwen2.5-coder:7b',
+  })
+})
+
+test('ollama generation readiness reports generation_failed when chat probe returns invalid JSON', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.endsWith('/api/tags')) {
+      return Promise.resolve(
+        new Response(
+          JSON.stringify({
+            models: [{ name: 'llama3.1:8b', size: 1024 }],
+          }),
+          {
+            status: 200,
+            headers: { 'Content-Type': 'application/json' },
+          },
+        ),
+      )
+    }
+
+    return Promise.resolve(
+      new Response('<html>proxy error</html>', {
+        status: 200,
+        headers: { 'Content-Type': 'text/html' },
+      }),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'generation_failed',
+    probeModel: 'llama3.1:8b',
+    detail: 'invalid JSON response',
+  })
+})
+
+test('ollama generation readiness reports ready when chat probe succeeds', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.endsWith('/api/tags')) {
+      return Promise.resolve(
+        new Response(
+          JSON.stringify({
+            models: [{ name: 'llama3.1:8b', size: 1024 }],
+          }),
+          {
+            status: 200,
+            headers: { 'Content-Type': 'application/json' },
+          },
+        ),
+      )
+    }
+
+    return Promise.resolve(
+      new Response(
+        JSON.stringify({
+          message: { role: 'assistant', content: 'OK' },
+          done: true,
+        }),
+        {
+          status: 200,
+          headers: { 'Content-Type': 'application/json' },
+        },
+      ),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'ready',
+    probeModel: 'llama3.1:8b',
+  })
 })
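Note: the per-test dynamic import above re-evaluates providerDiscovery.js so every test sees fresh module state while globalThis.fetch is mocked; a unique query string creates a distinct entry in the module cache, which is the behavior the loader comment relies on. Condensed form of the pattern:

  // Each call yields a fresh module instance; Date.now() and Math.random()
  // make the specifier unique so the cached evaluation is not reused.
  const mod = await import(`./providerDiscovery.js?ts=${Date.now()}-${Math.random()}`)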
@@ -4,6 +4,13 @@ import { DEFAULT_OPENAI_BASE_URL } from '../services/api/providerConfig.js'
 export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434'
 export const DEFAULT_ATOMIC_CHAT_BASE_URL = 'http://127.0.0.1:1337'

+export type OllamaGenerationReadiness = {
+  state: 'ready' | 'unreachable' | 'no_models' | 'generation_failed'
+  models: OllamaModelDescriptor[]
+  probeModel?: string
+  detail?: string
+}
+
 function withTimeoutSignal(timeoutMs: number): {
   signal: AbortSignal
   clear: () => void

@@ -20,6 +27,83 @@ function trimTrailingSlash(value: string): string {
   return value.replace(/\/+$/, '')
 }

+function compactDetail(value: string, maxLength = 180): string {
+  const compact = value.trim().replace(/\s+/g, ' ')
+  if (!compact) {
+    return ''
+  }
+
+  if (compact.length <= maxLength) {
+    return compact
+  }
+
+  return `${compact.slice(0, maxLength)}...`
+}
+
+type OllamaTagsPayload = {
+  models?: Array<{
+    name?: string
+    size?: number
+    details?: {
+      family?: string
+      families?: string[]
+      parameter_size?: string
+      quantization_level?: string
+    }
+  }>
+}
+
+function normalizeOllamaModels(
+  payload: OllamaTagsPayload,
+): OllamaModelDescriptor[] {
+  return (payload.models ?? [])
+    .filter(model => Boolean(model.name))
+    .map(model => ({
+      name: model.name!,
+      sizeBytes: typeof model.size === 'number' ? model.size : null,
+      family: model.details?.family ?? null,
+      families: model.details?.families ?? [],
+      parameterSize: model.details?.parameter_size ?? null,
+      quantizationLevel: model.details?.quantization_level ?? null,
+    }))
+}
+
+async function fetchOllamaModelsProbe(
+  baseUrl?: string,
+  timeoutMs = 5000,
+): Promise<{
+  reachable: boolean
+  models: OllamaModelDescriptor[]
+}> {
+  const { signal, clear } = withTimeoutSignal(timeoutMs)
+  try {
+    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
+      method: 'GET',
+      signal,
+    })
+
+    if (!response.ok) {
+      return {
+        reachable: false,
+        models: [],
+      }
+    }
+
+    const payload = (await response.json().catch(() => ({}))) as OllamaTagsPayload
+    return {
+      reachable: true,
+      models: normalizeOllamaModels(payload),
+    }
+  } catch {
+    return {
+      reachable: false,
+      models: [],
+    }
+  } finally {
+    clear()
+  }
+}
+
 export function getOllamaApiBaseUrl(baseUrl?: string): string {
   const parsed = new URL(
     baseUrl || process.env.OLLAMA_BASE_URL || DEFAULT_OLLAMA_BASE_URL,

@@ -121,61 +205,15 @@ export function getLocalOpenAICompatibleProviderLabel(baseUrl?: string): string
 }

 export async function hasLocalOllama(baseUrl?: string): Promise<boolean> {
-  const { signal, clear } = withTimeoutSignal(1200)
-  try {
-    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
-      method: 'GET',
-      signal,
-    })
-    return response.ok
-  } catch {
-    return false
-  } finally {
-    clear()
-  }
+  const { reachable } = await fetchOllamaModelsProbe(baseUrl, 1200)
+  return reachable
 }

 export async function listOllamaModels(
   baseUrl?: string,
 ): Promise<OllamaModelDescriptor[]> {
-  const { signal, clear } = withTimeoutSignal(5000)
-  try {
-    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
-      method: 'GET',
-      signal,
-    })
-    if (!response.ok) {
-      return []
-    }
-
-    const data = (await response.json()) as {
-      models?: Array<{
-        name?: string
-        size?: number
-        details?: {
-          family?: string
-          families?: string[]
-          parameter_size?: string
-          quantization_level?: string
-        }
-      }>
-    }
-
-    return (data.models ?? [])
-      .filter(model => Boolean(model.name))
-      .map(model => ({
-        name: model.name!,
-        sizeBytes: typeof model.size === 'number' ? model.size : null,
-        family: model.details?.family ?? null,
-        families: model.details?.families ?? [],
-        parameterSize: model.details?.parameter_size ?? null,
-        quantizationLevel: model.details?.quantization_level ?? null,
-      }))
-  } catch {
-    return []
-  } finally {
-    clear()
-  }
+  const { models } = await fetchOllamaModelsProbe(baseUrl, 5000)
+  return models
 }

 export async function listOpenAICompatibleModels(options?: {

@@ -294,3 +332,106 @@ export async function benchmarkOllamaModel(
     clear()
   }
 }
+
+export async function probeOllamaGenerationReadiness(options?: {
+  baseUrl?: string
+  model?: string
+  timeoutMs?: number
+}): Promise<OllamaGenerationReadiness> {
+  const timeoutMs = options?.timeoutMs ?? 8000
+  const { reachable, models } = await fetchOllamaModelsProbe(
+    options?.baseUrl,
+    timeoutMs,
+  )
+  if (!reachable) {
+    return {
+      state: 'unreachable',
+      models: [],
+    }
+  }
+
+  if (models.length === 0) {
+    return {
+      state: 'no_models',
+      models: [],
+    }
+  }
+
+  const requestedModel = options?.model?.trim() || undefined
+  if (requestedModel && !models.some(model => model.name === requestedModel)) {
+    return {
+      state: 'generation_failed',
+      models,
+      probeModel: requestedModel,
+      detail: `requested model not installed: ${requestedModel}`,
+    }
+  }
+
+  const probeModel = requestedModel ?? models[0]!.name
+  const { signal, clear } = withTimeoutSignal(timeoutMs)
+
+  try {
+    const response = await fetch(`${getOllamaApiBaseUrl(options?.baseUrl)}/api/chat`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      signal,
+      body: JSON.stringify({
+        model: probeModel,
+        stream: false,
+        messages: [{ role: 'user', content: 'Reply with OK.' }],
+        options: {
+          temperature: 0,
+          num_predict: 8,
+        },
+      }),
+    })
+
+    if (!response.ok) {
+      const responseBody = await response.text().catch(() => '')
+      const detailSuffix = compactDetail(responseBody)
+      return {
+        state: 'generation_failed',
+        models,
+        probeModel,
+        detail: detailSuffix
+          ? `status ${response.status}: ${detailSuffix}`
+          : `status ${response.status}`,
+      }
+    }
+
+    try {
+      await response.json()
+    } catch {
+      return {
+        state: 'generation_failed',
+        models,
+        probeModel,
+        detail: 'invalid JSON response',
+      }
+    }
+
+    return {
+      state: 'ready',
+      models,
+      probeModel,
+    }
+  } catch (error) {
+    const detail =
+      error instanceof Error
+        ? error.name === 'AbortError'
+          ? 'request timed out'
+          : error.message
+        : String(error)

+    return {
+      state: 'generation_failed',
+      models,
+      probeModel,
+      detail,
+    }
+  } finally {
+    clear()
+  }
+}
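Note: a minimal consumer of the new probe, switching over the four readiness states declared in OllamaGenerationReadiness. The log messages are illustrative; the real wiring to setup flows and system-check lives elsewhere in this PR:

  import { probeOllamaGenerationReadiness } from './providerDiscovery.js'

  const readiness = await probeOllamaGenerationReadiness({
    baseUrl: 'http://localhost:11434',
    model: 'llama3.1:8b',
  })

  switch (readiness.state) {
    case 'ready':
      console.log(`generation OK via ${readiness.probeModel}`)
      break
    case 'unreachable':
      console.log('Ollama server is not reachable; is it running?')
      break
    case 'no_models':
      console.log('server is up but has no installed models')
      break
    case 'generation_failed':
      console.log(`probe failed: ${readiness.detail ?? 'unknown'}`)
      break
  }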
@@ -300,9 +300,9 @@ export function getRelativeSettingsFilePathForSource(
 ): string {
   switch (source) {
     case 'projectSettings':
-      return join('.openclaude', 'settings.json')
+      return '.openclaude/settings.json'
     case 'localSettings':
-      return join('.openclaude', 'settings.local.json')
+      return '.openclaude/settings.local.json'
   }
 }
src/utils/urlRedaction.test.ts (new file, 38 lines)
@@ -0,0 +1,38 @@
+import { describe, expect, test } from 'bun:test'
+
+import { redactUrlForDisplay } from './urlRedaction.ts'
+
+describe('redactUrlForDisplay', () => {
+  test('redacts credentials and sensitive query params for valid URLs', () => {
+    const redacted = redactUrlForDisplay(
+      'http://user:pass@localhost:11434/v1?api_key=secret&foo=bar',
+    )
+
+    expect(redacted).toBe(
+      'http://redacted:redacted@localhost:11434/v1?api_key=redacted&foo=bar',
+    )
+  })
+
+  test('redacts token-like query parameter names', () => {
+    const redacted = redactUrlForDisplay(
+      'https://example.com/v1?x_access_token=abc123&model=qwen2.5-coder',
+    )
+
+    expect(redacted).toBe(
+      'https://example.com/v1?x_access_token=redacted&model=qwen2.5-coder',
+    )
+  })
+
+  test('falls back to regex redaction for malformed URLs', () => {
+    const redacted = redactUrlForDisplay(
+      '//user:pass@localhost:11434?token=abc&mode=test',
+    )
+
+    expect(redacted).toBe('//redacted@localhost:11434?token=redacted&mode=test')
+  })
+
+  test('keeps non-sensitive URLs unchanged', () => {
+    const url = 'http://localhost:11434/v1?model=llama3.1:8b'
+    expect(redactUrlForDisplay(url)).toBe(url)
+  })
+})
src/utils/urlRedaction.ts (new file, 48 lines)
@@ -0,0 +1,48 @@
+const SENSITIVE_URL_QUERY_PARAM_TOKENS = [
+  'api_key',
+  'apikey',
+  'key',
+  'token',
+  'access_token',
+  'refresh_token',
+  'signature',
+  'sig',
+  'secret',
+  'password',
+  'passwd',
+  'pwd',
+  'auth',
+  'authorization',
+]
+
+function shouldRedactUrlQueryParam(name: string): boolean {
+  const lower = name.toLowerCase()
+  return SENSITIVE_URL_QUERY_PARAM_TOKENS.some(token => lower.includes(token))
+}
+
+export function redactUrlForDisplay(rawUrl: string): string {
+  try {
+    const parsed = new URL(rawUrl)
+    if (parsed.username) {
+      parsed.username = 'redacted'
+    }
+    if (parsed.password) {
+      parsed.password = 'redacted'
+    }
+
+    for (const key of parsed.searchParams.keys()) {
+      if (shouldRedactUrlQueryParam(key)) {
+        parsed.searchParams.set(key, 'redacted')
+      }
+    }
+
+    return parsed.toString()
+  } catch {
+    return rawUrl
+      .replace(/\/\/[^/@\s]+(?::[^/@\s]*)?@/g, '//redacted@')
+      .replace(
+        /([?&](?:token|access_token|refresh_token|api_key|apikey|key|password|passwd|pwd|auth|authorization|signature|sig|secret)=)[^&#]*/gi,
+        '$1redacted',
+      )
+  }
+}
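Note: intended call pattern for the new helper, with outputs taken from the tests above. Redact before any URL reaches logs or error details; the shim hunks above call a similarly named helper (redactUrlForDiagnostics), so treat this pairing as illustrative:

  import { redactUrlForDisplay } from './urlRedaction.ts'

  redactUrlForDisplay('http://user:pass@localhost:11434/v1?api_key=secret&foo=bar')
  // -> 'http://redacted:redacted@localhost:11434/v1?api_key=redacted&foo=bar'

  // Malformed URLs fall back to regex-based redaction instead of throwing.
  redactUrlForDisplay('//user:pass@localhost:11434?token=abc&mode=test')
  // -> '//redacted@localhost:11434?token=redacted&mode=test'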