feat(api): improve local provider reliability with readiness and self-healing (#738)

* feat(api): classify openai-compatible provider failures

* Update src/services/api/providerConfig.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/errors.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* feat(api): harden openai-compatible diagnostics and env fallback

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/errors.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/errors.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix openaiShim duplicate requests and diagnostics

* remove unused url from http failure classifier

* dedupe env diagnostic warnings

* Remove hardcoded URLs from OpenAI error tests

Removed hardcoded URLs from network failure classification tests.

* Update providerConfig.envDiagnostics.test.ts

* fix(openai-shim): return successful responses and restore localhost classifier tests

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* feat(provider): add truthful local generation readiness checks

Implement Phase 2 provider readiness behavior by adding structured Ollama generation probes, wiring setup flows to readiness states, extending system-check with generation readiness output, and updating focused tests.

* feat(api): add local self-healing fallback retries

Implement Phase 3 self-healing behavior for local OpenAI-compatible providers: retry base URL fallbacks for localhost resolution and endpoint mismatches, plus capability-gated toolless retry for tool-incompatible local models; include diagnostics and focused tests.

* fix(api): address review blockers for local provider reliability

* Update src/utils/providerDiscovery.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/services/api/openaiShim.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix: harden readiness probes and cross-platform test stability

* fix: refresh toolless retry payload and stabilize osc clipboard test

* fix: harden Ollama readiness parsing and redact provider URLs

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
nehan authored 2026-04-20 12:24:02 +04:00, committed by GitHub
parent b09972f223
commit 4cb963e660
22 changed files with 1452 additions and 208 deletions
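The Phase 2 and Phase 3 behaviors summarized in the commit message are only partially excerpted below (src/services/api/openaiShim.ts itself is not shown). As a rough, hypothetical sketch of the Phase 3 base-URL fallback retry — every name and URL here is illustrative, not the actual openaiShim.ts API:

// Hypothetical sketch of the "retry base URL fallbacks" behavior described
// above; the real logic lives in src/services/api/openaiShim.ts, which is
// not part of this excerpt.
const LOCALHOST_FALLBACK_BASE_URLS = [
  'http://127.0.0.1:11434/v1', // localhost sometimes resolves to the wrong IP stack
  'http://[::1]:11434/v1',
]

async function postChatWithLocalFallbacks(
  baseUrl: string,
  payload: Record<string, unknown>,
): Promise<Response> {
  const candidates = [
    baseUrl,
    ...LOCALHOST_FALLBACK_BASE_URLS.filter(url => url !== baseUrl),
  ]
  let lastFailure: unknown
  for (const candidate of candidates) {
    try {
      const response = await fetch(
        `${candidate.replace(/\/+$/, '')}/chat/completions`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(payload),
        },
      )
      if (response.ok) {
        return response
      }
      lastFailure = new Error(`status ${response.status} from ${candidate}`)
    } catch (error) {
      lastFailure = error // e.g. ECONNREFUSED on an IPv4/IPv6 mismatch
    }
  }
  throw lastFailure
}

The commit also gates a toolless retry on model capabilities; that path is likewise only summarized here, not excerpted.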

View File

@@ -44,9 +44,10 @@ function getCandidateLocalBinaryPaths(localInstallDir: string): string[] {
}

export function isManagedLocalInstallationPath(execPath: string): boolean {
+  const normalizedExecPath = execPath.replace(/\\+/g, '/')
  return (
-    execPath.includes('/.openclaude/local/node_modules/') ||
-    execPath.includes('/.claude/local/node_modules/')
+    normalizedExecPath.includes('/.openclaude/local/node_modules/') ||
+    normalizedExecPath.includes('/.claude/local/node_modules/')
  )
}
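The normalization matters for Windows executable paths, which arrive with backslashes; a quick illustration (the path is made up):

// Before this change the raw backslash path never matched the '/'-delimited
// patterns; after normalization it does.
isManagedLocalInstallationPath('C:\\Users\\dev\\.openclaude\\local\\node_modules\\.bin\\claude')
// → true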

View File

@@ -1,9 +1,9 @@
import { afterEach, expect, mock, test } from 'bun:test'
-import {
-  getLocalOpenAICompatibleProviderLabel,
-  listOpenAICompatibleModels,
-} from './providerDiscovery.js'
+
+async function loadProviderDiscoveryModule() {
+  // @ts-expect-error cache-busting query string for Bun module mocks
+  return import(`./providerDiscovery.js?ts=${Date.now()}-${Math.random()}`)
+}

const originalFetch = globalThis.fetch
const originalEnv = {
@@ -16,6 +16,8 @@ afterEach(() => {
})

test('lists models from a local openai-compatible /models endpoint', async () => {
+  const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
+
  globalThis.fetch = mock((input, init) => {
    const url = typeof input === 'string' ? input : input.url
    expect(url).toBe('http://localhost:1234/v1/models')
@@ -47,6 +49,8 @@ test('lists models from a local openai-compatible /models endpoint', async () =>
})

test('returns null when a local openai-compatible /models request fails', async () => {
+  const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
+
  globalThis.fetch = mock(() =>
    Promise.resolve(new Response('not available', { status: 503 })),
  ) as typeof globalThis.fetch
@@ -56,13 +60,19 @@ test('returns null when a local openai-compatible /models request fails', async
  ).resolves.toBeNull()
})

-test('detects LM Studio from the default localhost port', () => {
+test('detects LM Studio from the default localhost port', async () => {
+  const { getLocalOpenAICompatibleProviderLabel } =
+    await loadProviderDiscoveryModule()
+
  expect(getLocalOpenAICompatibleProviderLabel('http://localhost:1234/v1')).toBe(
    'LM Studio',
  )
})

-test('detects common local openai-compatible providers by hostname', () => {
+test('detects common local openai-compatible providers by hostname', async () => {
+  const { getLocalOpenAICompatibleProviderLabel } =
+    await loadProviderDiscoveryModule()
+
  expect(
    getLocalOpenAICompatibleProviderLabel('http://localai.local:8080/v1'),
  ).toBe('LocalAI')
@@ -71,8 +81,212 @@ test('detects common local openai-compatible providers by hostname', () => {
  ).toBe('vLLM')
})

-test('falls back to a generic local openai-compatible label', () => {
+test('falls back to a generic local openai-compatible label', async () => {
+  const { getLocalOpenAICompatibleProviderLabel } =
+    await loadProviderDiscoveryModule()
+
  expect(
    getLocalOpenAICompatibleProviderLabel('http://127.0.0.1:8080/v1'),
  ).toBe('Local OpenAI-compatible')
})
+
+test('ollama generation readiness reports unreachable when tags endpoint is down', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  const calledUrls: string[] = []
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    calledUrls.push(url)
+    return Promise.resolve(new Response('not available', { status: 503 }))
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'unreachable',
+    models: [],
+  })
+  expect(calledUrls).toEqual([
+    'http://localhost:11434/api/tags',
+  ])
+})
+
+test('ollama generation readiness reports no models when server is reachable', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  const calledUrls: string[] = []
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    calledUrls.push(url)
+    return Promise.resolve(
+      new Response(JSON.stringify({ models: [] }), {
+        status: 200,
+        headers: { 'Content-Type': 'application/json' },
+      }),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'no_models',
+    models: [],
+  })
+  expect(calledUrls).toEqual([
+    'http://localhost:11434/api/tags',
+  ])
+})
+
+test('ollama generation readiness reports generation_failed when requested model is missing', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  const calledUrls: string[] = []
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    calledUrls.push(url)
+    return Promise.resolve(
+      new Response(
+        JSON.stringify({
+          models: [{ name: 'llama3.1:8b', size: 1024 }],
+        }),
+        {
+          status: 200,
+          headers: { 'Content-Type': 'application/json' },
+        },
+      ),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+      model: 'qwen2.5-coder:7b',
+    }),
+  ).resolves.toMatchObject({
+    state: 'generation_failed',
+    probeModel: 'qwen2.5-coder:7b',
+    detail: 'requested model not installed: qwen2.5-coder:7b',
+  })
+  expect(calledUrls).toEqual(['http://localhost:11434/api/tags'])
+})
+
+test('ollama generation readiness reports generation failures when chat probe fails', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.endsWith('/api/tags')) {
+      return Promise.resolve(
+        new Response(
+          JSON.stringify({
+            models: [{ name: 'qwen2.5-coder:7b', size: 42 }],
+          }),
+          {
+            status: 200,
+            headers: { 'Content-Type': 'application/json' },
+          },
+        ),
+      )
+    }
+    return Promise.resolve(new Response('model not found', { status: 404 }))
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+      model: 'qwen2.5-coder:7b',
+    }),
+  ).resolves.toMatchObject({
+    state: 'generation_failed',
+    probeModel: 'qwen2.5-coder:7b',
+  })
+})
+
+test('ollama generation readiness reports generation_failed when chat probe returns invalid JSON', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.endsWith('/api/tags')) {
+      return Promise.resolve(
+        new Response(
+          JSON.stringify({
+            models: [{ name: 'llama3.1:8b', size: 1024 }],
+          }),
+          {
+            status: 200,
+            headers: { 'Content-Type': 'application/json' },
+          },
+        ),
+      )
+    }
+    return Promise.resolve(
+      new Response('<html>proxy error</html>', {
+        status: 200,
+        headers: { 'Content-Type': 'text/html' },
+      }),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'generation_failed',
+    probeModel: 'llama3.1:8b',
+    detail: 'invalid JSON response',
+  })
+})
+
+test('ollama generation readiness reports ready when chat probe succeeds', async () => {
+  const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
+  globalThis.fetch = mock(input => {
+    const url = typeof input === 'string' ? input : input.url
+    if (url.endsWith('/api/tags')) {
+      return Promise.resolve(
+        new Response(
+          JSON.stringify({
+            models: [{ name: 'llama3.1:8b', size: 1024 }],
+          }),
+          {
+            status: 200,
+            headers: { 'Content-Type': 'application/json' },
+          },
+        ),
+      )
+    }
+    return Promise.resolve(
+      new Response(
+        JSON.stringify({
+          message: { role: 'assistant', content: 'OK' },
+          done: true,
+        }),
+        {
+          status: 200,
+          headers: { 'Content-Type': 'application/json' },
+        },
+      ),
+    )
+  }) as typeof globalThis.fetch
+
+  await expect(
+    probeOllamaGenerationReadiness({
+      baseUrl: 'http://localhost:11434',
+    }),
+  ).resolves.toMatchObject({
+    state: 'ready',
+    probeModel: 'llama3.1:8b',
+  })
+})
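The `loadProviderDiscoveryModule` helper above re-imports the module under a unique query string so each test observes fresh module state under Bun's `mock()`. A minimal, generalized version of that pattern (the helper name is ours, not the repo's):

// Re-import a module with a cache-busting query string so Bun's module
// cache cannot hand back state mutated by an earlier test.
async function loadFreshModule<T>(specifier: string): Promise<T> {
  // @ts-expect-error query strings are not part of the module specifier's types
  return import(`${specifier}?ts=${Date.now()}-${Math.random()}`) as Promise<T>
}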

View File

@@ -4,6 +4,13 @@ import { DEFAULT_OPENAI_BASE_URL } from '../services/api/providerConfig.js'
export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434'
export const DEFAULT_ATOMIC_CHAT_BASE_URL = 'http://127.0.0.1:1337'

+export type OllamaGenerationReadiness = {
+  state: 'ready' | 'unreachable' | 'no_models' | 'generation_failed'
+  models: OllamaModelDescriptor[]
+  probeModel?: string
+  detail?: string
+}
+
function withTimeoutSignal(timeoutMs: number): {
  signal: AbortSignal
  clear: () => void
@@ -20,6 +27,83 @@ function trimTrailingSlash(value: string): string {
  return value.replace(/\/+$/, '')
}

+function compactDetail(value: string, maxLength = 180): string {
+  const compact = value.trim().replace(/\s+/g, ' ')
+  if (!compact) {
+    return ''
+  }
+  if (compact.length <= maxLength) {
+    return compact
+  }
+  return `${compact.slice(0, maxLength)}...`
+}
+
+type OllamaTagsPayload = {
+  models?: Array<{
+    name?: string
+    size?: number
+    details?: {
+      family?: string
+      families?: string[]
+      parameter_size?: string
+      quantization_level?: string
+    }
+  }>
+}
+
+function normalizeOllamaModels(
+  payload: OllamaTagsPayload,
+): OllamaModelDescriptor[] {
+  return (payload.models ?? [])
+    .filter(model => Boolean(model.name))
+    .map(model => ({
+      name: model.name!,
+      sizeBytes: typeof model.size === 'number' ? model.size : null,
+      family: model.details?.family ?? null,
+      families: model.details?.families ?? [],
+      parameterSize: model.details?.parameter_size ?? null,
+      quantizationLevel: model.details?.quantization_level ?? null,
+    }))
+}
+
+async function fetchOllamaModelsProbe(
+  baseUrl?: string,
+  timeoutMs = 5000,
+): Promise<{
+  reachable: boolean
+  models: OllamaModelDescriptor[]
+}> {
+  const { signal, clear } = withTimeoutSignal(timeoutMs)
+  try {
+    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
+      method: 'GET',
+      signal,
+    })
+    if (!response.ok) {
+      return {
+        reachable: false,
+        models: [],
+      }
+    }
+    const payload = (await response.json().catch(() => ({}))) as OllamaTagsPayload
+    return {
+      reachable: true,
+      models: normalizeOllamaModels(payload),
+    }
+  } catch {
+    return {
+      reachable: false,
+      models: [],
+    }
+  } finally {
+    clear()
+  }
+}
+
export function getOllamaApiBaseUrl(baseUrl?: string): string {
  const parsed = new URL(
    baseUrl || process.env.OLLAMA_BASE_URL || DEFAULT_OLLAMA_BASE_URL,
@@ -121,61 +205,15 @@ export function getLocalOpenAICompatibleProviderLabel(baseUrl?: string): string
}

export async function hasLocalOllama(baseUrl?: string): Promise<boolean> {
-  const { signal, clear } = withTimeoutSignal(1200)
-  try {
-    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
-      method: 'GET',
-      signal,
-    })
-    return response.ok
-  } catch {
-    return false
-  } finally {
-    clear()
-  }
+  const { reachable } = await fetchOllamaModelsProbe(baseUrl, 1200)
+  return reachable
}

export async function listOllamaModels(
  baseUrl?: string,
): Promise<OllamaModelDescriptor[]> {
-  const { signal, clear } = withTimeoutSignal(5000)
-  try {
-    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
-      method: 'GET',
-      signal,
-    })
-    if (!response.ok) {
-      return []
-    }
-    const data = (await response.json()) as {
-      models?: Array<{
-        name?: string
-        size?: number
-        details?: {
-          family?: string
-          families?: string[]
-          parameter_size?: string
-          quantization_level?: string
-        }
-      }>
-    }
-    return (data.models ?? [])
-      .filter(model => Boolean(model.name))
-      .map(model => ({
-        name: model.name!,
-        sizeBytes: typeof model.size === 'number' ? model.size : null,
-        family: model.details?.family ?? null,
-        families: model.details?.families ?? [],
-        parameterSize: model.details?.parameter_size ?? null,
-        quantizationLevel: model.details?.quantization_level ?? null,
-      }))
-  } catch {
-    return []
-  } finally {
-    clear()
-  }
+  const { models } = await fetchOllamaModelsProbe(baseUrl, 5000)
+  return models
}

export async function listOpenAICompatibleModels(options?: {
@@ -294,3 +332,106 @@ export async function benchmarkOllamaModel(
    clear()
  }
}
+
+export async function probeOllamaGenerationReadiness(options?: {
+  baseUrl?: string
+  model?: string
+  timeoutMs?: number
+}): Promise<OllamaGenerationReadiness> {
+  const timeoutMs = options?.timeoutMs ?? 8000
+  const { reachable, models } = await fetchOllamaModelsProbe(
+    options?.baseUrl,
+    timeoutMs,
+  )
+  if (!reachable) {
+    return {
+      state: 'unreachable',
+      models: [],
+    }
+  }
+  if (models.length === 0) {
+    return {
+      state: 'no_models',
+      models: [],
+    }
+  }
+
+  const requestedModel = options?.model?.trim() || undefined
+  if (requestedModel && !models.some(model => model.name === requestedModel)) {
+    return {
+      state: 'generation_failed',
+      models,
+      probeModel: requestedModel,
+      detail: `requested model not installed: ${requestedModel}`,
+    }
+  }
+
+  const probeModel = requestedModel ?? models[0]!.name
+  const { signal, clear } = withTimeoutSignal(timeoutMs)
+  try {
+    const response = await fetch(`${getOllamaApiBaseUrl(options?.baseUrl)}/api/chat`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      signal,
+      body: JSON.stringify({
+        model: probeModel,
+        stream: false,
+        messages: [{ role: 'user', content: 'Reply with OK.' }],
+        options: {
+          temperature: 0,
+          num_predict: 8,
+        },
+      }),
+    })
+    if (!response.ok) {
+      const responseBody = await response.text().catch(() => '')
+      const detailSuffix = compactDetail(responseBody)
+      return {
+        state: 'generation_failed',
+        models,
+        probeModel,
+        detail: detailSuffix
+          ? `status ${response.status}: ${detailSuffix}`
+          : `status ${response.status}`,
+      }
+    }
+    try {
+      await response.json()
+    } catch {
+      return {
+        state: 'generation_failed',
+        models,
+        probeModel,
+        detail: 'invalid JSON response',
+      }
+    }
+    return {
+      state: 'ready',
+      models,
+      probeModel,
+    }
+  } catch (error) {
+    const detail =
+      error instanceof Error
+        ? error.name === 'AbortError'
+          ? 'request timed out'
+          : error.message
+        : String(error)
+    return {
+      state: 'generation_failed',
+      models,
+      probeModel,
+      detail,
+    }
+  } finally {
+    clear()
+  }
+}
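A plausible call site for the new probe — the switch below is our illustration, not code from this commit, though the function signature and result shape are exactly as defined above:

const readiness = await probeOllamaGenerationReadiness({
  baseUrl: 'http://localhost:11434',
  model: 'llama3.1:8b', // optional; the probe defaults to the first installed model
})

switch (readiness.state) {
  case 'ready':
    // safe to route generation traffic to readiness.probeModel
    break
  case 'unreachable':
  case 'no_models':
  case 'generation_failed':
    // surface readiness.detail (when present) in setup or system-check output
    break
}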

View File

@@ -300,9 +300,9 @@ export function getRelativeSettingsFilePathForSource(
): string {
  switch (source) {
    case 'projectSettings':
-      return join('.openclaude', 'settings.json')
+      return '.openclaude/settings.json'
    case 'localSettings':
-      return join('.openclaude', 'settings.local.json')
+      return '.openclaude/settings.local.json'
  }
}

View File

@@ -0,0 +1,38 @@
import { describe, expect, test } from 'bun:test'
import { redactUrlForDisplay } from './urlRedaction.ts'

describe('redactUrlForDisplay', () => {
  test('redacts credentials and sensitive query params for valid URLs', () => {
    const redacted = redactUrlForDisplay(
      'http://user:pass@localhost:11434/v1?api_key=secret&foo=bar',
    )
    expect(redacted).toBe(
      'http://redacted:redacted@localhost:11434/v1?api_key=redacted&foo=bar',
    )
  })

  test('redacts token-like query parameter names', () => {
    const redacted = redactUrlForDisplay(
      'https://example.com/v1?x_access_token=abc123&model=qwen2.5-coder',
    )
    expect(redacted).toBe(
      'https://example.com/v1?x_access_token=redacted&model=qwen2.5-coder',
    )
  })

  test('falls back to regex redaction for malformed URLs', () => {
    const redacted = redactUrlForDisplay(
      '//user:pass@localhost:11434?token=abc&mode=test',
    )
    expect(redacted).toBe('//redacted@localhost:11434?token=redacted&mode=test')
  })

  test('keeps non-sensitive URLs unchanged', () => {
    const url = 'http://localhost:11434/v1?model=llama3.1:8b'
    expect(redactUrlForDisplay(url)).toBe(url)
  })
})

src/utils/urlRedaction.ts (new file, 48 lines)
View File

@@ -0,0 +1,48 @@
const SENSITIVE_URL_QUERY_PARAM_TOKENS = [
  'api_key',
  'apikey',
  'key',
  'token',
  'access_token',
  'refresh_token',
  'signature',
  'sig',
  'secret',
  'password',
  'passwd',
  'pwd',
  'auth',
  'authorization',
]

function shouldRedactUrlQueryParam(name: string): boolean {
  const lower = name.toLowerCase()
  return SENSITIVE_URL_QUERY_PARAM_TOKENS.some(token => lower.includes(token))
}

export function redactUrlForDisplay(rawUrl: string): string {
  try {
    const parsed = new URL(rawUrl)
    if (parsed.username) {
      parsed.username = 'redacted'
    }
    if (parsed.password) {
      parsed.password = 'redacted'
    }
    for (const key of parsed.searchParams.keys()) {
      if (shouldRedactUrlQueryParam(key)) {
        parsed.searchParams.set(key, 'redacted')
      }
    }
    return parsed.toString()
  } catch {
    return rawUrl
      .replace(/\/\/[^/@\s]+(?::[^/@\s]*)?@/g, '//redacted@')
      .replace(
        /([?&](?:token|access_token|refresh_token|api_key|apikey|key|password|passwd|pwd|auth|authorization|signature|sig|secret)=)[^&#]*/gi,
        '$1redacted',
      )
  }
}
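A representative call site (the log message itself is illustrative) shows how provider URLs stay safe to print:

const unsafeUrl = 'http://user:pass@localhost:11434/v1?api_key=abc123'
console.warn(`provider unreachable: ${redactUrlForDisplay(unsafeUrl)}`)
// → provider unreachable: http://redacted:redacted@localhost:11434/v1?api_key=redacted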