feat(api): improve local provider reliability with readiness and self-healing (#738)
* feat(api): classify openai-compatible provider failures * Update src/services/api/providerConfig.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat(api): harden openai-compatible diagnostics and env fallback * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix openaiShim duplicate requests and diagnostics * remove unused url from http failure classifier * dedupe env diagnostic warnings * Remove hardcoded URLs from OpenAI error tests Removed hardcoded URLs from network failure classification tests. * Update providerConfig.envDiagnostics.test.ts * fix(openai-shim): return successful responses and restore localhost classifier tests * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat(provider): add truthful local generation readiness checks Implement Phase 2 provider readiness behavior by adding structured Ollama generation probes, wiring setup flows to readiness states, extending system-check with generation readiness output, and updating focused tests. * feat(api): add local self-healing fallback retries Implement Phase 3 self-healing behavior for local OpenAI-compatible providers: retry base URL fallbacks for localhost resolution and endpoint mismatches, plus capability-gated toolless retry for tool-incompatible local models; include diagnostics and focused tests. * fix(api): address review blockers for local provider reliability * Update src/utils/providerDiscovery.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix: harden readiness probes and cross-platform test stability * fix: refresh toolless retry payload and stabilize osc clipboard test * fix: harden Ollama readiness parsing and redact provider URLs --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
import { afterEach, expect, mock, test } from 'bun:test'
|
||||
|
||||
import {
|
||||
getLocalOpenAICompatibleProviderLabel,
|
||||
listOpenAICompatibleModels,
|
||||
} from './providerDiscovery.js'
|
||||
async function loadProviderDiscoveryModule() {
|
||||
// @ts-expect-error cache-busting query string for Bun module mocks
|
||||
return import(`./providerDiscovery.js?ts=${Date.now()}-${Math.random()}`)
|
||||
}
|
||||
|
||||
const originalFetch = globalThis.fetch
|
||||
const originalEnv = {
|
||||
@@ -16,6 +16,8 @@ afterEach(() => {
|
||||
})
|
||||
|
||||
test('lists models from a local openai-compatible /models endpoint', async () => {
|
||||
const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock((input, init) => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
expect(url).toBe('http://localhost:1234/v1/models')
|
||||
@@ -47,6 +49,8 @@ test('lists models from a local openai-compatible /models endpoint', async () =>
|
||||
})
|
||||
|
||||
test('returns null when a local openai-compatible /models request fails', async () => {
|
||||
const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(() =>
|
||||
Promise.resolve(new Response('not available', { status: 503 })),
|
||||
) as typeof globalThis.fetch
|
||||
@@ -56,13 +60,19 @@ test('returns null when a local openai-compatible /models request fails', async
|
||||
).resolves.toBeNull()
|
||||
})
|
||||
|
||||
test('detects LM Studio from the default localhost port', () => {
|
||||
test('detects LM Studio from the default localhost port', async () => {
|
||||
const { getLocalOpenAICompatibleProviderLabel } =
|
||||
await loadProviderDiscoveryModule()
|
||||
|
||||
expect(getLocalOpenAICompatibleProviderLabel('http://localhost:1234/v1')).toBe(
|
||||
'LM Studio',
|
||||
)
|
||||
})
|
||||
|
||||
test('detects common local openai-compatible providers by hostname', () => {
|
||||
test('detects common local openai-compatible providers by hostname', async () => {
|
||||
const { getLocalOpenAICompatibleProviderLabel } =
|
||||
await loadProviderDiscoveryModule()
|
||||
|
||||
expect(
|
||||
getLocalOpenAICompatibleProviderLabel('http://localai.local:8080/v1'),
|
||||
).toBe('LocalAI')
|
||||
@@ -71,8 +81,212 @@ test('detects common local openai-compatible providers by hostname', () => {
|
||||
).toBe('vLLM')
|
||||
})
|
||||
|
||||
test('falls back to a generic local openai-compatible label', () => {
|
||||
test('falls back to a generic local openai-compatible label', async () => {
|
||||
const { getLocalOpenAICompatibleProviderLabel } =
|
||||
await loadProviderDiscoveryModule()
|
||||
|
||||
expect(
|
||||
getLocalOpenAICompatibleProviderLabel('http://127.0.0.1:8080/v1'),
|
||||
).toBe('Local OpenAI-compatible')
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports unreachable when tags endpoint is down', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
const calledUrls: string[] = []
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
calledUrls.push(url)
|
||||
return Promise.resolve(new Response('not available', { status: 503 }))
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'unreachable',
|
||||
models: [],
|
||||
})
|
||||
|
||||
expect(calledUrls).toEqual([
|
||||
'http://localhost:11434/api/tags',
|
||||
])
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports no models when server is reachable', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
const calledUrls: string[] = []
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
calledUrls.push(url)
|
||||
return Promise.resolve(
|
||||
new Response(JSON.stringify({ models: [] }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
}),
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'no_models',
|
||||
models: [],
|
||||
})
|
||||
|
||||
expect(calledUrls).toEqual([
|
||||
'http://localhost:11434/api/tags',
|
||||
])
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports generation_failed when requested model is missing', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
const calledUrls: string[] = []
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
calledUrls.push(url)
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
models: [{ name: 'llama3.1:8b', size: 1024 }],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
model: 'qwen2.5-coder:7b',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'generation_failed',
|
||||
probeModel: 'qwen2.5-coder:7b',
|
||||
detail: 'requested model not installed: qwen2.5-coder:7b',
|
||||
})
|
||||
|
||||
expect(calledUrls).toEqual(['http://localhost:11434/api/tags'])
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports generation failures when chat probe fails', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
if (url.endsWith('/api/tags')) {
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
models: [{ name: 'qwen2.5-coder:7b', size: 42 }],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
return Promise.resolve(new Response('model not found', { status: 404 }))
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
model: 'qwen2.5-coder:7b',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'generation_failed',
|
||||
probeModel: 'qwen2.5-coder:7b',
|
||||
})
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports generation_failed when chat probe returns invalid JSON', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
if (url.endsWith('/api/tags')) {
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
models: [{ name: 'llama3.1:8b', size: 1024 }],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
return Promise.resolve(
|
||||
new Response('<html>proxy error</html>', {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'text/html' },
|
||||
}),
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'generation_failed',
|
||||
probeModel: 'llama3.1:8b',
|
||||
detail: 'invalid JSON response',
|
||||
})
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports ready when chat probe succeeds', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
if (url.endsWith('/api/tags')) {
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
models: [{ name: 'llama3.1:8b', size: 1024 }],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
message: { role: 'assistant', content: 'OK' },
|
||||
done: true,
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'ready',
|
||||
probeModel: 'llama3.1:8b',
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user