feat(api): improve local provider reliability with readiness and self-healing (#738)
* feat(api): classify openai-compatible provider failures * Update src/services/api/providerConfig.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat(api): harden openai-compatible diagnostics and env fallback * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix openaiShim duplicate requests and diagnostics * remove unused url from http failure classifier * dedupe env diagnostic warnings * Remove hardcoded URLs from OpenAI error tests Removed hardcoded URLs from network failure classification tests. * Update providerConfig.envDiagnostics.test.ts * fix(openai-shim): return successful responses and restore localhost classifier tests * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat(provider): add truthful local generation readiness checks Implement Phase 2 provider readiness behavior by adding structured Ollama generation probes, wiring setup flows to readiness states, extending system-check with generation readiness output, and updating focused tests. 
* feat(api): add local self-healing fallback retries Implement Phase 3 self-healing behavior for local OpenAI-compatible providers: retry base URL fallbacks for localhost resolution and endpoint mismatches, plus capability-gated toolless retry for tool-incompatible local models; include diagnostics and focused tests. * fix(api): address review blockers for local provider reliability * Update src/utils/providerDiscovery.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix: harden readiness probes and cross-platform test stability * fix: refresh toolless retry payload and stabilize osc clipboard test * fix: harden Ollama readiness parsing and redact provider URLs --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -4,6 +4,13 @@ import { DEFAULT_OPENAI_BASE_URL } from '../services/api/providerConfig.js'
|
||||
/**
 * Base URL used for Ollama when the caller passes no explicit base URL and
 * the `OLLAMA_BASE_URL` environment variable is not set.
 */
export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434'
|
||||
/**
 * Default loopback base URL (port 1337) for the Atomic Chat provider.
 * NOTE(review): the consumers of this constant are not visible in this
 * excerpt — confirm how it is resolved against user-supplied URLs.
 */
export const DEFAULT_ATOMIC_CHAT_BASE_URL = 'http://127.0.0.1:1337'
|
||||
|
||||
/**
 * Outcome of probing whether a local Ollama server can actually generate text.
 *
 * - `ready`: the chat probe returned an OK status with a parseable JSON body.
 * - `unreachable`: the `/api/tags` probe failed or returned a non-OK status.
 * - `no_models`: the server answered but listed no usable models.
 * - `generation_failed`: the requested model is not installed, or the chat
 *   probe failed (non-OK status, invalid JSON, timeout, or network error).
 */
export type OllamaGenerationReadiness = {
  /** Overall readiness classification. */
  state: 'ready' | 'unreachable' | 'no_models' | 'generation_failed'
  /** Models reported by the server; empty for `unreachable` and `no_models`. */
  models: OllamaModelDescriptor[]
  /** Model the generation probe targeted, when a probe model was selected. */
  probeModel?: string
  /** Short human-readable failure reason, when one is available. */
  detail?: string
}
|
||||
|
||||
function withTimeoutSignal(timeoutMs: number): {
|
||||
signal: AbortSignal
|
||||
clear: () => void
|
||||
@@ -20,6 +27,83 @@ function trimTrailingSlash(value: string): string {
|
||||
return value.replace(/\/+$/, '')
|
||||
}
|
||||
|
||||
/**
 * Collapses free-form text (for example an HTTP error body) into a short,
 * single-line summary suitable for diagnostics.
 *
 * Leading/trailing whitespace is removed and every whitespace run becomes one
 * space. Text longer than `maxLength` UTF-16 code units is cut and suffixed
 * with `...` (the suffix is not counted against `maxLength`).
 *
 * @param value Raw text to summarize.
 * @param maxLength Maximum number of code units kept before the `...` suffix.
 * @returns The compacted text, or `''` when `value` is blank.
 */
function compactDetail(value: string, maxLength = 180): string {
  const compact = value.trim().replace(/\s+/g, ' ')
  if (!compact) {
    return ''
  }

  if (compact.length <= maxLength) {
    return compact
  }

  // Never cut between the two halves of a surrogate pair: a dangling high
  // surrogate renders as a replacement character and can break encoders.
  let end = maxLength
  const lastKept = compact.charCodeAt(end - 1)
  const firstDropped = compact.charCodeAt(end)
  const splitsSurrogatePair =
    lastKept >= 0xd800 &&
    lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 &&
    firstDropped <= 0xdfff
  if (splitsSurrogatePair) {
    end -= 1
  }

  return `${compact.slice(0, end)}...`
}
|
||||
|
||||
/**
 * Loosely-typed shape of the JSON body read from Ollama's `/api/tags`
 * endpoint.
 *
 * The body is cast from unvalidated JSON, so every field is optional and the
 * ones consumers fall back on with `??` are also typed as possibly `null`.
 */
type OllamaTagsPayload = {
  models?: Array<{
    /** Model identifier; entries without a name are ignored by consumers. */
    name?: string
    /** Model size; surfaced to callers as `sizeBytes`. */
    size?: number
    details?: {
      family?: string | null
      families?: string[] | null
      parameter_size?: string | null
      quantization_level?: string | null
    } | null
  }> | null
}
|
||||
|
||||
/**
 * Converts a raw `/api/tags` payload into model descriptors.
 *
 * The payload comes from unvalidated JSON, so malformed input is tolerated
 * rather than thrown on: a missing or non-array `models` yields `[]`, and
 * entries that are not objects or lack a non-empty string `name` are skipped.
 *
 * @param payload Parsed body of the `/api/tags` response.
 * @returns One descriptor per named model, with absent metadata set to `null`
 *   (or `[]` for `families`).
 */
function normalizeOllamaModels(
  payload: OllamaTagsPayload,
): OllamaModelDescriptor[] {
  const entries: unknown = payload?.models
  if (!Array.isArray(entries)) {
    return []
  }

  const descriptors: OllamaModelDescriptor[] = []
  for (const entry of entries as unknown[]) {
    if (typeof entry !== 'object' || entry === null) {
      continue
    }

    const { name, size, details } = entry as NonNullable<
      OllamaTagsPayload['models']
    >[number]
    if (typeof name !== 'string' || name.length === 0) {
      continue
    }

    const info =
      typeof details === 'object' && details !== null ? details : undefined
    const families = info?.families

    descriptors.push({
      name,
      sizeBytes: typeof size === 'number' ? size : null,
      family: info?.family ?? null,
      families: Array.isArray(families) ? families : [],
      parameterSize: info?.parameter_size ?? null,
      quantizationLevel: info?.quantization_level ?? null,
    })
  }

  return descriptors
}
|
||||
|
||||
/**
 * Queries Ollama's `/api/tags` endpoint once and reports both whether the
 * server answered and which models it listed.
 *
 * Never throws: network errors, timeouts and non-OK statuses are reported as
 * `reachable: false`. An OK response whose body is missing, not JSON, or not
 * shaped as expected still counts as reachable, with an empty model list.
 *
 * @param baseUrl Optional Ollama base URL, resolved by `getOllamaApiBaseUrl`.
 * @param timeoutMs Time budget for the request, in milliseconds.
 */
async function fetchOllamaModelsProbe(
  baseUrl?: string,
  timeoutMs = 5000,
): Promise<{
  reachable: boolean
  models: OllamaModelDescriptor[]
}> {
  const { signal, clear } = withTimeoutSignal(timeoutMs)
  try {
    const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
      method: 'GET',
      signal,
    })

    if (!response.ok) {
      return { reachable: false, models: [] }
    }

    // The server answered OK, so it is reachable regardless of what the body
    // contains; body problems must only degrade the model list.
    let models: OllamaModelDescriptor[] = []
    try {
      const payload: unknown = await response.json()
      if (typeof payload === 'object' && payload !== null) {
        models = normalizeOllamaModels(payload as OllamaTagsPayload)
      }
    } catch {
      models = []
    }

    return { reachable: true, models }
  } catch {
    return { reachable: false, models: [] }
  } finally {
    // Always release the timeout timer, whichever path returned.
    clear()
  }
}
|
||||
|
||||
export function getOllamaApiBaseUrl(baseUrl?: string): string {
|
||||
const parsed = new URL(
|
||||
baseUrl || process.env.OLLAMA_BASE_URL || DEFAULT_OLLAMA_BASE_URL,
|
||||
@@ -121,61 +205,15 @@ export function getLocalOpenAICompatibleProviderLabel(baseUrl?: string): string
|
||||
}
|
||||
|
||||
/**
 * Reports whether an Ollama server answers its `/api/tags` endpoint with an
 * OK status within 1200 ms.
 *
 * @param baseUrl Optional Ollama base URL; defaults are resolved by the probe.
 * @returns `true` when the server responded OK, `false` on any failure.
 */
export async function hasLocalOllama(baseUrl?: string): Promise<boolean> {
  const { reachable } = await fetchOllamaModelsProbe(baseUrl, 1200)
  return reachable
}
|
||||
|
||||
/**
 * Lists the models reported by an Ollama server's `/api/tags` endpoint,
 * allowing up to 5000 ms for the request.
 *
 * @param baseUrl Optional Ollama base URL; defaults are resolved by the probe.
 * @returns The normalized model descriptors, or `[]` when the server is
 *   unreachable, responds with a non-OK status, or returns an unusable body.
 */
export async function listOllamaModels(
  baseUrl?: string,
): Promise<OllamaModelDescriptor[]> {
  const { models } = await fetchOllamaModelsProbe(baseUrl, 5000)
  return models
}
|
||||
|
||||
export async function listOpenAICompatibleModels(options?: {
|
||||
@@ -294,3 +332,106 @@ export async function benchmarkOllamaModel(
|
||||
clear()
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks whether a local Ollama server can actually generate text, not just
 * list models.
 *
 * Steps: (1) list models via `/api/tags`; (2) pick the requested model, or the
 * first listed model when none is requested; (3) send a tiny non-streaming
 * `/api/chat` request and classify the outcome. Never throws.
 *
 * @param options.baseUrl Optional Ollama base URL.
 * @param options.model Model to probe; blank values are treated as unset.
 * @param options.timeoutMs Time budget applied to each of the two requests
 *   (default 8000 ms).
 * @returns A readiness record; see `OllamaGenerationReadiness` for the states.
 */
export async function probeOllamaGenerationReadiness(options?: {
  baseUrl?: string
  model?: string
  timeoutMs?: number
}): Promise<OllamaGenerationReadiness> {
  const timeoutMs = options?.timeoutMs ?? 8000
  const { reachable, models } = await fetchOllamaModelsProbe(
    options?.baseUrl,
    timeoutMs,
  )
  if (!reachable) {
    return { state: 'unreachable', models: [] }
  }

  const [firstModel] = models
  if (firstModel === undefined) {
    return { state: 'no_models', models: [] }
  }

  const requestedModel = options?.model?.trim() || undefined
  if (requestedModel && !models.some(model => model.name === requestedModel)) {
    return {
      state: 'generation_failed',
      models,
      probeModel: requestedModel,
      detail: `requested model not installed: ${requestedModel}`,
    }
  }

  const probeModel = requestedModel ?? firstModel.name
  const failed = (detail: string): OllamaGenerationReadiness => ({
    state: 'generation_failed',
    models,
    probeModel,
    detail,
  })

  const { signal, clear } = withTimeoutSignal(timeoutMs)
  try {
    const response = await fetch(
      `${getOllamaApiBaseUrl(options?.baseUrl)}/api/chat`,
      {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        signal,
        body: JSON.stringify({
          model: probeModel,
          stream: false,
          messages: [{ role: 'user', content: 'Reply with OK.' }],
          // Deterministic and tiny: the probe only needs proof of generation.
          options: {
            temperature: 0,
            num_predict: 8,
          },
        }),
      },
    )

    if (!response.ok) {
      const responseBody = await response.text().catch(() => '')
      const detailSuffix = compactDetail(responseBody)
      return failed(
        detailSuffix
          ? `status ${response.status}: ${detailSuffix}`
          : `status ${response.status}`,
      )
    }

    let payload: unknown
    try {
      payload = await response.json()
    } catch {
      return failed('invalid JSON response')
    }

    // An OK status whose body carries an `error` message is still a failed
    // generation; do not report it as ready.
    if (typeof payload === 'object' && payload !== null && 'error' in payload) {
      const { error } = payload as { error: unknown }
      if (typeof error === 'string' && error.trim()) {
        return failed(compactDetail(error))
      }
    }

    return { state: 'ready', models, probeModel }
  } catch (error) {
    if (error instanceof Error) {
      // Manual aborts surface as AbortError; AbortSignal.timeout() surfaces
      // as TimeoutError. Both mean the time budget ran out.
      const timedOut =
        error.name === 'AbortError' || error.name === 'TimeoutError'
      return failed(timedOut ? 'request timed out' : error.message)
    }
    return failed(String(error))
  } finally {
    clear()
  }
}
|
||||
|
||||
Reference in New Issue
Block a user