feat(api): improve local provider reliability with readiness and self-healing (#738)
* feat(api): classify openai-compatible provider failures * Update src/services/api/providerConfig.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat(api): harden openai-compatible diagnostics and env fallback * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/errors.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix openaiShim duplicate requests and diagnostics * remove unused url from http failure classifier * dedupe env diagnostic warnings * Remove hardcoded URLs from OpenAI error tests Removed hardcoded URLs from network failure classification tests. * Update providerConfig.envDiagnostics.test.ts * fix(openai-shim): return successful responses and restore localhost classifier tests * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat(provider): add truthful local generation readiness checks Implement Phase 2 provider readiness behavior by adding structured Ollama generation probes, wiring setup flows to readiness states, extending system-check with generation readiness output, and updating focused tests. 
* feat(api): add local self-healing fallback retries Implement Phase 3 self-healing behavior for local OpenAI-compatible providers: retry base URL fallbacks for localhost resolution and endpoint mismatches, plus capability-gated toolless retry for tool-incompatible local models; include diagnostics and focused tests. * fix(api): address review blockers for local provider reliability * Update src/utils/providerDiscovery.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/services/api/openaiShim.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix: harden readiness probes and cross-platform test stability * fix: refresh toolless retry payload and stabilize osc clipboard test * fix: harden Ollama readiness parsing and redact provider URLs --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -37,13 +37,14 @@ import {
|
||||
readGithubModelsTokenAsync,
|
||||
} from '../utils/githubModelsCredentials.js'
|
||||
import {
|
||||
hasLocalOllama,
|
||||
listOllamaModels,
|
||||
probeOllamaGenerationReadiness,
|
||||
type OllamaGenerationReadiness,
|
||||
} from '../utils/providerDiscovery.js'
|
||||
import {
|
||||
rankOllamaModels,
|
||||
recommendOllamaModel,
|
||||
} from '../utils/providerRecommendation.js'
|
||||
import { redactUrlForDisplay } from '../utils/urlRedaction.js'
|
||||
import { updateSettingsForSource } from '../utils/settings/settings.js'
|
||||
import {
|
||||
type OptionWithDescription,
|
||||
@@ -52,7 +53,6 @@ import {
|
||||
import { Pane } from './design-system/Pane.js'
|
||||
import TextInput from './TextInput.js'
|
||||
import { useCodexOAuthFlow } from './useCodexOAuthFlow.js'
|
||||
import { useSetAppState } from '../state/AppState.js'
|
||||
|
||||
export type ProviderManagerResult = {
|
||||
action: 'saved' | 'cancelled'
|
||||
@@ -222,6 +222,29 @@ function getGithubProviderSummary(
|
||||
return `github-models · ${GITHUB_PROVIDER_DEFAULT_BASE_URL} · ${getGithubProviderModel(processEnv)} · ${credentialSummary}${activeSuffix}`
|
||||
}
|
||||
|
||||
/**
 * Builds the user-facing explanation shown when an Ollama readiness probe
 * did not come back as ready.
 *
 * @param readiness - Result of the generation readiness probe. Only the
 *   `state`, `probeModel` and `detail` fields are read here.
 * @param baseUrl - Endpoint that was probed; it is passed through
 *   `redactUrlForDisplay` before being embedded in the message.
 * @returns A message for the `unreachable`, `no_models` and
 *   `generation_failed` states, or an empty string for any other state.
 */
function describeOllamaSelectionIssue(
  readiness: OllamaGenerationReadiness,
  baseUrl: string,
): string {
  if (readiness.state === 'unreachable') {
    return `Could not reach Ollama at ${redactUrlForDisplay(baseUrl)}. Start Ollama first, or enter the endpoint manually.`
  }

  if (readiness.state === 'no_models') {
    return 'Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first, or enter details manually.'
  }

  if (readiness.state === 'generation_failed') {
    // `||` on purpose: an empty model name is as unusable as a missing one.
    const probeModel = readiness.probeModel || undefined
    const modelLabel = probeModel ?? 'the selected model'
    // Never splice the prose fallback into the shell command; without a
    // concrete model name, show a placeholder the user can fill in.
    const runCommand = probeModel
      ? `ollama run ${probeModel}`
      : 'ollama run <model>'

    const detail = readiness.detail ? String(readiness.detail).trim() : ''
    // Avoid doubled punctuation when the detail already ends a sentence.
    const detailTerminator = /[.!?]$/.test(detail) ? '' : '.'
    const detailSuffix = detail ? ` Details: ${detail}${detailTerminator}` : ''

    return `Ollama is reachable and models are installed, but a generation probe failed for ${modelLabel}.${detailSuffix} Run "${runCommand}" once and retry, or enter details manually.`
  }

  // Any other state has no dedicated message.
  return ''
}
|
||||
|
||||
function findCodexOAuthProfile(
|
||||
profiles: ProviderProfile[],
|
||||
profileId?: string,
|
||||
@@ -450,32 +473,21 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
setOllamaSelection({ state: 'loading' })
|
||||
|
||||
void (async () => {
|
||||
const available = await hasLocalOllama(draft.baseUrl)
|
||||
if (!available) {
|
||||
const readiness = await probeOllamaGenerationReadiness({
|
||||
baseUrl: draft.baseUrl,
|
||||
})
|
||||
if (readiness.state !== 'ready') {
|
||||
if (!cancelled) {
|
||||
setOllamaSelection({
|
||||
state: 'unavailable',
|
||||
message:
|
||||
'Could not reach Ollama. Start Ollama first, or enter the endpoint manually.',
|
||||
message: describeOllamaSelectionIssue(readiness, draft.baseUrl),
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
const models = await listOllamaModels(draft.baseUrl)
|
||||
if (models.length === 0) {
|
||||
if (!cancelled) {
|
||||
setOllamaSelection({
|
||||
state: 'unavailable',
|
||||
message:
|
||||
'Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first, or enter details manually.',
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
const ranked = rankOllamaModels(models, 'balanced')
|
||||
const recommended = recommendOllamaModel(models, 'balanced')
|
||||
const ranked = rankOllamaModels(readiness.models, 'balanced')
|
||||
const recommended = recommendOllamaModel(readiness.models, 'balanced')
|
||||
if (!cancelled) {
|
||||
setOllamaSelection({
|
||||
state: 'ready',
|
||||
|
||||
Reference in New Issue
Block a user