Files
orcs-code/src/utils/providerDiscovery.ts
ArkhAngelLifeJiggy 51191d6132 feat: add NVIDIA NIM and MiniMax provider support (#552)
* feat: add NVIDIA NIM and MiniMax provider support

- Add nvidia-nim and minimax to --provider CLI flag
- Add model discovery for NVIDIA NIM (160+ models) and MiniMax
- Update /model picker to show provider-specific models
- Fix provider detection in startup banner
- Update .env.example with new provider options

Supported providers:
- NVIDIA NIM: https://integrate.api.nvidia.com/v1
- MiniMax: https://api.minimax.io/v1

* fix: resolve conflict in StartupScreen (keep NVIDIA/MiniMax + add Codex detection)

* fix: resolve providerProfile conflict (add imports from main, keep NVIDIA/MiniMax)

* fix: revert providerSecrets to match main (NVIDIA/MiniMax handled elsewhere)

* fix: add context window entries for NVIDIA NIM and new MiniMax models

* fix: use GLM-5 as NVIDIA NIM default and MiniMax-M2.5 for consistency

* fix: address remaining review items - add GLM/Kimi context entries, max output tokens, fix .env.example, revert to Nemotron default

* fix: filter NVIDIA NIM picker to chat/instruct models only, set provider-specific API keys from saved profiles

* chore: add more NVIDIA NIM context window entries for popular models

* fix: address remaining non-blocking items - fix base model, clear provider API keys on profile switch
2026-04-15 20:26:13 +08:00

297 lines
7.5 KiB
TypeScript

import type { OllamaModelDescriptor } from './providerRecommendation.ts'
import { DEFAULT_OPENAI_BASE_URL } from '../services/api/providerConfig.js'
export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434'
export const DEFAULT_ATOMIC_CHAT_BASE_URL = 'http://127.0.0.1:1337'
function withTimeoutSignal(timeoutMs: number): {
signal: AbortSignal
clear: () => void
} {
const controller = new AbortController()
const timeout = setTimeout(() => controller.abort(), timeoutMs)
return {
signal: controller.signal,
clear: () => clearTimeout(timeout),
}
}
function trimTrailingSlash(value: string): string {
return value.replace(/\/+$/, '')
}
export function getOllamaApiBaseUrl(baseUrl?: string): string {
const parsed = new URL(
baseUrl || process.env.OLLAMA_BASE_URL || DEFAULT_OLLAMA_BASE_URL,
)
const pathname = trimTrailingSlash(parsed.pathname)
parsed.pathname = pathname.endsWith('/v1')
? pathname.slice(0, -3) || '/'
: pathname || '/'
parsed.search = ''
parsed.hash = ''
return trimTrailingSlash(parsed.toString())
}
export function getOllamaChatBaseUrl(baseUrl?: string): string {
return `${getOllamaApiBaseUrl(baseUrl)}/v1`
}
export function getAtomicChatApiBaseUrl(baseUrl?: string): string {
const parsed = new URL(
baseUrl || process.env.ATOMIC_CHAT_BASE_URL || DEFAULT_ATOMIC_CHAT_BASE_URL,
)
const pathname = trimTrailingSlash(parsed.pathname)
parsed.pathname = pathname.endsWith('/v1')
? pathname.slice(0, -3) || '/'
: pathname || '/'
parsed.search = ''
parsed.hash = ''
return trimTrailingSlash(parsed.toString())
}
export function getAtomicChatChatBaseUrl(baseUrl?: string): string {
return `${getAtomicChatApiBaseUrl(baseUrl)}/v1`
}
export function getOpenAICompatibleModelsBaseUrl(baseUrl?: string): string {
return (
baseUrl || process.env.OPENAI_BASE_URL || DEFAULT_OPENAI_BASE_URL
).replace(/\/+$/, '')
}
export function getLocalOpenAICompatibleProviderLabel(baseUrl?: string): string {
try {
const parsed = new URL(getOpenAICompatibleModelsBaseUrl(baseUrl))
const host = parsed.host.toLowerCase()
const hostname = parsed.hostname.toLowerCase()
const path = parsed.pathname.toLowerCase()
const haystack = `${hostname} ${path}`
if (
host.endsWith(':1234') ||
haystack.includes('lmstudio') ||
haystack.includes('lm-studio')
) {
return 'LM Studio'
}
if (host.endsWith(':11434') || haystack.includes('ollama')) {
return 'Ollama'
}
if (haystack.includes('localai')) {
return 'LocalAI'
}
if (haystack.includes('jan')) {
return 'Jan'
}
if (haystack.includes('kobold')) {
return 'KoboldCpp'
}
if (haystack.includes('llama.cpp') || haystack.includes('llamacpp')) {
return 'llama.cpp'
}
if (haystack.includes('vllm')) {
return 'vLLM'
}
if (
haystack.includes('open-webui') ||
haystack.includes('openwebui')
) {
return 'Open WebUI'
}
if (
haystack.includes('text-generation-webui') ||
haystack.includes('oobabooga')
) {
return 'text-generation-webui'
}
// Check for NVIDIA NIM
if (host.includes('nvidia') || haystack.includes('nvidia') || host.includes('integrate.api.nvidia')) {
return 'NVIDIA NIM'
}
// Check for MiniMax (both api.minimax.io and api.minimax.chat)
if (host.includes('minimax') || haystack.includes('minimax')) {
return 'MiniMax'
}
} catch {
// Fall back to the generic label when the base URL is malformed.
}
return 'Local OpenAI-compatible'
}
export async function hasLocalOllama(baseUrl?: string): Promise<boolean> {
const { signal, clear } = withTimeoutSignal(1200)
try {
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
method: 'GET',
signal,
})
return response.ok
} catch {
return false
} finally {
clear()
}
}
export async function listOllamaModels(
baseUrl?: string,
): Promise<OllamaModelDescriptor[]> {
const { signal, clear } = withTimeoutSignal(5000)
try {
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
method: 'GET',
signal,
})
if (!response.ok) {
return []
}
const data = (await response.json()) as {
models?: Array<{
name?: string
size?: number
details?: {
family?: string
families?: string[]
parameter_size?: string
quantization_level?: string
}
}>
}
return (data.models ?? [])
.filter(model => Boolean(model.name))
.map(model => ({
name: model.name!,
sizeBytes: typeof model.size === 'number' ? model.size : null,
family: model.details?.family ?? null,
families: model.details?.families ?? [],
parameterSize: model.details?.parameter_size ?? null,
quantizationLevel: model.details?.quantization_level ?? null,
}))
} catch {
return []
} finally {
clear()
}
}
export async function listOpenAICompatibleModels(options?: {
baseUrl?: string
apiKey?: string
}): Promise<string[] | null> {
const { signal, clear } = withTimeoutSignal(5000)
try {
const response = await fetch(
`${getOpenAICompatibleModelsBaseUrl(options?.baseUrl)}/models`,
{
method: 'GET',
headers: options?.apiKey
? {
Authorization: `Bearer ${options.apiKey}`,
}
: undefined,
signal,
},
)
if (!response.ok) {
return null
}
const data = (await response.json()) as {
data?: Array<{ id?: string }>
}
return Array.from(
new Set(
(data.data ?? [])
.filter(model => Boolean(model.id))
.map(model => model.id!),
),
)
} catch {
return null
} finally {
clear()
}
}
export async function hasLocalAtomicChat(baseUrl?: string): Promise<boolean> {
const { signal, clear } = withTimeoutSignal(1200)
try {
const response = await fetch(`${getAtomicChatChatBaseUrl(baseUrl)}/models`, {
method: 'GET',
signal,
})
return response.ok
} catch {
return false
} finally {
clear()
}
}
export async function listAtomicChatModels(
baseUrl?: string,
): Promise<string[]> {
const { signal, clear } = withTimeoutSignal(5000)
try {
const response = await fetch(`${getAtomicChatChatBaseUrl(baseUrl)}/models`, {
method: 'GET',
signal,
})
if (!response.ok) {
return []
}
const data = (await response.json()) as {
data?: Array<{ id?: string }>
}
return (data.data ?? [])
.filter(model => Boolean(model.id))
.map(model => model.id!)
} catch {
return []
} finally {
clear()
}
}
export async function benchmarkOllamaModel(
modelName: string,
baseUrl?: string,
): Promise<number | null> {
const start = Date.now()
const { signal, clear } = withTimeoutSignal(20000)
try {
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
signal,
body: JSON.stringify({
model: modelName,
stream: false,
messages: [{ role: 'user', content: 'Reply with OK.' }],
options: {
temperature: 0,
num_predict: 8,
},
}),
})
if (!response.ok) {
return null
}
await response.json()
return Date.now() - start
} catch {
return null
} finally {
clear()
}
}