Merge pull request #5 from Vasanthdev2004/codex/provider-profile-recommendations
feat: add intelligent provider profile recommendation
This commit is contained in:
28
PLAYBOOK.md
28
PLAYBOOK.md
@@ -37,6 +37,18 @@ If everything is healthy, OpenClaude starts directly.
|
||||
bun run profile:init -- --provider ollama --model llama3.1:8b
|
||||
```
|
||||
|
||||
Or let OpenClaude recommend the best local model for your goal:
|
||||
|
||||
```powershell
|
||||
bun run profile:init -- --provider ollama --goal coding
|
||||
```
|
||||
|
||||
Preview recommendations before saving:
|
||||
|
||||
```powershell
|
||||
bun run profile:recommend -- --goal coding --benchmark
|
||||
```
|
||||
|
||||
### 3.2 Confirm profile file
|
||||
|
||||
```powershell
|
||||
@@ -171,6 +183,12 @@ Fix:
|
||||
bun run profile:init -- --provider ollama --model llama3.1:8b
|
||||
```
|
||||
|
||||
Or pick a local Ollama profile automatically by goal:
|
||||
|
||||
```powershell
|
||||
bun run profile:init -- --provider ollama --goal balanced
|
||||
```
|
||||
|
||||
## 6.5 Placeholder key (`SUA_CHAVE`) error
|
||||
|
||||
Cause:
|
||||
@@ -202,6 +220,16 @@ bun run profile:fast # llama3.2:3b
|
||||
bun run profile:code # qwen2.5-coder:7b
|
||||
```
|
||||
|
||||
Goal-based local auto-selection:
|
||||
|
||||
```powershell
|
||||
bun run profile:init -- --provider ollama --goal latency
|
||||
bun run profile:init -- --provider ollama --goal balanced
|
||||
bun run profile:init -- --provider ollama --goal coding
|
||||
```
|
||||
|
||||
`profile:auto` is a best-available provider picker, not a local-only command. Use `--provider ollama` when you want to stay on a local model.
|
||||
|
||||
## 8. Practical Prompt Playbook (Copy/Paste)
|
||||
|
||||
## 8.1 Code understanding
|
||||
|
||||
21
README.md
21
README.md
@@ -209,7 +209,7 @@ bun run doctor:runtime:json
|
||||
# persist a diagnostics report to reports/doctor-runtime.json
|
||||
bun run doctor:report
|
||||
|
||||
# full local hardening check (typecheck + smoke + runtime doctor)
|
||||
# full local hardening check (smoke + runtime doctor)
|
||||
bun run hardening:check
|
||||
|
||||
# strict hardening (includes project-wide typecheck)
|
||||
@@ -226,9 +226,15 @@ Notes:
|
||||
Use profile launchers to avoid repeated environment setup:
|
||||
|
||||
```bash
|
||||
# one-time profile bootstrap (auto-detect ollama, otherwise openai)
|
||||
# one-time profile bootstrap (prefer viable local Ollama, otherwise OpenAI)
|
||||
bun run profile:init
|
||||
|
||||
# preview the best provider/model for your goal
|
||||
bun run profile:recommend -- --goal coding --benchmark
|
||||
|
||||
# auto-apply the best available local/openai provider/model for your goal
|
||||
bun run profile:auto -- --goal latency
|
||||
|
||||
# codex bootstrap (defaults to codexplan and ~/.codex/auth.json)
|
||||
bun run profile:codex
|
||||
|
||||
@@ -238,6 +244,9 @@ bun run profile:init -- --provider openai --api-key sk-...
|
||||
# ollama bootstrap with custom model
|
||||
bun run profile:init -- --provider ollama --model llama3.1:8b
|
||||
|
||||
# ollama bootstrap with intelligent model auto-selection
|
||||
bun run profile:init -- --provider ollama --goal coding
|
||||
|
||||
# codex bootstrap with a fast model alias
|
||||
bun run profile:init -- --provider codex --model codexspark
|
||||
|
||||
@@ -254,6 +263,14 @@ bun run dev:openai
|
||||
bun run dev:ollama
|
||||
```
|
||||
|
||||
`profile:recommend` ranks installed Ollama models for `latency`, `balanced`, or `coding`, and `profile:auto` can persist the recommendation directly.
|
||||
If no profile exists yet, `dev:profile` now uses the same goal-aware defaults when picking the initial model.
|
||||
|
||||
Use `--provider ollama` when you want a local-only path. Auto mode falls back to OpenAI when no viable local chat model is installed.
|
||||
Goal-based Ollama selection only recommends among models that are already installed and reachable from Ollama.
|
||||
|
||||
Use `profile:codex` or `--provider codex` when you want the ChatGPT Codex backend.
|
||||
|
||||
`dev:openai`, `dev:ollama`, and `dev:codex` run `doctor:runtime` first and only launch the app if checks pass.
|
||||
For `dev:ollama`, make sure Ollama is running locally before launch.
|
||||
|
||||
|
||||
@@ -22,12 +22,15 @@
|
||||
"dev:ollama": "bun run scripts/provider-launch.ts ollama",
|
||||
"dev:ollama:fast": "bun run scripts/provider-launch.ts ollama --fast --bare",
|
||||
"profile:init": "bun run scripts/provider-bootstrap.ts",
|
||||
"profile:recommend": "bun run scripts/provider-recommend.ts",
|
||||
"profile:auto": "bun run scripts/provider-recommend.ts --apply",
|
||||
"profile:codex": "bun run profile:init -- --provider codex --model codexplan",
|
||||
"profile:fast": "bun run profile:init -- --provider ollama --model llama3.2:3b",
|
||||
"profile:code": "bun run profile:init -- --provider ollama --model qwen2.5-coder:7b",
|
||||
"dev:fast": "bun run profile:fast && bun run dev:ollama:fast",
|
||||
"dev:code": "bun run profile:code && bun run dev:profile",
|
||||
"start": "node dist/cli.mjs",
|
||||
"test:provider-recommendation": "node --test --experimental-strip-types src/utils/providerRecommendation.test.ts src/utils/providerProfile.test.ts",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"smoke": "bun run build && node dist/cli.mjs --version",
|
||||
"test:provider": "bun test src/services/api/*.test.ts",
|
||||
|
||||
@@ -2,25 +2,28 @@
|
||||
import { writeFileSync } from 'node:fs'
|
||||
import { resolve } from 'node:path'
|
||||
import {
|
||||
DEFAULT_CODEX_BASE_URL,
|
||||
resolveCodexApiCredentials,
|
||||
} from '../src/services/api/providerConfig.js'
|
||||
|
||||
type ProviderProfile = 'openai' | 'ollama' | 'codex' | 'gemini'
|
||||
|
||||
type ProfileFile = {
|
||||
profile: ProviderProfile
|
||||
env: {
|
||||
OPENAI_BASE_URL?: string
|
||||
OPENAI_MODEL?: string
|
||||
OPENAI_API_KEY?: string
|
||||
CODEX_API_KEY?: string
|
||||
GEMINI_API_KEY?: string
|
||||
GEMINI_MODEL?: string
|
||||
GEMINI_BASE_URL?: string
|
||||
}
|
||||
createdAt: string
|
||||
}
|
||||
import {
|
||||
getGoalDefaultOpenAIModel,
|
||||
normalizeRecommendationGoal,
|
||||
recommendOllamaModel,
|
||||
} from '../src/utils/providerRecommendation.ts'
|
||||
import {
|
||||
buildCodexProfileEnv,
|
||||
buildGeminiProfileEnv,
|
||||
buildOllamaProfileEnv,
|
||||
buildOpenAIProfileEnv,
|
||||
createProfileFile,
|
||||
selectAutoProfile,
|
||||
type ProfileFile,
|
||||
type ProviderProfile,
|
||||
} from '../src/utils/providerProfile.ts'
|
||||
import {
|
||||
getOllamaChatBaseUrl,
|
||||
hasLocalOllama,
|
||||
listOllamaModels,
|
||||
} from './provider-discovery.ts'
|
||||
|
||||
function parseArg(name: string): string | null {
|
||||
const args = process.argv.slice(2)
|
||||
@@ -35,27 +38,16 @@ function parseProviderArg(): ProviderProfile | 'auto' {
|
||||
return 'auto'
|
||||
}
|
||||
|
||||
async function hasLocalOllama(): Promise<boolean> {
|
||||
const endpoint = 'http://localhost:11434/api/tags'
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort(), 1200)
|
||||
async function resolveOllamaModel(
|
||||
argModel: string | null,
|
||||
argBaseUrl: string | null,
|
||||
goal: ReturnType<typeof normalizeRecommendationGoal>,
|
||||
): Promise<string | null> {
|
||||
if (argModel) return argModel
|
||||
|
||||
try {
|
||||
const response = await fetch(endpoint, {
|
||||
method: 'GET',
|
||||
signal: controller.signal,
|
||||
})
|
||||
return response.ok
|
||||
} catch {
|
||||
return false
|
||||
} finally {
|
||||
clearTimeout(timeout)
|
||||
}
|
||||
}
|
||||
|
||||
function sanitizeApiKey(key: string | null): string | undefined {
|
||||
if (!key || key === 'SUA_CHAVE') return undefined
|
||||
return key
|
||||
const discovered = await listOllamaModels(argBaseUrl || undefined)
|
||||
const recommended = recommendOllamaModel(discovered, goal)
|
||||
return recommended?.name ?? null
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
@@ -63,69 +55,104 @@ async function main(): Promise<void> {
|
||||
const argModel = parseArg('--model')
|
||||
const argBaseUrl = parseArg('--base-url')
|
||||
const argApiKey = parseArg('--api-key')
|
||||
const goal = normalizeRecommendationGoal(
|
||||
parseArg('--goal') || process.env.OPENCLAUDE_PROFILE_GOAL,
|
||||
)
|
||||
|
||||
let selected: ProviderProfile
|
||||
let resolvedOllamaModel: string | null = null
|
||||
if (provider === 'auto') {
|
||||
selected = (await hasLocalOllama()) ? 'ollama' : 'openai'
|
||||
if (await hasLocalOllama(argBaseUrl || undefined)) {
|
||||
resolvedOllamaModel = await resolveOllamaModel(argModel, argBaseUrl, goal)
|
||||
selected = selectAutoProfile(resolvedOllamaModel)
|
||||
} else {
|
||||
selected = 'openai'
|
||||
}
|
||||
} else {
|
||||
selected = provider
|
||||
}
|
||||
|
||||
const env: ProfileFile['env'] = {}
|
||||
|
||||
let env: ProfileFile['env']
|
||||
if (selected === 'gemini') {
|
||||
env.GEMINI_MODEL = argModel || process.env.GEMINI_MODEL || 'gemini-2.0-flash'
|
||||
const key = sanitizeApiKey(argApiKey || process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY || null)
|
||||
if (!key) {
|
||||
const builtEnv = buildGeminiProfileEnv({
|
||||
model: argModel || null,
|
||||
baseUrl: argBaseUrl || null,
|
||||
apiKey: argApiKey || null,
|
||||
processEnv: process.env,
|
||||
})
|
||||
|
||||
if (!builtEnv) {
|
||||
console.error('Gemini profile requires an API key. Use --api-key or set GEMINI_API_KEY.')
|
||||
console.error('Get a free key at: https://aistudio.google.com/apikey')
|
||||
process.exit(1)
|
||||
}
|
||||
env.GEMINI_API_KEY = key
|
||||
if (argBaseUrl) env.GEMINI_BASE_URL = argBaseUrl
|
||||
|
||||
env = builtEnv
|
||||
} else if (selected === 'ollama') {
|
||||
env.OPENAI_BASE_URL = argBaseUrl || 'http://localhost:11434/v1'
|
||||
env.OPENAI_MODEL = argModel || process.env.OPENAI_MODEL || 'llama3.1:8b'
|
||||
const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null)
|
||||
if (key) env.OPENAI_API_KEY = key
|
||||
} else if (selected === 'codex') {
|
||||
env.OPENAI_BASE_URL =
|
||||
argBaseUrl || process.env.OPENAI_BASE_URL || DEFAULT_CODEX_BASE_URL
|
||||
env.OPENAI_MODEL = argModel || process.env.OPENAI_MODEL || 'codexplan'
|
||||
const key = sanitizeApiKey(argApiKey || process.env.CODEX_API_KEY || null)
|
||||
if (key) {
|
||||
env.CODEX_API_KEY = key
|
||||
} else {
|
||||
const credentials = resolveCodexApiCredentials(process.env)
|
||||
if (!credentials.apiKey) {
|
||||
const authHint = credentials.authPath
|
||||
? ` or make sure ${credentials.authPath} exists`
|
||||
: ''
|
||||
console.error(`Codex profile requires CODEX_API_KEY${authHint}.`)
|
||||
process.exit(1)
|
||||
}
|
||||
resolvedOllamaModel ??= await resolveOllamaModel(argModel, argBaseUrl, goal)
|
||||
if (!resolvedOllamaModel) {
|
||||
console.error('No viable Ollama chat model was discovered. Pull a chat model first or pass --model explicitly.')
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
env = buildOllamaProfileEnv(
|
||||
resolvedOllamaModel,
|
||||
{
|
||||
baseUrl: argBaseUrl,
|
||||
getOllamaChatBaseUrl,
|
||||
},
|
||||
)
|
||||
} else if (selected === 'codex') {
|
||||
const builtEnv = buildCodexProfileEnv({
|
||||
model: argModel,
|
||||
baseUrl: argBaseUrl,
|
||||
apiKey: argApiKey || process.env.CODEX_API_KEY || null,
|
||||
processEnv: process.env,
|
||||
})
|
||||
|
||||
if (!builtEnv) {
|
||||
const credentials = resolveCodexApiCredentials(
|
||||
argApiKey
|
||||
? { ...process.env, CODEX_API_KEY: argApiKey }
|
||||
: process.env,
|
||||
)
|
||||
const authHint = credentials.authPath
|
||||
? ` or make sure ${credentials.authPath} exists`
|
||||
: ''
|
||||
if (!credentials.apiKey) {
|
||||
console.error(`Codex profile requires CODEX_API_KEY${authHint}.`)
|
||||
} else {
|
||||
console.error('Codex profile requires CHATGPT_ACCOUNT_ID or an auth.json that includes it.')
|
||||
}
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
env = builtEnv
|
||||
} else {
|
||||
env.OPENAI_BASE_URL = argBaseUrl || process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1'
|
||||
env.OPENAI_MODEL = argModel || process.env.OPENAI_MODEL || 'gpt-4o'
|
||||
const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null)
|
||||
if (!key) {
|
||||
const builtEnv = buildOpenAIProfileEnv({
|
||||
goal,
|
||||
model: argModel || null,
|
||||
baseUrl: argBaseUrl || null,
|
||||
apiKey: argApiKey || process.env.OPENAI_API_KEY || null,
|
||||
processEnv: process.env,
|
||||
})
|
||||
|
||||
if (!builtEnv) {
|
||||
console.error('OpenAI profile requires a real API key. Use --api-key or set OPENAI_API_KEY.')
|
||||
process.exit(1)
|
||||
}
|
||||
env.OPENAI_API_KEY = key
|
||||
|
||||
env = builtEnv
|
||||
}
|
||||
|
||||
const profile: ProfileFile = {
|
||||
profile: selected,
|
||||
env,
|
||||
createdAt: new Date().toISOString(),
|
||||
}
|
||||
const profile = createProfileFile(selected, env)
|
||||
|
||||
const outputPath = resolve(process.cwd(), '.openclaude-profile.json')
|
||||
writeFileSync(outputPath, JSON.stringify(profile, null, 2), 'utf8')
|
||||
|
||||
console.log(`Saved profile: ${selected}`)
|
||||
console.log(`Goal: ${goal}`)
|
||||
console.log(`Model: ${profile.env.GEMINI_MODEL || profile.env.OPENAI_MODEL || getGoalDefaultOpenAIModel(goal)}`)
|
||||
console.log(`Path: ${outputPath}`)
|
||||
console.log('Next: bun run dev:profile')
|
||||
}
|
||||
|
||||
129
scripts/provider-discovery.ts
Normal file
129
scripts/provider-discovery.ts
Normal file
@@ -0,0 +1,129 @@
|
||||
import type { OllamaModelDescriptor } from '../src/utils/providerRecommendation.ts'
|
||||
|
||||
export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434'
|
||||
|
||||
function withTimeoutSignal(timeoutMs: number): {
|
||||
signal: AbortSignal
|
||||
clear: () => void
|
||||
} {
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs)
|
||||
return {
|
||||
signal: controller.signal,
|
||||
clear: () => clearTimeout(timeout),
|
||||
}
|
||||
}
|
||||
|
||||
function trimTrailingSlash(value: string): string {
|
||||
return value.replace(/\/+$/, '')
|
||||
}
|
||||
|
||||
export function getOllamaApiBaseUrl(baseUrl?: string): string {
|
||||
const parsed = new URL(
|
||||
baseUrl || process.env.OLLAMA_BASE_URL || DEFAULT_OLLAMA_BASE_URL,
|
||||
)
|
||||
const pathname = trimTrailingSlash(parsed.pathname)
|
||||
parsed.pathname = pathname.endsWith('/v1')
|
||||
? pathname.slice(0, -3) || '/'
|
||||
: pathname || '/'
|
||||
parsed.search = ''
|
||||
parsed.hash = ''
|
||||
return trimTrailingSlash(parsed.toString())
|
||||
}
|
||||
|
||||
export function getOllamaChatBaseUrl(baseUrl?: string): string {
|
||||
return `${getOllamaApiBaseUrl(baseUrl)}/v1`
|
||||
}
|
||||
|
||||
export async function hasLocalOllama(baseUrl?: string): Promise<boolean> {
|
||||
const { signal, clear } = withTimeoutSignal(1200)
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
|
||||
method: 'GET',
|
||||
signal,
|
||||
})
|
||||
return response.ok
|
||||
} catch {
|
||||
return false
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
}
|
||||
|
||||
export async function listOllamaModels(
|
||||
baseUrl?: string,
|
||||
): Promise<OllamaModelDescriptor[]> {
|
||||
const { signal, clear } = withTimeoutSignal(5000)
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
|
||||
method: 'GET',
|
||||
signal,
|
||||
})
|
||||
if (!response.ok) {
|
||||
return []
|
||||
}
|
||||
|
||||
const data = await response.json() as {
|
||||
models?: Array<{
|
||||
name?: string
|
||||
size?: number
|
||||
details?: {
|
||||
family?: string
|
||||
families?: string[]
|
||||
parameter_size?: string
|
||||
quantization_level?: string
|
||||
}
|
||||
}>
|
||||
}
|
||||
|
||||
return (data.models ?? [])
|
||||
.filter(model => Boolean(model.name))
|
||||
.map(model => ({
|
||||
name: model.name!,
|
||||
sizeBytes: typeof model.size === 'number' ? model.size : null,
|
||||
family: model.details?.family ?? null,
|
||||
families: model.details?.families ?? [],
|
||||
parameterSize: model.details?.parameter_size ?? null,
|
||||
quantizationLevel: model.details?.quantization_level ?? null,
|
||||
}))
|
||||
} catch {
|
||||
return []
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
}
|
||||
|
||||
export async function benchmarkOllamaModel(
|
||||
modelName: string,
|
||||
baseUrl?: string,
|
||||
): Promise<number | null> {
|
||||
const start = Date.now()
|
||||
const { signal, clear } = withTimeoutSignal(20000)
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
signal,
|
||||
body: JSON.stringify({
|
||||
model: modelName,
|
||||
stream: false,
|
||||
messages: [{ role: 'user', content: 'Reply with OK.' }],
|
||||
options: {
|
||||
temperature: 0,
|
||||
num_predict: 8,
|
||||
},
|
||||
}),
|
||||
})
|
||||
if (!response.ok) {
|
||||
return null
|
||||
}
|
||||
await response.json()
|
||||
return Date.now() - start
|
||||
} catch {
|
||||
return null
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
}
|
||||
@@ -3,43 +3,51 @@ import { spawn } from 'node:child_process'
|
||||
import { existsSync, readFileSync } from 'node:fs'
|
||||
import { resolve } from 'node:path'
|
||||
import {
|
||||
DEFAULT_CODEX_BASE_URL,
|
||||
resolveCodexApiCredentials,
|
||||
} from '../src/services/api/providerConfig.js'
|
||||
|
||||
type ProviderProfile = 'openai' | 'ollama' | 'codex' | 'gemini'
|
||||
|
||||
type ProfileFile = {
|
||||
profile: ProviderProfile
|
||||
env?: {
|
||||
OPENAI_BASE_URL?: string
|
||||
OPENAI_MODEL?: string
|
||||
OPENAI_API_KEY?: string
|
||||
CODEX_API_KEY?: string
|
||||
GEMINI_API_KEY?: string
|
||||
GEMINI_MODEL?: string
|
||||
GEMINI_BASE_URL?: string
|
||||
}
|
||||
}
|
||||
import {
|
||||
normalizeRecommendationGoal,
|
||||
recommendOllamaModel,
|
||||
} from '../src/utils/providerRecommendation.ts'
|
||||
import {
|
||||
buildLaunchEnv,
|
||||
selectAutoProfile,
|
||||
type ProfileFile,
|
||||
type ProviderProfile,
|
||||
} from '../src/utils/providerProfile.ts'
|
||||
import {
|
||||
getOllamaChatBaseUrl,
|
||||
hasLocalOllama,
|
||||
listOllamaModels,
|
||||
} from './provider-discovery.ts'
|
||||
|
||||
type LaunchOptions = {
|
||||
requestedProfile: ProviderProfile | 'auto' | null
|
||||
passthroughArgs: string[]
|
||||
fast: boolean
|
||||
goal: ReturnType<typeof normalizeRecommendationGoal>
|
||||
}
|
||||
|
||||
function parseLaunchOptions(argv: string[]): LaunchOptions {
|
||||
let requestedProfile: ProviderProfile | 'auto' | null = 'auto'
|
||||
const passthroughArgs: string[] = []
|
||||
let fast = false
|
||||
let goal = normalizeRecommendationGoal(process.env.OPENCLAUDE_PROFILE_GOAL)
|
||||
|
||||
for (const arg of argv) {
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const arg = argv[i]!
|
||||
const lower = arg.toLowerCase()
|
||||
if (lower === '--fast') {
|
||||
fast = true
|
||||
continue
|
||||
}
|
||||
|
||||
if (lower === '--goal') {
|
||||
goal = normalizeRecommendationGoal(argv[i + 1] ?? null)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
if ((lower === 'auto' || lower === 'openai' || lower === 'ollama' || lower === 'codex' || lower === 'gemini') && requestedProfile === 'auto') {
|
||||
requestedProfile = lower as ProviderProfile | 'auto'
|
||||
continue
|
||||
@@ -62,6 +70,7 @@ function parseLaunchOptions(argv: string[]): LaunchOptions {
|
||||
requestedProfile,
|
||||
passthroughArgs,
|
||||
fast,
|
||||
goal,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,18 +88,12 @@ function loadPersistedProfile(): ProfileFile | null {
|
||||
}
|
||||
}
|
||||
|
||||
async function hasLocalOllama(): Promise<boolean> {
|
||||
const endpoint = 'http://localhost:11434/api/tags'
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort(), 1200)
|
||||
try {
|
||||
const response = await fetch(endpoint, { signal: controller.signal })
|
||||
return response.ok
|
||||
} catch {
|
||||
return false
|
||||
} finally {
|
||||
clearTimeout(timeout)
|
||||
}
|
||||
async function resolveOllamaDefaultModel(
|
||||
goal: ReturnType<typeof normalizeRecommendationGoal>,
|
||||
): Promise<string | null> {
|
||||
const models = await listOllamaModels()
|
||||
const recommended = recommendOllamaModel(models, goal)
|
||||
return recommended?.name ?? null
|
||||
}
|
||||
|
||||
function runCommand(command: string, env: NodeJS.ProcessEnv): Promise<number> {
|
||||
@@ -107,57 +110,6 @@ function runCommand(command: string, env: NodeJS.ProcessEnv): Promise<number> {
|
||||
})
|
||||
}
|
||||
|
||||
function buildEnv(profile: ProviderProfile, persisted: ProfileFile | null): NodeJS.ProcessEnv {
|
||||
const persistedEnv = persisted?.env ?? {}
|
||||
|
||||
if (profile === 'gemini') {
|
||||
const env: NodeJS.ProcessEnv = {
|
||||
...process.env,
|
||||
CLAUDE_CODE_USE_GEMINI: '1',
|
||||
}
|
||||
delete env.CLAUDE_CODE_USE_OPENAI
|
||||
env.GEMINI_MODEL = process.env.GEMINI_MODEL || persistedEnv.GEMINI_MODEL || 'gemini-2.0-flash'
|
||||
env.GEMINI_API_KEY = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY || persistedEnv.GEMINI_API_KEY
|
||||
if (persistedEnv.GEMINI_BASE_URL || process.env.GEMINI_BASE_URL) {
|
||||
env.GEMINI_BASE_URL = process.env.GEMINI_BASE_URL || persistedEnv.GEMINI_BASE_URL
|
||||
}
|
||||
return env
|
||||
}
|
||||
|
||||
const env: NodeJS.ProcessEnv = {
|
||||
...process.env,
|
||||
CLAUDE_CODE_USE_OPENAI: '1',
|
||||
}
|
||||
|
||||
if (profile === 'ollama') {
|
||||
env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || process.env.OPENAI_BASE_URL || 'http://localhost:11434/v1'
|
||||
env.OPENAI_MODEL = persistedEnv.OPENAI_MODEL || process.env.OPENAI_MODEL || 'llama3.1:8b'
|
||||
if (!process.env.OPENAI_API_KEY || process.env.OPENAI_API_KEY === 'SUA_CHAVE') {
|
||||
delete env.OPENAI_API_KEY
|
||||
}
|
||||
return env
|
||||
}
|
||||
|
||||
if (profile === 'codex') {
|
||||
env.OPENAI_BASE_URL =
|
||||
process.env.OPENAI_BASE_URL ||
|
||||
persistedEnv.OPENAI_BASE_URL ||
|
||||
DEFAULT_CODEX_BASE_URL
|
||||
env.OPENAI_MODEL =
|
||||
process.env.OPENAI_MODEL ||
|
||||
persistedEnv.OPENAI_MODEL ||
|
||||
'codexplan'
|
||||
env.CODEX_API_KEY =
|
||||
process.env.CODEX_API_KEY || persistedEnv.CODEX_API_KEY
|
||||
return env
|
||||
}
|
||||
|
||||
env.OPENAI_BASE_URL = process.env.OPENAI_BASE_URL || persistedEnv.OPENAI_BASE_URL || 'https://api.openai.com/v1'
|
||||
env.OPENAI_MODEL = process.env.OPENAI_MODEL || persistedEnv.OPENAI_MODEL || 'gpt-4o'
|
||||
env.OPENAI_API_KEY = process.env.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY
|
||||
return env
|
||||
}
|
||||
|
||||
function applyFastFlags(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
|
||||
env.CLAUDE_CODE_SIMPLE ??= '1'
|
||||
env.CLAUDE_CODE_DISABLE_THINKING ??= '1'
|
||||
@@ -193,24 +145,45 @@ async function main(): Promise<void> {
|
||||
const options = parseLaunchOptions(process.argv.slice(2))
|
||||
const requestedProfile = options.requestedProfile
|
||||
if (!requestedProfile) {
|
||||
console.error('Usage: bun run scripts/provider-launch.ts [openai|ollama|codex|gemini|auto] [--fast] [-- <cli args>]')
|
||||
console.error('Usage: bun run scripts/provider-launch.ts [openai|ollama|codex|gemini|auto] [--fast] [--goal <latency|balanced|coding>] [-- <cli args>]')
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
const persisted = loadPersistedProfile()
|
||||
let profile: ProviderProfile
|
||||
let resolvedOllamaModel: string | null = null
|
||||
|
||||
if (requestedProfile === 'auto') {
|
||||
if (persisted) {
|
||||
profile = persisted.profile
|
||||
} else if (await hasLocalOllama()) {
|
||||
resolvedOllamaModel = await resolveOllamaDefaultModel(options.goal)
|
||||
profile = selectAutoProfile(resolvedOllamaModel)
|
||||
} else {
|
||||
profile = (await hasLocalOllama()) ? 'ollama' : 'openai'
|
||||
profile = 'openai'
|
||||
}
|
||||
} else {
|
||||
profile = requestedProfile
|
||||
}
|
||||
|
||||
const env = buildEnv(profile, persisted)
|
||||
if (
|
||||
profile === 'ollama' &&
|
||||
(persisted?.profile !== 'ollama' || !persisted?.env?.OPENAI_MODEL)
|
||||
) {
|
||||
resolvedOllamaModel ??= await resolveOllamaDefaultModel(options.goal)
|
||||
if (!resolvedOllamaModel) {
|
||||
console.error('No viable Ollama chat model was discovered. Pull a chat model first or save one with `bun run profile:init -- --provider ollama --model <model>`.')
|
||||
process.exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
const env = await buildLaunchEnv({
|
||||
profile,
|
||||
persisted,
|
||||
goal: options.goal,
|
||||
getOllamaChatBaseUrl,
|
||||
resolveOllamaDefaultModel: async () => resolvedOllamaModel || 'llama3.1:8b',
|
||||
})
|
||||
if (options.fast) {
|
||||
applyFastFlags(env)
|
||||
}
|
||||
@@ -234,6 +207,11 @@ async function main(): Promise<void> {
|
||||
console.error(`CODEX_API_KEY is required for codex profile${authHint}. Run: bun run profile:init -- --provider codex --model codexplan`)
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
if (!credentials.accountId) {
|
||||
console.error('CHATGPT_ACCOUNT_ID is required for codex profile. Set CHATGPT_ACCOUNT_ID/CODEX_ACCOUNT_ID or use an auth.json that includes it.')
|
||||
process.exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
printSummary(profile, env)
|
||||
|
||||
270
scripts/provider-recommend.ts
Normal file
270
scripts/provider-recommend.ts
Normal file
@@ -0,0 +1,270 @@
|
||||
// @ts-nocheck
|
||||
import { writeFileSync } from 'node:fs'
|
||||
import { resolve } from 'node:path'
|
||||
|
||||
import {
|
||||
applyBenchmarkLatency,
|
||||
getGoalDefaultOpenAIModel,
|
||||
isViableOllamaChatModel,
|
||||
normalizeRecommendationGoal,
|
||||
rankOllamaModels,
|
||||
selectRecommendedOllamaModel,
|
||||
type BenchmarkedOllamaModel,
|
||||
type RecommendationGoal,
|
||||
} from '../src/utils/providerRecommendation.ts'
|
||||
import {
|
||||
buildOllamaProfileEnv,
|
||||
buildOpenAIProfileEnv,
|
||||
createProfileFile,
|
||||
sanitizeApiKey,
|
||||
type ProfileFile,
|
||||
type ProviderProfile,
|
||||
} from '../src/utils/providerProfile.ts'
|
||||
import {
|
||||
benchmarkOllamaModel,
|
||||
getOllamaChatBaseUrl,
|
||||
hasLocalOllama,
|
||||
listOllamaModels,
|
||||
} from './provider-discovery.ts'
|
||||
|
||||
type CliOptions = {
|
||||
apply: boolean
|
||||
benchmark: boolean
|
||||
goal: RecommendationGoal
|
||||
json: boolean
|
||||
provider: ProviderProfile | 'auto'
|
||||
baseUrl: string | null
|
||||
}
|
||||
|
||||
function parseOptions(argv: string[]): CliOptions {
|
||||
const options: CliOptions = {
|
||||
apply: false,
|
||||
benchmark: false,
|
||||
goal: normalizeRecommendationGoal(process.env.OPENCLAUDE_PROFILE_GOAL),
|
||||
json: false,
|
||||
provider: 'auto',
|
||||
baseUrl: null,
|
||||
}
|
||||
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const arg = argv[i]?.toLowerCase()
|
||||
if (!arg) continue
|
||||
|
||||
if (arg === '--apply') {
|
||||
options.apply = true
|
||||
continue
|
||||
}
|
||||
if (arg === '--benchmark') {
|
||||
options.benchmark = true
|
||||
continue
|
||||
}
|
||||
if (arg === '--json') {
|
||||
options.json = true
|
||||
continue
|
||||
}
|
||||
if (arg === '--goal') {
|
||||
options.goal = normalizeRecommendationGoal(argv[i + 1] ?? null)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if (arg === '--provider') {
|
||||
const provider = argv[i + 1]?.toLowerCase()
|
||||
if (
|
||||
provider === 'openai' ||
|
||||
provider === 'ollama' ||
|
||||
provider === 'auto'
|
||||
) {
|
||||
options.provider = provider
|
||||
}
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if (arg === '--base-url') {
|
||||
options.baseUrl = argv[i + 1] ?? null
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
return options
|
||||
}
|
||||
|
||||
function printHumanSummary(payload: {
|
||||
goal: RecommendationGoal
|
||||
recommendedProfile: ProviderProfile
|
||||
recommendedModel: string
|
||||
rankedModels: BenchmarkedOllamaModel[]
|
||||
benchmarked: boolean
|
||||
applied: boolean
|
||||
}): void {
|
||||
console.log(`Recommendation goal: ${payload.goal}`)
|
||||
console.log(`Recommended profile: ${payload.recommendedProfile}`)
|
||||
console.log(`Recommended model: ${payload.recommendedModel}`)
|
||||
|
||||
if (payload.rankedModels.length > 0) {
|
||||
console.log('\nRanked Ollama models:')
|
||||
for (const [index, model] of payload.rankedModels.slice(0, 5).entries()) {
|
||||
const benchmarkPart =
|
||||
payload.benchmarked && model.benchmarkMs !== null
|
||||
? ` | ${Math.round(model.benchmarkMs)}ms`
|
||||
: ''
|
||||
console.log(
|
||||
`${index + 1}. ${model.name} | score=${model.score}${benchmarkPart} | ${model.summary}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if (payload.applied) {
|
||||
console.log('\nSaved .openclaude-profile.json with the recommended profile.')
|
||||
console.log('Next: bun run dev:profile')
|
||||
} else {
|
||||
console.log(
|
||||
'\nTip: run `bun run profile:auto -- --goal ' +
|
||||
payload.goal +
|
||||
'` to apply this automatically.',
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async function maybeApplyProfile(
|
||||
profile: ProviderProfile,
|
||||
model: string,
|
||||
goal: RecommendationGoal,
|
||||
baseUrl: string | null,
|
||||
): Promise<boolean> {
|
||||
let env: ProfileFile['env'] | null
|
||||
if (profile === 'ollama') {
|
||||
env = buildOllamaProfileEnv(model, {
|
||||
baseUrl,
|
||||
getOllamaChatBaseUrl,
|
||||
})
|
||||
} else {
|
||||
env = buildOpenAIProfileEnv({
|
||||
goal,
|
||||
model: model || getGoalDefaultOpenAIModel(goal),
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
processEnv: process.env,
|
||||
})
|
||||
|
||||
if (!env) {
|
||||
console.error('Cannot apply an OpenAI profile without OPENAI_API_KEY.')
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
const profileFile = createProfileFile(profile, env)
|
||||
|
||||
writeFileSync(
|
||||
resolve(process.cwd(), '.openclaude-profile.json'),
|
||||
JSON.stringify(profileFile, null, 2),
|
||||
'utf8',
|
||||
)
|
||||
return true
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const options = parseOptions(process.argv.slice(2))
|
||||
const ollamaAvailable =
|
||||
options.provider !== 'openai' &&
|
||||
(await hasLocalOllama(options.baseUrl ?? undefined))
|
||||
const ollamaModels = ollamaAvailable
|
||||
? await listOllamaModels(options.baseUrl ?? undefined)
|
||||
: []
|
||||
|
||||
const heuristicRanked = rankOllamaModels(ollamaModels, options.goal)
|
||||
const benchmarkInput = options.benchmark
|
||||
? heuristicRanked.filter(isViableOllamaChatModel).slice(0, 3)
|
||||
: []
|
||||
|
||||
const benchmarkResults: Record<string, number | null> = {}
|
||||
for (const model of benchmarkInput) {
|
||||
benchmarkResults[model.name] = await benchmarkOllamaModel(
|
||||
model.name,
|
||||
options.baseUrl ?? undefined,
|
||||
)
|
||||
}
|
||||
|
||||
const rankedModels: BenchmarkedOllamaModel[] = options.benchmark
|
||||
? applyBenchmarkLatency(heuristicRanked, benchmarkResults, options.goal)
|
||||
: heuristicRanked.map(model => ({
|
||||
...model,
|
||||
benchmarkMs: null,
|
||||
}))
|
||||
|
||||
const recommendedOllama = selectRecommendedOllamaModel(rankedModels)
|
||||
const openAIConfigured = Boolean(sanitizeApiKey(process.env.OPENAI_API_KEY))
|
||||
|
||||
let recommendedProfile: ProviderProfile
|
||||
let recommendedModel: string
|
||||
|
||||
if (options.provider === 'openai') {
|
||||
recommendedProfile = 'openai'
|
||||
recommendedModel = getGoalDefaultOpenAIModel(options.goal)
|
||||
} else if (options.provider === 'ollama') {
|
||||
if (!recommendedOllama) {
|
||||
console.error(
|
||||
'No Ollama models were discovered. Pull a model first or switch to --provider openai.',
|
||||
)
|
||||
process.exit(1)
|
||||
}
|
||||
recommendedProfile = 'ollama'
|
||||
recommendedModel = recommendedOllama.name
|
||||
} else if (recommendedOllama) {
|
||||
recommendedProfile = 'ollama'
|
||||
recommendedModel = recommendedOllama.name
|
||||
} else {
|
||||
recommendedProfile = 'openai'
|
||||
recommendedModel = getGoalDefaultOpenAIModel(options.goal)
|
||||
}
|
||||
|
||||
let applied = false
|
||||
if (options.apply) {
|
||||
applied = await maybeApplyProfile(
|
||||
recommendedProfile,
|
||||
recommendedModel,
|
||||
options.goal,
|
||||
options.baseUrl,
|
||||
)
|
||||
if (!applied) {
|
||||
process.exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
const payload = {
|
||||
goal: options.goal,
|
||||
provider: options.provider,
|
||||
ollamaAvailable,
|
||||
openAIConfigured,
|
||||
recommendedProfile,
|
||||
recommendedModel,
|
||||
benchmarked: options.benchmark,
|
||||
rankedModels,
|
||||
applied,
|
||||
}
|
||||
|
||||
if (options.json) {
|
||||
console.log(JSON.stringify(payload, null, 2))
|
||||
return
|
||||
}
|
||||
|
||||
printHumanSummary({
|
||||
goal: options.goal,
|
||||
recommendedProfile,
|
||||
recommendedModel,
|
||||
rankedModels,
|
||||
benchmarked: options.benchmark,
|
||||
applied,
|
||||
})
|
||||
|
||||
if (!recommendedOllama && !openAIConfigured) {
|
||||
console.log(
|
||||
'\nNo local Ollama model was detected and OPENAI_API_KEY is unset.',
|
||||
)
|
||||
console.log(
|
||||
'Next steps: `ollama pull qwen2.5-coder:7b` or set OPENAI_API_KEY.',
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
await main()
|
||||
|
||||
export {}
|
||||
383
src/utils/providerProfile.test.ts
Normal file
383
src/utils/providerProfile.test.ts
Normal file
@@ -0,0 +1,383 @@
|
||||
import assert from 'node:assert/strict'
|
||||
import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import test from 'node:test'
|
||||
|
||||
import {
|
||||
buildCodexProfileEnv,
|
||||
buildGeminiProfileEnv,
|
||||
buildLaunchEnv,
|
||||
buildOllamaProfileEnv,
|
||||
buildOpenAIProfileEnv,
|
||||
selectAutoProfile,
|
||||
type ProfileFile,
|
||||
} from './providerProfile.ts'
|
||||
|
||||
function profile(profile: ProfileFile['profile'], env: ProfileFile['env']): ProfileFile {
|
||||
return {
|
||||
profile,
|
||||
env,
|
||||
createdAt: '2026-04-01T00:00:00.000Z',
|
||||
}
|
||||
}
|
||||
|
||||
const missingCodexAuthPath = join(tmpdir(), 'openclaude-missing-codex-auth.json')
|
||||
|
||||
test('matching persisted ollama env is reused for ollama launch', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'ollama',
|
||||
persisted: profile('ollama', {
|
||||
OPENAI_BASE_URL: 'http://127.0.0.1:11435/v1',
|
||||
OPENAI_MODEL: 'mistral:7b-instruct',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {},
|
||||
getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
|
||||
resolveOllamaDefaultModel: async () => 'llama3.1:8b',
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'http://127.0.0.1:11435/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'mistral:7b-instruct')
|
||||
})
|
||||
|
||||
test('ollama launch ignores mismatched persisted openai env and shell model fallback', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'ollama',
|
||||
persisted: profile('openai', {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o',
|
||||
OPENAI_API_KEY: 'sk-persisted',
|
||||
}),
|
||||
goal: 'coding',
|
||||
processEnv: {
|
||||
OPENAI_BASE_URL: 'https://api.deepseek.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o-mini',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_live',
|
||||
},
|
||||
getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
|
||||
resolveOllamaDefaultModel: async () => 'qwen2.5-coder:7b',
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'http://localhost:11434/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'qwen2.5-coder:7b')
|
||||
assert.equal(env.OPENAI_API_KEY, undefined)
|
||||
assert.equal(env.CODEX_API_KEY, undefined)
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, undefined)
|
||||
})
|
||||
|
||||
test('openai launch ignores mismatched persisted ollama env', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'openai',
|
||||
persisted: profile('ollama', {
|
||||
OPENAI_BASE_URL: 'http://localhost:11434/v1',
|
||||
OPENAI_MODEL: 'llama3.1:8b',
|
||||
}),
|
||||
goal: 'latency',
|
||||
processEnv: {
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_live',
|
||||
},
|
||||
getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
|
||||
resolveOllamaDefaultModel: async () => 'llama3.1:8b',
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'gpt-4o-mini')
|
||||
assert.equal(env.OPENAI_API_KEY, 'sk-live')
|
||||
assert.equal(env.CODEX_API_KEY, undefined)
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, undefined)
|
||||
})
|
||||
|
||||
test('openai launch ignores codex shell transport hints', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'openai',
|
||||
persisted: null,
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexplan',
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'gpt-4o')
|
||||
assert.equal(env.OPENAI_API_KEY, 'sk-live')
|
||||
})
|
||||
|
||||
test('openai launch ignores codex persisted transport hints', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'openai',
|
||||
persisted: profile('openai', {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexplan',
|
||||
OPENAI_API_KEY: 'sk-persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'gpt-4o')
|
||||
assert.equal(env.OPENAI_API_KEY, 'sk-live')
|
||||
})
|
||||
|
||||
test('matching persisted gemini env is reused for gemini launch', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'gemini',
|
||||
persisted: profile('gemini', {
|
||||
GEMINI_MODEL: 'gemini-2.5-flash',
|
||||
GEMINI_API_KEY: 'gem-persisted',
|
||||
GEMINI_BASE_URL: 'https://example.test/v1beta/openai',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {},
|
||||
})
|
||||
|
||||
assert.equal(env.CLAUDE_CODE_USE_GEMINI, '1')
|
||||
assert.equal(env.CLAUDE_CODE_USE_OPENAI, undefined)
|
||||
assert.equal(env.GEMINI_MODEL, 'gemini-2.5-flash')
|
||||
assert.equal(env.GEMINI_API_KEY, 'gem-persisted')
|
||||
assert.equal(env.GEMINI_BASE_URL, 'https://example.test/v1beta/openai')
|
||||
})
|
||||
|
||||
test('gemini launch ignores mismatched persisted openai env and strips other provider secrets', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'gemini',
|
||||
persisted: profile('openai', {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o',
|
||||
OPENAI_API_KEY: 'sk-persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
GEMINI_API_KEY: 'gem-live',
|
||||
GOOGLE_API_KEY: 'google-live',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o-mini',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_live',
|
||||
CLAUDE_CODE_USE_OPENAI: '1',
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.CLAUDE_CODE_USE_GEMINI, '1')
|
||||
assert.equal(env.CLAUDE_CODE_USE_OPENAI, undefined)
|
||||
assert.equal(env.GEMINI_MODEL, 'gemini-2.0-flash')
|
||||
assert.equal(env.GEMINI_API_KEY, 'gem-live')
|
||||
assert.equal(
|
||||
env.GEMINI_BASE_URL,
|
||||
'https://generativelanguage.googleapis.com/v1beta/openai',
|
||||
)
|
||||
assert.equal(env.GOOGLE_API_KEY, undefined)
|
||||
assert.equal(env.OPENAI_API_KEY, undefined)
|
||||
assert.equal(env.CODEX_API_KEY, undefined)
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, undefined)
|
||||
})
|
||||
|
||||
test('matching persisted codex env is reused for codex launch', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('codex', {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CODEX_API_KEY: 'codex-persisted',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://chatgpt.com/backend-api/codex')
|
||||
assert.equal(env.OPENAI_MODEL, 'codexspark')
|
||||
assert.equal(env.CODEX_API_KEY, 'codex-persisted')
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_persisted')
|
||||
})
|
||||
|
||||
test('codex launch normalizes poisoned persisted base urls', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('codex', {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://chatgpt.com/backend-api/codex')
|
||||
assert.equal(env.OPENAI_MODEL, 'codexspark')
|
||||
})
|
||||
|
||||
test('codex launch ignores mismatched persisted openai env', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('openai', {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o',
|
||||
OPENAI_API_KEY: 'sk-persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o-mini',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_live',
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://chatgpt.com/backend-api/codex')
|
||||
assert.equal(env.OPENAI_MODEL, 'codexplan')
|
||||
assert.equal(env.OPENAI_API_KEY, undefined)
|
||||
assert.equal(env.CODEX_API_KEY, 'codex-live')
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_live')
|
||||
})
|
||||
|
||||
test('codex launch ignores placeholder codex env keys', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('codex', {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CODEX_API_KEY: 'codex-persisted',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
CODEX_API_KEY: 'SUA_CHAVE',
|
||||
CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.CODEX_API_KEY, 'codex-persisted')
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_persisted')
|
||||
})
|
||||
|
||||
test('codex launch prefers auth account id over stale persisted value', async () => {
|
||||
const codexHome = mkdtempSync(join(tmpdir(), 'openclaude-codex-'))
|
||||
try {
|
||||
writeFileSync(
|
||||
join(codexHome, 'auth.json'),
|
||||
JSON.stringify({
|
||||
access_token: 'codex-live',
|
||||
account_id: 'acct_auth',
|
||||
}),
|
||||
'utf8',
|
||||
)
|
||||
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('codex', {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
CODEX_HOME: codexHome,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_auth')
|
||||
} finally {
|
||||
rmSync(codexHome, { recursive: true, force: true })
|
||||
}
|
||||
})
|
||||
|
||||
test('ollama profiles never persist openai api keys', () => {
|
||||
const env = buildOllamaProfileEnv('llama3.1:8b', {
|
||||
getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
|
||||
})
|
||||
|
||||
assert.deepEqual(env, {
|
||||
OPENAI_BASE_URL: 'http://localhost:11434/v1',
|
||||
OPENAI_MODEL: 'llama3.1:8b',
|
||||
})
|
||||
assert.equal('OPENAI_API_KEY' in env, false)
|
||||
})
|
||||
|
||||
test('codex profiles accept explicit codex credentials', () => {
|
||||
const env = buildCodexProfileEnv({
|
||||
model: 'codexspark',
|
||||
apiKey: 'codex-live',
|
||||
processEnv: {
|
||||
CHATGPT_ACCOUNT_ID: 'acct_123',
|
||||
},
|
||||
})
|
||||
|
||||
assert.deepEqual(env, {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_123',
|
||||
})
|
||||
})
|
||||
|
||||
test('codex profiles require a chatgpt account id', () => {
|
||||
const env = buildCodexProfileEnv({
|
||||
model: 'codexspark',
|
||||
apiKey: 'codex-live',
|
||||
processEnv: {
|
||||
CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env, null)
|
||||
})
|
||||
|
||||
test('gemini profiles accept google api key fallback', () => {
|
||||
const env = buildGeminiProfileEnv({
|
||||
processEnv: {
|
||||
GOOGLE_API_KEY: 'gem-live',
|
||||
},
|
||||
})
|
||||
|
||||
assert.deepEqual(env, {
|
||||
GEMINI_MODEL: 'gemini-2.0-flash',
|
||||
GEMINI_API_KEY: 'gem-live',
|
||||
})
|
||||
})
|
||||
|
||||
test('gemini profiles require a key', () => {
|
||||
const env = buildGeminiProfileEnv({
|
||||
processEnv: {},
|
||||
})
|
||||
|
||||
assert.equal(env, null)
|
||||
})
|
||||
|
||||
test('openai profiles ignore codex shell transport hints', () => {
|
||||
const env = buildOpenAIProfileEnv({
|
||||
goal: 'balanced',
|
||||
apiKey: 'sk-live',
|
||||
processEnv: {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexplan',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
},
|
||||
})
|
||||
|
||||
assert.deepEqual(env, {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
})
|
||||
})
|
||||
|
||||
test('auto profile falls back to openai when no viable ollama model exists', () => {
|
||||
assert.equal(selectAutoProfile(null), 'openai')
|
||||
assert.equal(selectAutoProfile('qwen2.5-coder:7b'), 'ollama')
|
||||
})
|
||||
314
src/utils/providerProfile.ts
Normal file
314
src/utils/providerProfile.ts
Normal file
@@ -0,0 +1,314 @@
|
||||
import {
|
||||
DEFAULT_CODEX_BASE_URL,
|
||||
DEFAULT_OPENAI_BASE_URL,
|
||||
isCodexBaseUrl,
|
||||
resolveCodexApiCredentials,
|
||||
resolveProviderRequest,
|
||||
} from '../services/api/providerConfig.ts'
|
||||
import {
|
||||
getGoalDefaultOpenAIModel,
|
||||
type RecommendationGoal,
|
||||
} from './providerRecommendation.ts'
|
||||
|
||||
// OpenAI-compatible Gemini endpoint and the fallback model used when neither
// the caller nor the environment specifies one.
const DEFAULT_GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/openai'
const DEFAULT_GEMINI_MODEL = 'gemini-2.0-flash'

// The provider kinds a persisted profile may select.
export type ProviderProfile = 'openai' | 'ollama' | 'codex' | 'gemini'

// Environment variables a profile may persist. All fields are optional:
// each profile kind writes only the subset relevant to its provider.
export type ProfileEnv = {
  OPENAI_BASE_URL?: string
  OPENAI_MODEL?: string
  OPENAI_API_KEY?: string
  CODEX_API_KEY?: string
  CHATGPT_ACCOUNT_ID?: string
  CODEX_ACCOUNT_ID?: string
  GEMINI_API_KEY?: string
  GEMINI_MODEL?: string
  GEMINI_BASE_URL?: string
}

// On-disk shape of a persisted profile file.
export type ProfileFile = {
  profile: ProviderProfile
  env: ProfileEnv
  // ISO-8601 timestamp; written by createProfileFile.
  createdAt: string
}
|
||||
|
||||
export function sanitizeApiKey(
|
||||
key: string | null | undefined,
|
||||
): string | undefined {
|
||||
if (!key || key === 'SUA_CHAVE') return undefined
|
||||
return key
|
||||
}
|
||||
|
||||
export function buildOllamaProfileEnv(
|
||||
model: string,
|
||||
options: {
|
||||
baseUrl?: string | null
|
||||
getOllamaChatBaseUrl: (baseUrl?: string) => string
|
||||
},
|
||||
): ProfileEnv {
|
||||
return {
|
||||
OPENAI_BASE_URL: options.getOllamaChatBaseUrl(options.baseUrl ?? undefined),
|
||||
OPENAI_MODEL: model,
|
||||
}
|
||||
}
|
||||
|
||||
export function buildGeminiProfileEnv(options: {
|
||||
model?: string | null
|
||||
baseUrl?: string | null
|
||||
apiKey?: string | null
|
||||
processEnv?: NodeJS.ProcessEnv
|
||||
}): ProfileEnv | null {
|
||||
const processEnv = options.processEnv ?? process.env
|
||||
const key = sanitizeApiKey(
|
||||
options.apiKey ??
|
||||
processEnv.GEMINI_API_KEY ??
|
||||
processEnv.GOOGLE_API_KEY,
|
||||
)
|
||||
if (!key) {
|
||||
return null
|
||||
}
|
||||
|
||||
const env: ProfileEnv = {
|
||||
GEMINI_MODEL:
|
||||
options.model || processEnv.GEMINI_MODEL || DEFAULT_GEMINI_MODEL,
|
||||
GEMINI_API_KEY: key,
|
||||
}
|
||||
|
||||
const baseUrl = options.baseUrl || processEnv.GEMINI_BASE_URL
|
||||
if (baseUrl) {
|
||||
env.GEMINI_BASE_URL = baseUrl
|
||||
}
|
||||
|
||||
return env
|
||||
}
|
||||
|
||||
export function buildOpenAIProfileEnv(options: {
|
||||
goal: RecommendationGoal
|
||||
model?: string | null
|
||||
baseUrl?: string | null
|
||||
apiKey?: string | null
|
||||
processEnv?: NodeJS.ProcessEnv
|
||||
}): ProfileEnv | null {
|
||||
const processEnv = options.processEnv ?? process.env
|
||||
const key = sanitizeApiKey(options.apiKey ?? processEnv.OPENAI_API_KEY)
|
||||
if (!key) {
|
||||
return null
|
||||
}
|
||||
|
||||
const defaultModel = getGoalDefaultOpenAIModel(options.goal)
|
||||
const shellOpenAIRequest = resolveProviderRequest({
|
||||
model: processEnv.OPENAI_MODEL,
|
||||
baseUrl: processEnv.OPENAI_BASE_URL,
|
||||
fallbackModel: defaultModel,
|
||||
})
|
||||
const useShellOpenAIConfig = shellOpenAIRequest.transport === 'chat_completions'
|
||||
|
||||
return {
|
||||
OPENAI_BASE_URL:
|
||||
options.baseUrl ||
|
||||
(useShellOpenAIConfig ? processEnv.OPENAI_BASE_URL : undefined) ||
|
||||
DEFAULT_OPENAI_BASE_URL,
|
||||
OPENAI_MODEL:
|
||||
options.model ||
|
||||
(useShellOpenAIConfig ? processEnv.OPENAI_MODEL : undefined) ||
|
||||
defaultModel,
|
||||
OPENAI_API_KEY: key,
|
||||
}
|
||||
}
|
||||
|
||||
export function buildCodexProfileEnv(options: {
|
||||
model?: string | null
|
||||
baseUrl?: string | null
|
||||
apiKey?: string | null
|
||||
processEnv?: NodeJS.ProcessEnv
|
||||
}): ProfileEnv | null {
|
||||
const processEnv = options.processEnv ?? process.env
|
||||
const key = sanitizeApiKey(options.apiKey ?? processEnv.CODEX_API_KEY)
|
||||
const credentialEnv = key
|
||||
? ({ ...processEnv, CODEX_API_KEY: key } as NodeJS.ProcessEnv)
|
||||
: processEnv
|
||||
const credentials = resolveCodexApiCredentials(credentialEnv)
|
||||
if (!credentials.apiKey || !credentials.accountId) {
|
||||
return null
|
||||
}
|
||||
|
||||
const env: ProfileEnv = {
|
||||
OPENAI_BASE_URL: options.baseUrl || DEFAULT_CODEX_BASE_URL,
|
||||
OPENAI_MODEL: options.model || 'codexplan',
|
||||
}
|
||||
|
||||
if (key) {
|
||||
env.CODEX_API_KEY = key
|
||||
}
|
||||
|
||||
env.CHATGPT_ACCOUNT_ID = credentials.accountId
|
||||
|
||||
return env
|
||||
}
|
||||
|
||||
export function createProfileFile(
|
||||
profile: ProviderProfile,
|
||||
env: ProfileEnv,
|
||||
): ProfileFile {
|
||||
return {
|
||||
profile,
|
||||
env,
|
||||
createdAt: new Date().toISOString(),
|
||||
}
|
||||
}
|
||||
|
||||
export function selectAutoProfile(
|
||||
recommendedOllamaModel: string | null,
|
||||
): ProviderProfile {
|
||||
return recommendedOllamaModel ? 'ollama' : 'openai'
|
||||
}
|
||||
|
||||
/**
 * Builds the process environment for launching the selected provider
 * profile.
 *
 * Starts from a copy of the shell environment, then — per profile — strips
 * every other provider's credentials/transport hints and fills in base URL,
 * model, and credentials from (in varying precedence) the shell env, the
 * persisted profile file, and goal-based defaults. Persisted values are
 * honored only when the persisted file's profile matches the requested one.
 *
 * The injected getOllamaChatBaseUrl / resolveOllamaDefaultModel hooks exist
 * so tests can avoid touching a real Ollama daemon.
 */
export async function buildLaunchEnv(options: {
  profile: ProviderProfile
  persisted: ProfileFile | null
  goal: RecommendationGoal
  processEnv?: NodeJS.ProcessEnv
  getOllamaChatBaseUrl?: (baseUrl?: string) => string
  resolveOllamaDefaultModel?: (goal: RecommendationGoal) => Promise<string>
}): Promise<NodeJS.ProcessEnv> {
  const processEnv = options.processEnv ?? process.env
  // Only trust persisted env vars when they were written by the same profile
  // kind we are launching; a mismatched file is ignored entirely.
  const persistedEnv =
    options.persisted?.profile === options.profile
      ? options.persisted.env ?? {}
      : {}

  // Placeholder-filtered Gemini keys, shell taking precedence over persisted.
  const shellGeminiKey = sanitizeApiKey(
    processEnv.GEMINI_API_KEY ?? processEnv.GOOGLE_API_KEY,
  )
  const persistedGeminiKey = sanitizeApiKey(persistedEnv.GEMINI_API_KEY)

  if (options.profile === 'gemini') {
    const env: NodeJS.ProcessEnv = {
      ...processEnv,
      CLAUDE_CODE_USE_GEMINI: '1',
    }

    delete env.CLAUDE_CODE_USE_OPENAI

    // Shell values win over persisted ones; defaults fill the rest.
    env.GEMINI_MODEL =
      processEnv.GEMINI_MODEL ||
      persistedEnv.GEMINI_MODEL ||
      DEFAULT_GEMINI_MODEL
    env.GEMINI_BASE_URL =
      processEnv.GEMINI_BASE_URL ||
      persistedEnv.GEMINI_BASE_URL ||
      DEFAULT_GEMINI_BASE_URL

    const geminiKey = shellGeminiKey || persistedGeminiKey
    if (geminiKey) {
      env.GEMINI_API_KEY = geminiKey
    } else {
      delete env.GEMINI_API_KEY
    }

    // Strip every other provider's secrets/transport hints from the launch env.
    delete env.GOOGLE_API_KEY
    delete env.OPENAI_BASE_URL
    delete env.OPENAI_MODEL
    delete env.OPENAI_API_KEY
    delete env.CODEX_API_KEY
    delete env.CHATGPT_ACCOUNT_ID
    delete env.CODEX_ACCOUNT_ID

    return env
  }

  // All remaining profiles (openai / ollama / codex) use the OpenAI-compatible
  // transport flag and must not carry Gemini state.
  const env: NodeJS.ProcessEnv = {
    ...processEnv,
    CLAUDE_CODE_USE_OPENAI: '1',
  }

  delete env.CLAUDE_CODE_USE_GEMINI
  delete env.GEMINI_API_KEY
  delete env.GEMINI_MODEL
  delete env.GEMINI_BASE_URL
  delete env.GOOGLE_API_KEY

  if (options.profile === 'ollama') {
    const getOllamaBaseUrl =
      options.getOllamaChatBaseUrl ?? (() => 'http://localhost:11434/v1')
    const resolveOllamaModel =
      options.resolveOllamaDefaultModel ?? (async () => 'llama3.1:8b')

    // Persisted values win for ollama; otherwise discover base URL / model.
    env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || getOllamaBaseUrl()
    env.OPENAI_MODEL =
      persistedEnv.OPENAI_MODEL ||
      (await resolveOllamaModel(options.goal))

    // Local Ollama needs no credentials; scrub all of them.
    delete env.OPENAI_API_KEY
    delete env.CODEX_API_KEY
    delete env.CHATGPT_ACCOUNT_ID
    delete env.CODEX_ACCOUNT_ID

    return env
  }

  if (options.profile === 'codex') {
    // Reject persisted base URLs that are not actually codex endpoints
    // (normalizes "poisoned" profile files back to the default).
    env.OPENAI_BASE_URL =
      persistedEnv.OPENAI_BASE_URL && isCodexBaseUrl(persistedEnv.OPENAI_BASE_URL)
        ? persistedEnv.OPENAI_BASE_URL
        : DEFAULT_CODEX_BASE_URL
    env.OPENAI_MODEL = persistedEnv.OPENAI_MODEL || 'codexplan'
    delete env.OPENAI_API_KEY

    // Key: live shell value beats persisted; both placeholder-filtered.
    const codexKey =
      sanitizeApiKey(processEnv.CODEX_API_KEY) ||
      sanitizeApiKey(persistedEnv.CODEX_API_KEY)
    const liveCodexCredentials = resolveCodexApiCredentials(processEnv)
    // Account id: shell vars, then live auth.json credentials, then the
    // (possibly stale) persisted values — in that order.
    const codexAccountId =
      processEnv.CHATGPT_ACCOUNT_ID ||
      processEnv.CODEX_ACCOUNT_ID ||
      liveCodexCredentials.accountId ||
      persistedEnv.CHATGPT_ACCOUNT_ID ||
      persistedEnv.CODEX_ACCOUNT_ID
    if (codexKey) {
      env.CODEX_API_KEY = codexKey
    } else {
      delete env.CODEX_API_KEY
    }

    if (codexAccountId) {
      env.CHATGPT_ACCOUNT_ID = codexAccountId
    } else {
      delete env.CHATGPT_ACCOUNT_ID
    }
    delete env.CODEX_ACCOUNT_ID

    return env
  }

  // Default branch: plain OpenAI profile. Shell/persisted OPENAI_* values are
  // honored only when they resolve to a chat-completions transport, so codex
  // transport hints cannot leak into an openai launch.
  const defaultOpenAIModel = getGoalDefaultOpenAIModel(options.goal)
  const shellOpenAIRequest = resolveProviderRequest({
    model: processEnv.OPENAI_MODEL,
    baseUrl: processEnv.OPENAI_BASE_URL,
    fallbackModel: defaultOpenAIModel,
  })
  const persistedOpenAIRequest = resolveProviderRequest({
    model: persistedEnv.OPENAI_MODEL,
    baseUrl: persistedEnv.OPENAI_BASE_URL,
    fallbackModel: defaultOpenAIModel,
  })
  const useShellOpenAIConfig = shellOpenAIRequest.transport === 'chat_completions'
  // An empty persisted config is trivially acceptable; otherwise it must also
  // resolve to chat-completions.
  const usePersistedOpenAIConfig =
    (!persistedEnv.OPENAI_MODEL && !persistedEnv.OPENAI_BASE_URL) ||
    persistedOpenAIRequest.transport === 'chat_completions'

  env.OPENAI_BASE_URL =
    (useShellOpenAIConfig ? processEnv.OPENAI_BASE_URL : undefined) ||
    (usePersistedOpenAIConfig ? persistedEnv.OPENAI_BASE_URL : undefined) ||
    DEFAULT_OPENAI_BASE_URL
  env.OPENAI_MODEL =
    (useShellOpenAIConfig ? processEnv.OPENAI_MODEL : undefined) ||
    (usePersistedOpenAIConfig ? persistedEnv.OPENAI_MODEL : undefined) ||
    defaultOpenAIModel
  env.OPENAI_API_KEY = processEnv.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY
  delete env.CODEX_API_KEY
  delete env.CHATGPT_ACCOUNT_ID
  delete env.CODEX_ACCOUNT_ID
  return env
}
|
||||
194
src/utils/providerRecommendation.test.ts
Normal file
194
src/utils/providerRecommendation.test.ts
Normal file
@@ -0,0 +1,194 @@
|
||||
import assert from 'node:assert/strict'
|
||||
import test from 'node:test'
|
||||
|
||||
import {
|
||||
applyBenchmarkLatency,
|
||||
getGoalDefaultOpenAIModel,
|
||||
normalizeRecommendationGoal,
|
||||
rankOllamaModels,
|
||||
recommendOllamaModel,
|
||||
type OllamaModelDescriptor,
|
||||
} from './providerRecommendation.ts'
|
||||
|
||||
function model(
|
||||
name: string,
|
||||
overrides: Partial<OllamaModelDescriptor> = {},
|
||||
): OllamaModelDescriptor {
|
||||
return {
|
||||
name,
|
||||
sizeBytes: null,
|
||||
family: null,
|
||||
families: [],
|
||||
parameterSize: null,
|
||||
quantizationLevel: null,
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
test('normalizes recommendation goals safely', () => {
|
||||
assert.equal(normalizeRecommendationGoal('coding'), 'coding')
|
||||
assert.equal(normalizeRecommendationGoal(' LATENCY '), 'latency')
|
||||
assert.equal(normalizeRecommendationGoal('weird'), 'balanced')
|
||||
assert.equal(normalizeRecommendationGoal(undefined), 'balanced')
|
||||
})
|
||||
|
||||
test('coding goal prefers coding-oriented ollama models', () => {
|
||||
const recommended = recommendOllamaModel(
|
||||
[
|
||||
model('llama3.1:8b', {
|
||||
parameterSize: '8B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('qwen2.5-coder:7b', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'coding',
|
||||
)
|
||||
|
||||
assert.equal(recommended?.name, 'qwen2.5-coder:7b')
|
||||
})
|
||||
|
||||
test('latency goal prefers smaller models', () => {
|
||||
const recommended = recommendOllamaModel(
|
||||
[
|
||||
model('llama3.1:70b', {
|
||||
parameterSize: '70B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('llama3.2:3b', {
|
||||
parameterSize: '3B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'latency',
|
||||
)
|
||||
|
||||
assert.equal(recommended?.name, 'llama3.2:3b')
|
||||
})
|
||||
|
||||
test('non-chat embedding models are heavily demoted', () => {
|
||||
const ranked = rankOllamaModels(
|
||||
[
|
||||
model('nomic-embed-text', { parameterSize: '0.5B' }),
|
||||
model('mistral:7b-instruct', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'balanced',
|
||||
)
|
||||
|
||||
assert.equal(ranked[0]?.name, 'mistral:7b-instruct')
|
||||
})
|
||||
|
||||
test('auto-pick ignores non-chat ollama models', () => {
|
||||
const recommended = recommendOllamaModel(
|
||||
[
|
||||
model('nomic-embed-text', { parameterSize: '0.5B' }),
|
||||
model('bge-reranker-v2', { parameterSize: '1.5B' }),
|
||||
model('whisper-large-v3', { parameterSize: '1.6B' }),
|
||||
],
|
||||
'balanced',
|
||||
)
|
||||
|
||||
assert.equal(recommended, null)
|
||||
})
|
||||
|
||||
test('benchmark latency can reorder close recommendations', () => {
|
||||
const ranked = rankOllamaModels(
|
||||
[
|
||||
model('llama3.1:8b', {
|
||||
parameterSize: '8B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('mistral:7b-instruct', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'latency',
|
||||
)
|
||||
|
||||
const benchmarked = applyBenchmarkLatency(
|
||||
ranked,
|
||||
{
|
||||
'llama3.1:8b': 2000,
|
||||
'mistral:7b-instruct': 350,
|
||||
},
|
||||
'latency',
|
||||
)
|
||||
|
||||
assert.equal(benchmarked[0]?.name, 'mistral:7b-instruct')
|
||||
assert.equal(benchmarked[0]?.benchmarkMs, 350)
|
||||
})
|
||||
|
||||
test('unbenchmarked models stay behind benchmarked candidates', () => {
|
||||
const ranked = rankOllamaModels(
|
||||
[
|
||||
model('phi4-mini:4b', {
|
||||
parameterSize: '4B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('mistral:7b-instruct', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('llama3.1:8b', {
|
||||
parameterSize: '8B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('qwen2.5:14b', {
|
||||
parameterSize: '14B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'latency',
|
||||
)
|
||||
|
||||
const benchmarked = applyBenchmarkLatency(
|
||||
ranked,
|
||||
{
|
||||
'phi4-mini:4b': 2400,
|
||||
'mistral:7b-instruct': 2200,
|
||||
'llama3.1:8b': 2100,
|
||||
},
|
||||
'latency',
|
||||
)
|
||||
|
||||
assert.ok(benchmarked.slice(0, 3).every(item => item.benchmarkMs !== null))
|
||||
assert.equal(benchmarked[3]?.name, 'qwen2.5:14b')
|
||||
assert.equal(benchmarked[3]?.benchmarkMs, null)
|
||||
})
|
||||
|
||||
test('coding goal recognizes codestral and devstral families', () => {
|
||||
const ranked = rankOllamaModels(
|
||||
[
|
||||
model('mistral:7b-instruct', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('codestral:22b', {
|
||||
parameterSize: '22B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('devstral:24b', {
|
||||
parameterSize: '24B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'coding',
|
||||
)
|
||||
|
||||
assert.deepEqual(ranked.slice(0, 2).map(item => item.name), [
|
||||
'devstral:24b',
|
||||
'codestral:22b',
|
||||
])
|
||||
})
|
||||
|
||||
test('goal defaults choose sensible openai models', () => {
|
||||
assert.equal(getGoalDefaultOpenAIModel('latency'), 'gpt-4o-mini')
|
||||
assert.equal(getGoalDefaultOpenAIModel('balanced'), 'gpt-4o')
|
||||
assert.equal(getGoalDefaultOpenAIModel('coding'), 'gpt-4o')
|
||||
})
|
||||
317
src/utils/providerRecommendation.ts
Normal file
317
src/utils/providerRecommendation.ts
Normal file
@@ -0,0 +1,317 @@
|
||||
// The optimization target used to rank local models and pick defaults.
export type RecommendationGoal = 'latency' | 'balanced' | 'coding'

// Raw metadata for a locally installed Ollama model. Every descriptive
// field may be missing; ranking degrades gracefully when they are.
export type OllamaModelDescriptor = {
  name: string
  sizeBytes?: number | null
  family?: string | null
  families?: string[]
  parameterSize?: string | null
  quantizationLevel?: string | null
}

// A descriptor plus its heuristic score and human-readable rationale.
export type RankedOllamaModel = OllamaModelDescriptor & {
  score: number
  reasons: string[]
  summary: string
}

// A ranked model annotated with measured latency; null when not benchmarked.
export type BenchmarkedOllamaModel = RankedOllamaModel & {
  benchmarkMs: number | null
}

// Substrings marking coding-specialized model families.
const CODING_HINTS = [
  'coder',
  'codellama',
  'codegemma',
  'codestral',
  'devstral',
  'starcoder',
  'deepseek-coder',
  'qwen2.5-coder',
  'qwen-coder',
]

// Substrings marking mainstream general-purpose chat families.
const GENERAL_HINTS = [
  'llama',
  'qwen',
  'mistral',
  'gemma',
  'phi',
  'deepseek',
]

// Markers for instruction-tuned variants, and for artifacts that cannot
// chat at all (embeddings, rerankers, speech models).
const INSTRUCT_HINTS = ['instruct', 'chat', 'assistant']
const NON_CHAT_HINTS = ['embed', 'embedding', 'rerank', 'bge', 'whisper']
||||
|
||||
function modelHaystack(model: OllamaModelDescriptor): string {
|
||||
return [
|
||||
model.name,
|
||||
model.family ?? '',
|
||||
...(model.families ?? []),
|
||||
model.parameterSize ?? '',
|
||||
model.quantizationLevel ?? '',
|
||||
]
|
||||
.join(' ')
|
||||
.toLowerCase()
|
||||
}
|
||||
|
||||
function includesAny(text: string, needles: string[]): boolean {
|
||||
return needles.some(needle => text.includes(needle))
|
||||
}
|
||||
|
||||
export function isViableOllamaChatModel(model: OllamaModelDescriptor): boolean {
|
||||
return !includesAny(modelHaystack(model), NON_CHAT_HINTS)
|
||||
}
|
||||
|
||||
export function selectRecommendedOllamaModel<
|
||||
T extends OllamaModelDescriptor,
|
||||
>(models: T[]): T | null {
|
||||
return models.find(isViableOllamaChatModel) ?? null
|
||||
}
|
||||
|
||||
function inferParameterBillions(model: OllamaModelDescriptor): number | null {
|
||||
const text = `${model.parameterSize ?? ''} ${model.name}`.toLowerCase()
|
||||
const match = text.match(/(\d+(?:\.\d+)?)\s*b\b/)
|
||||
if (match?.[1]) {
|
||||
return Number(match[1])
|
||||
}
|
||||
if (typeof model.sizeBytes === 'number' && model.sizeBytes > 0) {
|
||||
return Number((model.sizeBytes / 1_000_000_000).toFixed(1))
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
function quantizationBucket(model: OllamaModelDescriptor): string {
|
||||
return (model.quantizationLevel ?? model.name).toLowerCase()
|
||||
}
|
||||
|
||||
function scoreSizeTier(
|
||||
paramsB: number | null,
|
||||
goal: RecommendationGoal,
|
||||
reasons: string[],
|
||||
): number {
|
||||
if (paramsB === null) {
|
||||
reasons.push('unknown size')
|
||||
return 0
|
||||
}
|
||||
|
||||
if (goal === 'latency') {
|
||||
if (paramsB <= 4) {
|
||||
reasons.push('tiny model for low latency')
|
||||
return 32
|
||||
}
|
||||
if (paramsB <= 8) {
|
||||
reasons.push('small model for fast responses')
|
||||
return 26
|
||||
}
|
||||
if (paramsB <= 14) {
|
||||
reasons.push('mid-sized model with acceptable latency')
|
||||
return 16
|
||||
}
|
||||
if (paramsB <= 24) {
|
||||
reasons.push('larger model may be slower')
|
||||
return 8
|
||||
}
|
||||
reasons.push('large model likely slower locally')
|
||||
return paramsB <= 40 ? 0 : -8
|
||||
}
|
||||
|
||||
if (goal === 'coding') {
|
||||
if (paramsB >= 7 && paramsB <= 14) {
|
||||
reasons.push('strong coding size tier')
|
||||
return 24
|
||||
}
|
||||
if (paramsB > 14 && paramsB <= 34) {
|
||||
reasons.push('large coding-capable size tier')
|
||||
return 28
|
||||
}
|
||||
if (paramsB > 34) {
|
||||
reasons.push('very large model with higher quality potential')
|
||||
return 18
|
||||
}
|
||||
reasons.push('compact model may trade off coding depth')
|
||||
return 12
|
||||
}
|
||||
|
||||
if (paramsB >= 7 && paramsB <= 14) {
|
||||
reasons.push('great balanced size tier')
|
||||
return 26
|
||||
}
|
||||
if (paramsB >= 3 && paramsB < 7) {
|
||||
reasons.push('compact balanced size tier')
|
||||
return 18
|
||||
}
|
||||
if (paramsB > 14 && paramsB <= 24) {
|
||||
reasons.push('high quality balanced size tier')
|
||||
return 20
|
||||
}
|
||||
if (paramsB > 24) {
|
||||
reasons.push('large model for quality-first usage')
|
||||
return 10
|
||||
}
|
||||
reasons.push('very small model for general usage')
|
||||
return 8
|
||||
}
|
||||
|
||||
function scoreQuantization(
|
||||
model: OllamaModelDescriptor,
|
||||
goal: RecommendationGoal,
|
||||
reasons: string[],
|
||||
): number {
|
||||
const quant = quantizationBucket(model)
|
||||
if (quant.includes('q4')) {
|
||||
reasons.push('efficient Q4 quantization')
|
||||
return goal === 'latency' ? 8 : 4
|
||||
}
|
||||
if (quant.includes('q5')) {
|
||||
reasons.push('balanced Q5 quantization')
|
||||
return goal === 'latency' ? 6 : 5
|
||||
}
|
||||
if (quant.includes('q8')) {
|
||||
reasons.push('higher quality Q8 quantization')
|
||||
return goal === 'latency' ? 2 : 5
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
function compareRankedModels(
|
||||
a: RankedOllamaModel | BenchmarkedOllamaModel,
|
||||
b: RankedOllamaModel | BenchmarkedOllamaModel,
|
||||
goal: RecommendationGoal,
|
||||
): number {
|
||||
if (b.score !== a.score) {
|
||||
return b.score - a.score
|
||||
}
|
||||
|
||||
const aSize = inferParameterBillions(a) ?? Number.POSITIVE_INFINITY
|
||||
const bSize = inferParameterBillions(b) ?? Number.POSITIVE_INFINITY
|
||||
|
||||
if (goal === 'latency') {
|
||||
return aSize - bSize
|
||||
}
|
||||
|
||||
if (goal === 'coding') {
|
||||
return bSize - aSize
|
||||
}
|
||||
|
||||
const target = 14
|
||||
return Math.abs(aSize - target) - Math.abs(bSize - target)
|
||||
}
|
||||
|
||||
export function normalizeRecommendationGoal(
|
||||
goal: string | null | undefined,
|
||||
): RecommendationGoal {
|
||||
const normalized = goal?.trim().toLowerCase()
|
||||
if (
|
||||
normalized === 'latency' ||
|
||||
normalized === 'balanced' ||
|
||||
normalized === 'coding'
|
||||
) {
|
||||
return normalized
|
||||
}
|
||||
return 'balanced'
|
||||
}
|
||||
|
||||
export function getGoalDefaultOpenAIModel(goal: RecommendationGoal): string {
|
||||
switch (goal) {
|
||||
case 'latency':
|
||||
return 'gpt-4o-mini'
|
||||
case 'coding':
|
||||
return 'gpt-4o'
|
||||
case 'balanced':
|
||||
default:
|
||||
return 'gpt-4o'
|
||||
}
|
||||
}
|
||||
|
||||
export function rankOllamaModels(
|
||||
models: OllamaModelDescriptor[],
|
||||
goal: RecommendationGoal,
|
||||
): RankedOllamaModel[] {
|
||||
return models
|
||||
.map(model => {
|
||||
const haystack = modelHaystack(model)
|
||||
const reasons: string[] = []
|
||||
let score = 0
|
||||
|
||||
if (includesAny(haystack, NON_CHAT_HINTS)) {
|
||||
score -= 40
|
||||
reasons.push('not a chat-first model')
|
||||
}
|
||||
|
||||
if (includesAny(haystack, CODING_HINTS)) {
|
||||
score += goal === 'coding' ? 24 : goal === 'balanced' ? 10 : 4
|
||||
reasons.push('coding-oriented model family')
|
||||
}
|
||||
|
||||
if (includesAny(haystack, GENERAL_HINTS)) {
|
||||
score += goal === 'latency' ? 4 : goal === 'coding' ? 6 : 8
|
||||
reasons.push('strong general-purpose model family')
|
||||
}
|
||||
|
||||
if (includesAny(haystack, INSTRUCT_HINTS)) {
|
||||
score += goal === 'latency' ? 2 : 6
|
||||
reasons.push('chat/instruct tuned')
|
||||
}
|
||||
|
||||
if (haystack.includes('vision') || haystack.includes('vl')) {
|
||||
score -= 2
|
||||
reasons.push('vision model adds extra overhead')
|
||||
}
|
||||
|
||||
score += scoreSizeTier(inferParameterBillions(model), goal, reasons)
|
||||
score += scoreQuantization(model, goal, reasons)
|
||||
|
||||
const summary = reasons.slice(0, 3).join(', ')
|
||||
return {
|
||||
...model,
|
||||
score,
|
||||
reasons,
|
||||
summary,
|
||||
}
|
||||
})
|
||||
.sort((a, b) => compareRankedModels(a, b, goal))
|
||||
}
|
||||
|
||||
export function recommendOllamaModel(
|
||||
models: OllamaModelDescriptor[],
|
||||
goal: RecommendationGoal,
|
||||
): RankedOllamaModel | null {
|
||||
return selectRecommendedOllamaModel(rankOllamaModels(models, goal))
|
||||
}
|
||||
|
||||
export function applyBenchmarkLatency(
|
||||
models: RankedOllamaModel[],
|
||||
benchmarkMs: Record<string, number | null>,
|
||||
goal: RecommendationGoal,
|
||||
): BenchmarkedOllamaModel[] {
|
||||
const divisor =
|
||||
goal === 'latency' ? 120 : goal === 'coding' ? 500 : 240
|
||||
|
||||
const scoredModels = models
|
||||
.map(model => {
|
||||
const latency = benchmarkMs[model.name] ?? null
|
||||
const benchmarkPenalty = latency === null ? 0 : latency / divisor
|
||||
const reasons =
|
||||
latency === null
|
||||
? model.reasons
|
||||
: [`benchmarked at ${Math.round(latency)}ms`, ...model.reasons]
|
||||
|
||||
return {
|
||||
...model,
|
||||
benchmarkMs: latency,
|
||||
reasons,
|
||||
summary: reasons.slice(0, 3).join(', '),
|
||||
score: Number((model.score - benchmarkPenalty).toFixed(2)),
|
||||
}
|
||||
})
|
||||
|
||||
const benchmarkedModels = scoredModels.filter(model => model.benchmarkMs !== null)
|
||||
if (benchmarkedModels.length === 0) {
|
||||
return scoredModels.sort((a, b) => compareRankedModels(a, b, goal))
|
||||
}
|
||||
|
||||
const unbenchmarkedModels = scoredModels.filter(model => model.benchmarkMs === null)
|
||||
benchmarkedModels.sort((a, b) => compareRankedModels(a, b, goal))
|
||||
return [...benchmarkedModels, ...unbenchmarkedModels]
|
||||
}
|
||||
Reference in New Issue
Block a user