diff --git a/PLAYBOOK.md b/PLAYBOOK.md index 662ee4dc..076c377f 100644 --- a/PLAYBOOK.md +++ b/PLAYBOOK.md @@ -37,6 +37,18 @@ If everything is healthy, OpenClaude starts directly. bun run profile:init -- --provider ollama --model llama3.1:8b ``` +Or let OpenClaude recommend the best local model for your goal: + +```powershell +bun run profile:init -- --provider ollama --goal coding +``` + +Preview recommendations before saving: + +```powershell +bun run profile:recommend -- --goal coding --benchmark +``` + ### 3.2 Confirm profile file ```powershell @@ -171,6 +183,12 @@ Fix: bun run profile:init -- --provider ollama --model llama3.1:8b ``` +Or pick a local Ollama profile automatically by goal: + +```powershell +bun run profile:init -- --provider ollama --goal balanced +``` + ## 6.5 Placeholder key (`SUA_CHAVE`) error Cause: @@ -202,6 +220,16 @@ bun run profile:fast # llama3.2:3b bun run profile:code # qwen2.5-coder:7b ``` +Goal-based local auto-selection: + +```powershell +bun run profile:init -- --provider ollama --goal latency +bun run profile:init -- --provider ollama --goal balanced +bun run profile:init -- --provider ollama --goal coding +``` + +`profile:auto` is a best-available provider picker, not a local-only command. Use `--provider ollama` when you want to stay on a local model. + ## 8. 
Practical Prompt Playbook (Copy/Paste) ## 8.1 Code understanding diff --git a/README.md b/README.md index 74bc111e..8a0690f9 100644 --- a/README.md +++ b/README.md @@ -209,7 +209,7 @@ bun run doctor:runtime:json # persist a diagnostics report to reports/doctor-runtime.json bun run doctor:report -# full local hardening check (typecheck + smoke + runtime doctor) +# full local hardening check (smoke + runtime doctor) bun run hardening:check # strict hardening (includes project-wide typecheck) @@ -226,9 +226,15 @@ Notes: Use profile launchers to avoid repeated environment setup: ```bash -# one-time profile bootstrap (auto-detect ollama, otherwise openai) +# one-time profile bootstrap (prefer viable local Ollama, otherwise OpenAI) bun run profile:init +# preview the best provider/model for your goal +bun run profile:recommend -- --goal coding --benchmark + +# auto-apply the best available local/openai provider/model for your goal +bun run profile:auto -- --goal latency + # codex bootstrap (defaults to codexplan and ~/.codex/auth.json) bun run profile:codex @@ -238,6 +244,9 @@ bun run profile:init -- --provider openai --api-key sk-... # ollama bootstrap with custom model bun run profile:init -- --provider ollama --model llama3.1:8b +# ollama bootstrap with intelligent model auto-selection +bun run profile:init -- --provider ollama --goal coding + # codex bootstrap with a fast model alias bun run profile:init -- --provider codex --model codexspark @@ -254,6 +263,14 @@ bun run dev:openai bun run dev:ollama ``` +`profile:recommend` ranks installed Ollama models for `latency`, `balanced`, or `coding`, and `profile:auto` can persist the recommendation directly. +If no profile exists yet, `dev:profile` now uses the same goal-aware defaults when picking the initial model. + +Use `--provider ollama` when you want a local-only path. Auto mode falls back to OpenAI when no viable local chat model is installed. 
+Goal-based Ollama selection only recommends among models that are already installed and reachable from Ollama. + +Use `profile:codex` or `--provider codex` when you want the ChatGPT Codex backend. + `dev:openai`, `dev:ollama`, and `dev:codex` run `doctor:runtime` first and only launch the app if checks pass. For `dev:ollama`, make sure Ollama is running locally before launch. diff --git a/package.json b/package.json index 688d3697..7c595fbe 100644 --- a/package.json +++ b/package.json @@ -22,12 +22,15 @@ "dev:ollama": "bun run scripts/provider-launch.ts ollama", "dev:ollama:fast": "bun run scripts/provider-launch.ts ollama --fast --bare", "profile:init": "bun run scripts/provider-bootstrap.ts", + "profile:recommend": "bun run scripts/provider-recommend.ts", + "profile:auto": "bun run scripts/provider-recommend.ts --apply", "profile:codex": "bun run profile:init -- --provider codex --model codexplan", "profile:fast": "bun run profile:init -- --provider ollama --model llama3.2:3b", "profile:code": "bun run profile:init -- --provider ollama --model qwen2.5-coder:7b", "dev:fast": "bun run profile:fast && bun run dev:ollama:fast", "dev:code": "bun run profile:code && bun run dev:profile", "start": "node dist/cli.mjs", + "test:provider-recommendation": "node --test --experimental-strip-types src/utils/providerRecommendation.test.ts src/utils/providerProfile.test.ts", "typecheck": "tsc --noEmit", "smoke": "bun run build && node dist/cli.mjs --version", "test:provider": "bun test src/services/api/*.test.ts", diff --git a/scripts/provider-bootstrap.ts b/scripts/provider-bootstrap.ts index 7e5d1f66..82ebbbb6 100644 --- a/scripts/provider-bootstrap.ts +++ b/scripts/provider-bootstrap.ts @@ -2,25 +2,28 @@ import { writeFileSync } from 'node:fs' import { resolve } from 'node:path' import { - DEFAULT_CODEX_BASE_URL, resolveCodexApiCredentials, } from '../src/services/api/providerConfig.js' - -type ProviderProfile = 'openai' | 'ollama' | 'codex' | 'gemini' - -type ProfileFile = 
{ - profile: ProviderProfile - env: { - OPENAI_BASE_URL?: string - OPENAI_MODEL?: string - OPENAI_API_KEY?: string - CODEX_API_KEY?: string - GEMINI_API_KEY?: string - GEMINI_MODEL?: string - GEMINI_BASE_URL?: string - } - createdAt: string -} +import { + getGoalDefaultOpenAIModel, + normalizeRecommendationGoal, + recommendOllamaModel, +} from '../src/utils/providerRecommendation.ts' +import { + buildCodexProfileEnv, + buildGeminiProfileEnv, + buildOllamaProfileEnv, + buildOpenAIProfileEnv, + createProfileFile, + selectAutoProfile, + type ProfileFile, + type ProviderProfile, +} from '../src/utils/providerProfile.ts' +import { + getOllamaChatBaseUrl, + hasLocalOllama, + listOllamaModels, +} from './provider-discovery.ts' function parseArg(name: string): string | null { const args = process.argv.slice(2) @@ -35,27 +38,16 @@ function parseProviderArg(): ProviderProfile | 'auto' { return 'auto' } -async function hasLocalOllama(): Promise { - const endpoint = 'http://localhost:11434/api/tags' - const controller = new AbortController() - const timeout = setTimeout(() => controller.abort(), 1200) +async function resolveOllamaModel( + argModel: string | null, + argBaseUrl: string | null, + goal: ReturnType, +): Promise { + if (argModel) return argModel - try { - const response = await fetch(endpoint, { - method: 'GET', - signal: controller.signal, - }) - return response.ok - } catch { - return false - } finally { - clearTimeout(timeout) - } -} - -function sanitizeApiKey(key: string | null): string | undefined { - if (!key || key === 'SUA_CHAVE') return undefined - return key + const discovered = await listOllamaModels(argBaseUrl || undefined) + const recommended = recommendOllamaModel(discovered, goal) + return recommended?.name ?? 
null } async function main(): Promise { @@ -63,69 +55,104 @@ async function main(): Promise { const argModel = parseArg('--model') const argBaseUrl = parseArg('--base-url') const argApiKey = parseArg('--api-key') + const goal = normalizeRecommendationGoal( + parseArg('--goal') || process.env.OPENCLAUDE_PROFILE_GOAL, + ) let selected: ProviderProfile + let resolvedOllamaModel: string | null = null if (provider === 'auto') { - selected = (await hasLocalOllama()) ? 'ollama' : 'openai' + if (await hasLocalOllama(argBaseUrl || undefined)) { + resolvedOllamaModel = await resolveOllamaModel(argModel, argBaseUrl, goal) + selected = selectAutoProfile(resolvedOllamaModel) + } else { + selected = 'openai' + } } else { selected = provider } - const env: ProfileFile['env'] = {} - + let env: ProfileFile['env'] if (selected === 'gemini') { - env.GEMINI_MODEL = argModel || process.env.GEMINI_MODEL || 'gemini-2.0-flash' - const key = sanitizeApiKey(argApiKey || process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY || null) - if (!key) { + const builtEnv = buildGeminiProfileEnv({ + model: argModel || null, + baseUrl: argBaseUrl || null, + apiKey: argApiKey || null, + processEnv: process.env, + }) + + if (!builtEnv) { console.error('Gemini profile requires an API key. 
Use --api-key or set GEMINI_API_KEY.') console.error('Get a free key at: https://aistudio.google.com/apikey') process.exit(1) } - env.GEMINI_API_KEY = key - if (argBaseUrl) env.GEMINI_BASE_URL = argBaseUrl + + env = builtEnv } else if (selected === 'ollama') { - env.OPENAI_BASE_URL = argBaseUrl || 'http://localhost:11434/v1' - env.OPENAI_MODEL = argModel || process.env.OPENAI_MODEL || 'llama3.1:8b' - const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null) - if (key) env.OPENAI_API_KEY = key - } else if (selected === 'codex') { - env.OPENAI_BASE_URL = - argBaseUrl || process.env.OPENAI_BASE_URL || DEFAULT_CODEX_BASE_URL - env.OPENAI_MODEL = argModel || process.env.OPENAI_MODEL || 'codexplan' - const key = sanitizeApiKey(argApiKey || process.env.CODEX_API_KEY || null) - if (key) { - env.CODEX_API_KEY = key - } else { - const credentials = resolveCodexApiCredentials(process.env) - if (!credentials.apiKey) { - const authHint = credentials.authPath - ? ` or make sure ${credentials.authPath} exists` - : '' - console.error(`Codex profile requires CODEX_API_KEY${authHint}.`) - process.exit(1) - } + resolvedOllamaModel ??= await resolveOllamaModel(argModel, argBaseUrl, goal) + if (!resolvedOllamaModel) { + console.error('No viable Ollama chat model was discovered. Pull a chat model first or pass --model explicitly.') + process.exit(1) } + + env = buildOllamaProfileEnv( + resolvedOllamaModel, + { + baseUrl: argBaseUrl, + getOllamaChatBaseUrl, + }, + ) + } else if (selected === 'codex') { + const builtEnv = buildCodexProfileEnv({ + model: argModel, + baseUrl: argBaseUrl, + apiKey: argApiKey || process.env.CODEX_API_KEY || null, + processEnv: process.env, + }) + + if (!builtEnv) { + const credentials = resolveCodexApiCredentials( + argApiKey + ? { ...process.env, CODEX_API_KEY: argApiKey } + : process.env, + ) + const authHint = credentials.authPath + ? 
` or make sure ${credentials.authPath} exists` + : '' + if (!credentials.apiKey) { + console.error(`Codex profile requires CODEX_API_KEY${authHint}.`) + } else { + console.error('Codex profile requires CHATGPT_ACCOUNT_ID or an auth.json that includes it.') + } + process.exit(1) + } + + env = builtEnv } else { - env.OPENAI_BASE_URL = argBaseUrl || process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1' - env.OPENAI_MODEL = argModel || process.env.OPENAI_MODEL || 'gpt-4o' - const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null) - if (!key) { + const builtEnv = buildOpenAIProfileEnv({ + goal, + model: argModel || null, + baseUrl: argBaseUrl || null, + apiKey: argApiKey || process.env.OPENAI_API_KEY || null, + processEnv: process.env, + }) + + if (!builtEnv) { console.error('OpenAI profile requires a real API key. Use --api-key or set OPENAI_API_KEY.') process.exit(1) } - env.OPENAI_API_KEY = key + + env = builtEnv } - const profile: ProfileFile = { - profile: selected, - env, - createdAt: new Date().toISOString(), - } + const profile = createProfileFile(selected, env) const outputPath = resolve(process.cwd(), '.openclaude-profile.json') writeFileSync(outputPath, JSON.stringify(profile, null, 2), 'utf8') console.log(`Saved profile: ${selected}`) + console.log(`Goal: ${goal}`) + console.log(`Model: ${profile.env.GEMINI_MODEL || profile.env.OPENAI_MODEL || getGoalDefaultOpenAIModel(goal)}`) console.log(`Path: ${outputPath}`) console.log('Next: bun run dev:profile') } diff --git a/scripts/provider-discovery.ts b/scripts/provider-discovery.ts new file mode 100644 index 00000000..9e3aacda --- /dev/null +++ b/scripts/provider-discovery.ts @@ -0,0 +1,129 @@ +import type { OllamaModelDescriptor } from '../src/utils/providerRecommendation.ts' + +export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434' + +function withTimeoutSignal(timeoutMs: number): { + signal: AbortSignal + clear: () => void +} { + const controller = new AbortController() + 
const timeout = setTimeout(() => controller.abort(), timeoutMs) + return { + signal: controller.signal, + clear: () => clearTimeout(timeout), + } +} + +function trimTrailingSlash(value: string): string { + return value.replace(/\/+$/, '') +} + +export function getOllamaApiBaseUrl(baseUrl?: string): string { + const parsed = new URL( + baseUrl || process.env.OLLAMA_BASE_URL || DEFAULT_OLLAMA_BASE_URL, + ) + const pathname = trimTrailingSlash(parsed.pathname) + parsed.pathname = pathname.endsWith('/v1') + ? pathname.slice(0, -3) || '/' + : pathname || '/' + parsed.search = '' + parsed.hash = '' + return trimTrailingSlash(parsed.toString()) +} + +export function getOllamaChatBaseUrl(baseUrl?: string): string { + return `${getOllamaApiBaseUrl(baseUrl)}/v1` +} + +export async function hasLocalOllama(baseUrl?: string): Promise { + const { signal, clear } = withTimeoutSignal(1200) + try { + const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, { + method: 'GET', + signal, + }) + return response.ok + } catch { + return false + } finally { + clear() + } +} + +export async function listOllamaModels( + baseUrl?: string, +): Promise { + const { signal, clear } = withTimeoutSignal(5000) + try { + const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, { + method: 'GET', + signal, + }) + if (!response.ok) { + return [] + } + + const data = await response.json() as { + models?: Array<{ + name?: string + size?: number + details?: { + family?: string + families?: string[] + parameter_size?: string + quantization_level?: string + } + }> + } + + return (data.models ?? []) + .filter(model => Boolean(model.name)) + .map(model => ({ + name: model.name!, + sizeBytes: typeof model.size === 'number' ? model.size : null, + family: model.details?.family ?? null, + families: model.details?.families ?? [], + parameterSize: model.details?.parameter_size ?? null, + quantizationLevel: model.details?.quantization_level ?? 
null, + })) + } catch { + return [] + } finally { + clear() + } +} + +export async function benchmarkOllamaModel( + modelName: string, + baseUrl?: string, +): Promise { + const start = Date.now() + const { signal, clear } = withTimeoutSignal(20000) + try { + const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/chat`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + signal, + body: JSON.stringify({ + model: modelName, + stream: false, + messages: [{ role: 'user', content: 'Reply with OK.' }], + options: { + temperature: 0, + num_predict: 8, + }, + }), + }) + if (!response.ok) { + return null + } + await response.json() + return Date.now() - start + } catch { + return null + } finally { + clear() + } +} diff --git a/scripts/provider-launch.ts b/scripts/provider-launch.ts index 4594b63e..d8516be0 100644 --- a/scripts/provider-launch.ts +++ b/scripts/provider-launch.ts @@ -3,43 +3,51 @@ import { spawn } from 'node:child_process' import { existsSync, readFileSync } from 'node:fs' import { resolve } from 'node:path' import { - DEFAULT_CODEX_BASE_URL, resolveCodexApiCredentials, } from '../src/services/api/providerConfig.js' - -type ProviderProfile = 'openai' | 'ollama' | 'codex' | 'gemini' - -type ProfileFile = { - profile: ProviderProfile - env?: { - OPENAI_BASE_URL?: string - OPENAI_MODEL?: string - OPENAI_API_KEY?: string - CODEX_API_KEY?: string - GEMINI_API_KEY?: string - GEMINI_MODEL?: string - GEMINI_BASE_URL?: string - } -} +import { + normalizeRecommendationGoal, + recommendOllamaModel, +} from '../src/utils/providerRecommendation.ts' +import { + buildLaunchEnv, + selectAutoProfile, + type ProfileFile, + type ProviderProfile, +} from '../src/utils/providerProfile.ts' +import { + getOllamaChatBaseUrl, + hasLocalOllama, + listOllamaModels, +} from './provider-discovery.ts' type LaunchOptions = { requestedProfile: ProviderProfile | 'auto' | null passthroughArgs: string[] fast: boolean + goal: ReturnType } function 
parseLaunchOptions(argv: string[]): LaunchOptions { let requestedProfile: ProviderProfile | 'auto' | null = 'auto' const passthroughArgs: string[] = [] let fast = false + let goal = normalizeRecommendationGoal(process.env.OPENCLAUDE_PROFILE_GOAL) - for (const arg of argv) { + for (let i = 0; i < argv.length; i++) { + const arg = argv[i]! const lower = arg.toLowerCase() if (lower === '--fast') { fast = true continue } + if (lower === '--goal') { + goal = normalizeRecommendationGoal(argv[i + 1] ?? null) + i++ + continue + } + if ((lower === 'auto' || lower === 'openai' || lower === 'ollama' || lower === 'codex' || lower === 'gemini') && requestedProfile === 'auto') { requestedProfile = lower as ProviderProfile | 'auto' continue @@ -62,6 +70,7 @@ function parseLaunchOptions(argv: string[]): LaunchOptions { requestedProfile, passthroughArgs, fast, + goal, } } @@ -79,18 +88,12 @@ function loadPersistedProfile(): ProfileFile | null { } } -async function hasLocalOllama(): Promise { - const endpoint = 'http://localhost:11434/api/tags' - const controller = new AbortController() - const timeout = setTimeout(() => controller.abort(), 1200) - try { - const response = await fetch(endpoint, { signal: controller.signal }) - return response.ok - } catch { - return false - } finally { - clearTimeout(timeout) - } +async function resolveOllamaDefaultModel( + goal: ReturnType, +): Promise { + const models = await listOllamaModels() + const recommended = recommendOllamaModel(models, goal) + return recommended?.name ?? null } function runCommand(command: string, env: NodeJS.ProcessEnv): Promise { @@ -107,57 +110,6 @@ function runCommand(command: string, env: NodeJS.ProcessEnv): Promise { }) } -function buildEnv(profile: ProviderProfile, persisted: ProfileFile | null): NodeJS.ProcessEnv { - const persistedEnv = persisted?.env ?? 
{} - - if (profile === 'gemini') { - const env: NodeJS.ProcessEnv = { - ...process.env, - CLAUDE_CODE_USE_GEMINI: '1', - } - delete env.CLAUDE_CODE_USE_OPENAI - env.GEMINI_MODEL = process.env.GEMINI_MODEL || persistedEnv.GEMINI_MODEL || 'gemini-2.0-flash' - env.GEMINI_API_KEY = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY || persistedEnv.GEMINI_API_KEY - if (persistedEnv.GEMINI_BASE_URL || process.env.GEMINI_BASE_URL) { - env.GEMINI_BASE_URL = process.env.GEMINI_BASE_URL || persistedEnv.GEMINI_BASE_URL - } - return env - } - - const env: NodeJS.ProcessEnv = { - ...process.env, - CLAUDE_CODE_USE_OPENAI: '1', - } - - if (profile === 'ollama') { - env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || process.env.OPENAI_BASE_URL || 'http://localhost:11434/v1' - env.OPENAI_MODEL = persistedEnv.OPENAI_MODEL || process.env.OPENAI_MODEL || 'llama3.1:8b' - if (!process.env.OPENAI_API_KEY || process.env.OPENAI_API_KEY === 'SUA_CHAVE') { - delete env.OPENAI_API_KEY - } - return env - } - - if (profile === 'codex') { - env.OPENAI_BASE_URL = - process.env.OPENAI_BASE_URL || - persistedEnv.OPENAI_BASE_URL || - DEFAULT_CODEX_BASE_URL - env.OPENAI_MODEL = - process.env.OPENAI_MODEL || - persistedEnv.OPENAI_MODEL || - 'codexplan' - env.CODEX_API_KEY = - process.env.CODEX_API_KEY || persistedEnv.CODEX_API_KEY - return env - } - - env.OPENAI_BASE_URL = process.env.OPENAI_BASE_URL || persistedEnv.OPENAI_BASE_URL || 'https://api.openai.com/v1' - env.OPENAI_MODEL = process.env.OPENAI_MODEL || persistedEnv.OPENAI_MODEL || 'gpt-4o' - env.OPENAI_API_KEY = process.env.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY - return env -} - function applyFastFlags(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv { env.CLAUDE_CODE_SIMPLE ??= '1' env.CLAUDE_CODE_DISABLE_THINKING ??= '1' @@ -193,24 +145,45 @@ async function main(): Promise { const options = parseLaunchOptions(process.argv.slice(2)) const requestedProfile = options.requestedProfile if (!requestedProfile) { - console.error('Usage: 
bun run scripts/provider-launch.ts [openai|ollama|codex|gemini|auto] [--fast] [-- ]') + console.error('Usage: bun run scripts/provider-launch.ts [openai|ollama|codex|gemini|auto] [--fast] [--goal ] [-- ]') process.exit(1) } const persisted = loadPersistedProfile() let profile: ProviderProfile + let resolvedOllamaModel: string | null = null if (requestedProfile === 'auto') { if (persisted) { profile = persisted.profile + } else if (await hasLocalOllama()) { + resolvedOllamaModel = await resolveOllamaDefaultModel(options.goal) + profile = selectAutoProfile(resolvedOllamaModel) } else { - profile = (await hasLocalOllama()) ? 'ollama' : 'openai' + profile = 'openai' } } else { profile = requestedProfile } - const env = buildEnv(profile, persisted) + if ( + profile === 'ollama' && + (persisted?.profile !== 'ollama' || !persisted?.env?.OPENAI_MODEL) + ) { + resolvedOllamaModel ??= await resolveOllamaDefaultModel(options.goal) + if (!resolvedOllamaModel) { + console.error('No viable Ollama chat model was discovered. Pull a chat model first or save one with `bun run profile:init -- --provider ollama --model `.') + process.exit(1) + } + } + + const env = await buildLaunchEnv({ + profile, + persisted, + goal: options.goal, + getOllamaChatBaseUrl, + resolveOllamaDefaultModel: async () => resolvedOllamaModel || 'llama3.1:8b', + }) if (options.fast) { applyFastFlags(env) } @@ -234,6 +207,11 @@ async function main(): Promise { console.error(`CODEX_API_KEY is required for codex profile${authHint}. Run: bun run profile:init -- --provider codex --model codexplan`) process.exit(1) } + + if (!credentials.accountId) { + console.error('CHATGPT_ACCOUNT_ID is required for codex profile. 
Set CHATGPT_ACCOUNT_ID/CODEX_ACCOUNT_ID or use an auth.json that includes it.') + process.exit(1) + } } printSummary(profile, env) diff --git a/scripts/provider-recommend.ts b/scripts/provider-recommend.ts new file mode 100644 index 00000000..eca811e6 --- /dev/null +++ b/scripts/provider-recommend.ts @@ -0,0 +1,270 @@ +// @ts-nocheck +import { writeFileSync } from 'node:fs' +import { resolve } from 'node:path' + +import { + applyBenchmarkLatency, + getGoalDefaultOpenAIModel, + isViableOllamaChatModel, + normalizeRecommendationGoal, + rankOllamaModels, + selectRecommendedOllamaModel, + type BenchmarkedOllamaModel, + type RecommendationGoal, +} from '../src/utils/providerRecommendation.ts' +import { + buildOllamaProfileEnv, + buildOpenAIProfileEnv, + createProfileFile, + sanitizeApiKey, + type ProfileFile, + type ProviderProfile, +} from '../src/utils/providerProfile.ts' +import { + benchmarkOllamaModel, + getOllamaChatBaseUrl, + hasLocalOllama, + listOllamaModels, +} from './provider-discovery.ts' + +type CliOptions = { + apply: boolean + benchmark: boolean + goal: RecommendationGoal + json: boolean + provider: ProviderProfile | 'auto' + baseUrl: string | null +} + +function parseOptions(argv: string[]): CliOptions { + const options: CliOptions = { + apply: false, + benchmark: false, + goal: normalizeRecommendationGoal(process.env.OPENCLAUDE_PROFILE_GOAL), + json: false, + provider: 'auto', + baseUrl: null, + } + + for (let i = 0; i < argv.length; i++) { + const arg = argv[i]?.toLowerCase() + if (!arg) continue + + if (arg === '--apply') { + options.apply = true + continue + } + if (arg === '--benchmark') { + options.benchmark = true + continue + } + if (arg === '--json') { + options.json = true + continue + } + if (arg === '--goal') { + options.goal = normalizeRecommendationGoal(argv[i + 1] ?? 
null) + i++ + continue + } + if (arg === '--provider') { + const provider = argv[i + 1]?.toLowerCase() + if ( + provider === 'openai' || + provider === 'ollama' || + provider === 'auto' + ) { + options.provider = provider + } + i++ + continue + } + if (arg === '--base-url') { + options.baseUrl = argv[i + 1] ?? null + i++ + } + } + + return options +} + +function printHumanSummary(payload: { + goal: RecommendationGoal + recommendedProfile: ProviderProfile + recommendedModel: string + rankedModels: BenchmarkedOllamaModel[] + benchmarked: boolean + applied: boolean +}): void { + console.log(`Recommendation goal: ${payload.goal}`) + console.log(`Recommended profile: ${payload.recommendedProfile}`) + console.log(`Recommended model: ${payload.recommendedModel}`) + + if (payload.rankedModels.length > 0) { + console.log('\nRanked Ollama models:') + for (const [index, model] of payload.rankedModels.slice(0, 5).entries()) { + const benchmarkPart = + payload.benchmarked && model.benchmarkMs !== null + ? ` | ${Math.round(model.benchmarkMs)}ms` + : '' + console.log( + `${index + 1}. 
${model.name} | score=${model.score}${benchmarkPart} | ${model.summary}`, + ) + } + } + + if (payload.applied) { + console.log('\nSaved .openclaude-profile.json with the recommended profile.') + console.log('Next: bun run dev:profile') + } else { + console.log( + '\nTip: run `bun run profile:auto -- --goal ' + + payload.goal + + '` to apply this automatically.', + ) + } +} + +async function maybeApplyProfile( + profile: ProviderProfile, + model: string, + goal: RecommendationGoal, + baseUrl: string | null, +): Promise { + let env: ProfileFile['env'] | null + if (profile === 'ollama') { + env = buildOllamaProfileEnv(model, { + baseUrl, + getOllamaChatBaseUrl, + }) + } else { + env = buildOpenAIProfileEnv({ + goal, + model: model || getGoalDefaultOpenAIModel(goal), + apiKey: process.env.OPENAI_API_KEY, + processEnv: process.env, + }) + + if (!env) { + console.error('Cannot apply an OpenAI profile without OPENAI_API_KEY.') + return false + } + } + + const profileFile = createProfileFile(profile, env) + + writeFileSync( + resolve(process.cwd(), '.openclaude-profile.json'), + JSON.stringify(profileFile, null, 2), + 'utf8', + ) + return true +} + +async function main(): Promise { + const options = parseOptions(process.argv.slice(2)) + const ollamaAvailable = + options.provider !== 'openai' && + (await hasLocalOllama(options.baseUrl ?? undefined)) + const ollamaModels = ollamaAvailable + ? await listOllamaModels(options.baseUrl ?? undefined) + : [] + + const heuristicRanked = rankOllamaModels(ollamaModels, options.goal) + const benchmarkInput = options.benchmark + ? heuristicRanked.filter(isViableOllamaChatModel).slice(0, 3) + : [] + + const benchmarkResults: Record = {} + for (const model of benchmarkInput) { + benchmarkResults[model.name] = await benchmarkOllamaModel( + model.name, + options.baseUrl ?? undefined, + ) + } + + const rankedModels: BenchmarkedOllamaModel[] = options.benchmark + ? 
applyBenchmarkLatency(heuristicRanked, benchmarkResults, options.goal) + : heuristicRanked.map(model => ({ + ...model, + benchmarkMs: null, + })) + + const recommendedOllama = selectRecommendedOllamaModel(rankedModels) + const openAIConfigured = Boolean(sanitizeApiKey(process.env.OPENAI_API_KEY)) + + let recommendedProfile: ProviderProfile + let recommendedModel: string + + if (options.provider === 'openai') { + recommendedProfile = 'openai' + recommendedModel = getGoalDefaultOpenAIModel(options.goal) + } else if (options.provider === 'ollama') { + if (!recommendedOllama) { + console.error( + 'No Ollama models were discovered. Pull a model first or switch to --provider openai.', + ) + process.exit(1) + } + recommendedProfile = 'ollama' + recommendedModel = recommendedOllama.name + } else if (recommendedOllama) { + recommendedProfile = 'ollama' + recommendedModel = recommendedOllama.name + } else { + recommendedProfile = 'openai' + recommendedModel = getGoalDefaultOpenAIModel(options.goal) + } + + let applied = false + if (options.apply) { + applied = await maybeApplyProfile( + recommendedProfile, + recommendedModel, + options.goal, + options.baseUrl, + ) + if (!applied) { + process.exit(1) + } + } + + const payload = { + goal: options.goal, + provider: options.provider, + ollamaAvailable, + openAIConfigured, + recommendedProfile, + recommendedModel, + benchmarked: options.benchmark, + rankedModels, + applied, + } + + if (options.json) { + console.log(JSON.stringify(payload, null, 2)) + return + } + + printHumanSummary({ + goal: options.goal, + recommendedProfile, + recommendedModel, + rankedModels, + benchmarked: options.benchmark, + applied, + }) + + if (!recommendedOllama && !openAIConfigured) { + console.log( + '\nNo local Ollama model was detected and OPENAI_API_KEY is unset.', + ) + console.log( + 'Next steps: `ollama pull qwen2.5-coder:7b` or set OPENAI_API_KEY.', + ) + } +} + +await main() + +export {} diff --git a/src/utils/providerProfile.test.ts 
// b/src/utils/providerProfile.test.ts
// new file mode 100644
// index 00000000..e90746c6
// --- /dev/null
// +++ b/src/utils/providerProfile.test.ts
// @@ -0,0 +1,383 @@
import assert from 'node:assert/strict'
import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import test from 'node:test'

import {
  buildCodexProfileEnv,
  buildGeminiProfileEnv,
  buildLaunchEnv,
  buildOllamaProfileEnv,
  buildOpenAIProfileEnv,
  selectAutoProfile,
  type ProfileFile,
} from './providerProfile.ts'

/** Builds a persisted profile fixture with a fixed creation timestamp. */
function profile(profile: ProfileFile['profile'], env: ProfileFile['env']): ProfileFile {
  return {
    profile,
    env,
    createdAt: '2026-04-01T00:00:00.000Z',
  }
}

// Points at a file that is never created, so codex auth-file lookups in these
// tests do not depend on the developer's real ~/.codex/auth.json.
const missingCodexAuthPath = join(tmpdir(), 'openclaude-missing-codex-auth.json')

test('matching persisted ollama env is reused for ollama launch', async () => {
  const env = await buildLaunchEnv({
    profile: 'ollama',
    persisted: profile('ollama', {
      OPENAI_BASE_URL: 'http://127.0.0.1:11435/v1',
      OPENAI_MODEL: 'mistral:7b-instruct',
    }),
    goal: 'balanced',
    processEnv: {},
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
  })

  assert.equal(env.OPENAI_BASE_URL, 'http://127.0.0.1:11435/v1')
  assert.equal(env.OPENAI_MODEL, 'mistral:7b-instruct')
})

test('ollama launch ignores mismatched persisted openai env and shell model fallback', async () => {
  const env = await buildLaunchEnv({
    profile: 'ollama',
    persisted: profile('openai', {
      OPENAI_BASE_URL: 'https://api.openai.com/v1',
      OPENAI_MODEL: 'gpt-4o',
      OPENAI_API_KEY: 'sk-persisted',
    }),
    goal: 'coding',
    processEnv: {
      OPENAI_BASE_URL: 'https://api.deepseek.com/v1',
      OPENAI_MODEL: 'gpt-4o-mini',
      OPENAI_API_KEY: 'sk-live',
      CODEX_API_KEY: 'codex-live',
      CHATGPT_ACCOUNT_ID: 'acct_live',
    },
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'qwen2.5-coder:7b',
  })

  assert.equal(env.OPENAI_BASE_URL, 'http://localhost:11434/v1')
  assert.equal(env.OPENAI_MODEL, 'qwen2.5-coder:7b')
  assert.equal(env.OPENAI_API_KEY, undefined)
  assert.equal(env.CODEX_API_KEY, undefined)
  assert.equal(env.CHATGPT_ACCOUNT_ID, undefined)
})

test('openai launch ignores mismatched persisted ollama env', async () => {
  const env = await buildLaunchEnv({
    profile: 'openai',
    persisted: profile('ollama', {
      OPENAI_BASE_URL: 'http://localhost:11434/v1',
      OPENAI_MODEL: 'llama3.1:8b',
    }),
    goal: 'latency',
    processEnv: {
      OPENAI_API_KEY: 'sk-live',
      CODEX_API_KEY: 'codex-live',
      CHATGPT_ACCOUNT_ID: 'acct_live',
    },
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
  })

  assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
  assert.equal(env.OPENAI_MODEL, 'gpt-4o-mini')
  assert.equal(env.OPENAI_API_KEY, 'sk-live')
  assert.equal(env.CODEX_API_KEY, undefined)
  assert.equal(env.CHATGPT_ACCOUNT_ID, undefined)
})

test('openai launch ignores codex shell transport hints', async () => {
  const env = await buildLaunchEnv({
    profile: 'openai',
    persisted: null,
    goal: 'balanced',
    processEnv: {
      OPENAI_API_KEY: 'sk-live',
      OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
      OPENAI_MODEL: 'codexplan',
    },
  })

  assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
  assert.equal(env.OPENAI_MODEL, 'gpt-4o')
  assert.equal(env.OPENAI_API_KEY, 'sk-live')
})

test('openai launch ignores codex persisted transport hints', async () => {
  const env = await buildLaunchEnv({
    profile: 'openai',
    persisted: profile('openai', {
      OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
      OPENAI_MODEL: 'codexplan',
      OPENAI_API_KEY: 'sk-persisted',
    }),
    goal: 'balanced',
    processEnv: {
      OPENAI_API_KEY: 'sk-live',
    },
  })

  assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
  assert.equal(env.OPENAI_MODEL, 'gpt-4o')
  assert.equal(env.OPENAI_API_KEY, 'sk-live')
})

test('matching persisted gemini env is reused for gemini launch', async () => {
  const env = await buildLaunchEnv({
    profile: 'gemini',
    persisted: profile('gemini', {
      GEMINI_MODEL: 'gemini-2.5-flash',
      GEMINI_API_KEY: 'gem-persisted',
      GEMINI_BASE_URL: 'https://example.test/v1beta/openai',
    }),
    goal: 'balanced',
    processEnv: {},
  })

  assert.equal(env.CLAUDE_CODE_USE_GEMINI, '1')
  assert.equal(env.CLAUDE_CODE_USE_OPENAI, undefined)
  assert.equal(env.GEMINI_MODEL, 'gemini-2.5-flash')
  assert.equal(env.GEMINI_API_KEY, 'gem-persisted')
  assert.equal(env.GEMINI_BASE_URL, 'https://example.test/v1beta/openai')
})

test('gemini launch ignores mismatched persisted openai env and strips other provider secrets', async () => {
  const env = await buildLaunchEnv({
    profile: 'gemini',
    persisted: profile('openai', {
      OPENAI_BASE_URL: 'https://api.openai.com/v1',
      OPENAI_MODEL: 'gpt-4o',
      OPENAI_API_KEY: 'sk-persisted',
    }),
    goal: 'balanced',
    processEnv: {
      GEMINI_API_KEY: 'gem-live',
      GOOGLE_API_KEY: 'google-live',
      OPENAI_API_KEY: 'sk-live',
      OPENAI_BASE_URL: 'https://api.openai.com/v1',
      OPENAI_MODEL: 'gpt-4o-mini',
      CODEX_API_KEY: 'codex-live',
      CHATGPT_ACCOUNT_ID: 'acct_live',
      CLAUDE_CODE_USE_OPENAI: '1',
    },
  })

  assert.equal(env.CLAUDE_CODE_USE_GEMINI, '1')
  assert.equal(env.CLAUDE_CODE_USE_OPENAI, undefined)
  assert.equal(env.GEMINI_MODEL, 'gemini-2.0-flash')
  assert.equal(env.GEMINI_API_KEY, 'gem-live')
  assert.equal(
    env.GEMINI_BASE_URL,
    'https://generativelanguage.googleapis.com/v1beta/openai',
  )
  assert.equal(env.GOOGLE_API_KEY, undefined)
  assert.equal(env.OPENAI_API_KEY, undefined)
  assert.equal(env.CODEX_API_KEY, undefined)
  assert.equal(env.CHATGPT_ACCOUNT_ID, undefined)
})

test('matching persisted codex env is reused for codex launch', async () => {
  const env = await buildLaunchEnv({
    profile: 'codex',
    persisted: profile('codex', {
      OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
      OPENAI_MODEL: 'codexspark',
      CODEX_API_KEY: 'codex-persisted',
      CHATGPT_ACCOUNT_ID: 'acct_persisted',
    }),
    goal: 'balanced',
    processEnv: {
      CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
    },
  })

  assert.equal(env.OPENAI_BASE_URL, 'https://chatgpt.com/backend-api/codex')
  assert.equal(env.OPENAI_MODEL, 'codexspark')
  assert.equal(env.CODEX_API_KEY, 'codex-persisted')
  assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_persisted')
})

test('codex launch normalizes poisoned persisted base urls', async () => {
  const env = await buildLaunchEnv({
    profile: 'codex',
    persisted: profile('codex', {
      OPENAI_BASE_URL: 'https://api.openai.com/v1',
      OPENAI_MODEL: 'codexspark',
      CHATGPT_ACCOUNT_ID: 'acct_persisted',
    }),
    goal: 'balanced',
    processEnv: {
      CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
    },
  })

  assert.equal(env.OPENAI_BASE_URL, 'https://chatgpt.com/backend-api/codex')
  assert.equal(env.OPENAI_MODEL, 'codexspark')
})

test('codex launch ignores mismatched persisted openai env', async () => {
  const env = await buildLaunchEnv({
    profile: 'codex',
    persisted: profile('openai', {
      OPENAI_BASE_URL: 'https://api.openai.com/v1',
      OPENAI_MODEL: 'gpt-4o',
      OPENAI_API_KEY: 'sk-persisted',
    }),
    goal: 'balanced',
    processEnv: {
      OPENAI_BASE_URL: 'https://api.openai.com/v1',
      OPENAI_MODEL: 'gpt-4o-mini',
      OPENAI_API_KEY: 'sk-live',
      CODEX_API_KEY: 'codex-live',
      CHATGPT_ACCOUNT_ID: 'acct_live',
    },
  })

  assert.equal(env.OPENAI_BASE_URL, 'https://chatgpt.com/backend-api/codex')
  assert.equal(env.OPENAI_MODEL, 'codexplan')
  assert.equal(env.OPENAI_API_KEY, undefined)
  assert.equal(env.CODEX_API_KEY, 'codex-live')
  assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_live')
})

test('codex launch ignores placeholder codex env keys', async () => {
  const env = await buildLaunchEnv({
    profile: 'codex',
    persisted: profile('codex', {
      OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
      OPENAI_MODEL: 'codexspark',
      CODEX_API_KEY: 'codex-persisted',
      CHATGPT_ACCOUNT_ID: 'acct_persisted',
    }),
    goal: 'balanced',
    processEnv: {
      CODEX_API_KEY: 'SUA_CHAVE',
      CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
    },
  })

  assert.equal(env.CODEX_API_KEY, 'codex-persisted')
  assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_persisted')
})

test('codex launch prefers auth account id over stale persisted value', async () => {
  const codexHome = mkdtempSync(join(tmpdir(), 'openclaude-codex-'))
  try {
    writeFileSync(
      join(codexHome, 'auth.json'),
      JSON.stringify({
        access_token: 'codex-live',
        account_id: 'acct_auth',
      }),
      'utf8',
    )

    const env = await buildLaunchEnv({
      profile: 'codex',
      persisted: profile('codex', {
        OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
        OPENAI_MODEL: 'codexspark',
        CHATGPT_ACCOUNT_ID: 'acct_persisted',
      }),
      goal: 'balanced',
      processEnv: {
        CODEX_HOME: codexHome,
      },
    })

    assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_auth')
  } finally {
    rmSync(codexHome, { recursive: true, force: true })
  }
})

test('ollama profiles never persist openai api keys', () => {
  const env = buildOllamaProfileEnv('llama3.1:8b', {
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
  })

  assert.deepEqual(env, {
    OPENAI_BASE_URL: 'http://localhost:11434/v1',
    OPENAI_MODEL: 'llama3.1:8b',
  })
  assert.equal('OPENAI_API_KEY' in env, false)
})

test('codex profiles accept explicit codex credentials', () => {
  const env = buildCodexProfileEnv({
    model: 'codexspark',
    apiKey: 'codex-live',
    processEnv: {
      CHATGPT_ACCOUNT_ID: 'acct_123',
    },
  })

  assert.deepEqual(env, {
    OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
    OPENAI_MODEL: 'codexspark',
    CODEX_API_KEY: 'codex-live',
    CHATGPT_ACCOUNT_ID: 'acct_123',
  })
})

test('codex profiles require a chatgpt account id', () => {
  const env = buildCodexProfileEnv({
    model: 'codexspark',
    apiKey: 'codex-live',
    processEnv: {
      CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
    },
  })

  assert.equal(env, null)
})

test('gemini profiles accept google api key fallback', () => {
  const env = buildGeminiProfileEnv({
    processEnv: {
      GOOGLE_API_KEY: 'gem-live',
    },
  })

  assert.deepEqual(env, {
    GEMINI_MODEL: 'gemini-2.0-flash',
    GEMINI_API_KEY: 'gem-live',
  })
})

test('gemini profiles require a key', () => {
  const env = buildGeminiProfileEnv({
    processEnv: {},
  })

  assert.equal(env, null)
})

test('openai profiles ignore codex shell transport hints', () => {
  const env = buildOpenAIProfileEnv({
    goal: 'balanced',
    apiKey: 'sk-live',
    processEnv: {
      OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
      OPENAI_MODEL: 'codexplan',
      OPENAI_API_KEY: 'sk-live',
    },
  })

  assert.deepEqual(env, {
    OPENAI_BASE_URL: 'https://api.openai.com/v1',
    OPENAI_MODEL: 'gpt-4o',
    OPENAI_API_KEY: 'sk-live',
  })
})

test('auto profile falls back to openai when no viable ollama model exists', () => {
  assert.equal(selectAutoProfile(null), 'openai')
  assert.equal(selectAutoProfile('qwen2.5-coder:7b'), 'ollama')
})

test('gemini profiles skip placeholder keys before falling back', () => {
  const env = buildGeminiProfileEnv({
    processEnv: {
      GEMINI_API_KEY: 'SUA_CHAVE',
      GOOGLE_API_KEY: 'gem-live',
    },
  })

  assert.deepEqual(env, {
    GEMINI_MODEL: 'gemini-2.0-flash',
    GEMINI_API_KEY: 'gem-live',
  })
})

test('openai launch prefers persisted key over placeholder shell key', async () => {
  const env = await buildLaunchEnv({
    profile: 'openai',
    persisted: profile('openai', {
      OPENAI_API_KEY: 'sk-persisted',
    }),
    goal: 'balanced',
    processEnv: {
      OPENAI_API_KEY: 'SUA_CHAVE',
    },
  })

  assert.equal(env.OPENAI_API_KEY, 'sk-persisted')
})

// diff --git a/src/utils/providerProfile.ts b/src/utils/providerProfile.ts
// new file mode 100644
// index 00000000..866c19c5
// --- /dev/null
// +++ b/src/utils/providerProfile.ts
// @@ -0,0 +1,314 @@
import {
  DEFAULT_CODEX_BASE_URL,
  DEFAULT_OPENAI_BASE_URL,
  isCodexBaseUrl,
  resolveCodexApiCredentials,
  resolveProviderRequest,
} from '../services/api/providerConfig.ts'
import {
  getGoalDefaultOpenAIModel,
  type RecommendationGoal,
} from './providerRecommendation.ts'

const DEFAULT_GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/openai'
const DEFAULT_GEMINI_MODEL = 'gemini-2.0-flash'

/** Provider profiles that can be persisted and launched. */
export type ProviderProfile = 'openai' | 'ollama' | 'codex' | 'gemini'

/** Environment variables a profile may carry; every entry is optional. */
export type ProfileEnv = {
  OPENAI_BASE_URL?: string
  OPENAI_MODEL?: string
  OPENAI_API_KEY?: string
  CODEX_API_KEY?: string
  CHATGPT_ACCOUNT_ID?: string
  CODEX_ACCOUNT_ID?: string
  GEMINI_API_KEY?: string
  GEMINI_MODEL?: string
  GEMINI_BASE_URL?: string
}

/** Persisted profile: the provider, its environment and an ISO creation timestamp. */
export type ProfileFile = {
  profile: ProviderProfile
  env: ProfileEnv
  createdAt: string
}

/**
 * Returns the key when it is usable, otherwise `undefined`.
 *
 * Nullish and empty values, and the literal placeholder `SUA_CHAVE`, are
 * treated as "no key".
 */
export function sanitizeApiKey(
  key: string | null | undefined,
): string | undefined {
  if (!key || key === 'SUA_CHAVE') return undefined
  return key
}

/**
 * Returns the first candidate that survives `sanitizeApiKey`.
 *
 * Each candidate is sanitized on its own so an empty or placeholder value in
 * an earlier slot cannot hide a usable key in a later one.
 */
function firstUsableApiKey(
  ...candidates: Array<string | null | undefined>
): string | undefined {
  for (const candidate of candidates) {
    const key = sanitizeApiKey(candidate)
    if (key) return key
  }
  return undefined
}

/**
 * Builds the persisted environment for an Ollama profile.
 *
 * Only the base URL (resolved through `options.getOllamaChatBaseUrl`) and the
 * model are emitted; no API key is ever included.
 */
export function buildOllamaProfileEnv(
  model: string,
  options: {
    baseUrl?: string | null
    getOllamaChatBaseUrl: (baseUrl?: string) => string
  },
): ProfileEnv {
  return {
    OPENAI_BASE_URL: options.getOllamaChatBaseUrl(options.baseUrl ?? undefined),
    OPENAI_MODEL: model,
  }
}

/**
 * Builds the persisted environment for a Gemini profile.
 *
 * The key is taken from `options.apiKey`, then `GEMINI_API_KEY`, then
 * `GOOGLE_API_KEY`, skipping unusable values.
 *
 * @returns The profile environment, or `null` when no usable key is found.
 *   `GEMINI_BASE_URL` is only included when one was supplied.
 */
export function buildGeminiProfileEnv(options: {
  model?: string | null
  baseUrl?: string | null
  apiKey?: string | null
  processEnv?: NodeJS.ProcessEnv
}): ProfileEnv | null {
  const processEnv = options.processEnv ?? process.env
  const key = firstUsableApiKey(
    options.apiKey,
    processEnv.GEMINI_API_KEY,
    processEnv.GOOGLE_API_KEY,
  )
  if (!key) {
    return null
  }

  const env: ProfileEnv = {
    GEMINI_MODEL:
      options.model || processEnv.GEMINI_MODEL || DEFAULT_GEMINI_MODEL,
    GEMINI_API_KEY: key,
  }

  const baseUrl = options.baseUrl || processEnv.GEMINI_BASE_URL
  if (baseUrl) {
    env.GEMINI_BASE_URL = baseUrl
  }

  return env
}

/**
 * Builds the persisted environment for an OpenAI profile.
 *
 * Shell `OPENAI_MODEL` / `OPENAI_BASE_URL` are only honoured when
 * `resolveProviderRequest` classifies them as the `chat_completions`
 * transport; otherwise the goal default model and the default OpenAI base URL
 * are used. Explicit `options.model` / `options.baseUrl` always win.
 *
 * @returns The profile environment, or `null` when no usable key is found.
 */
export function buildOpenAIProfileEnv(options: {
  goal: RecommendationGoal
  model?: string | null
  baseUrl?: string | null
  apiKey?: string | null
  processEnv?: NodeJS.ProcessEnv
}): ProfileEnv | null {
  const processEnv = options.processEnv ?? process.env
  const key = firstUsableApiKey(options.apiKey, processEnv.OPENAI_API_KEY)
  if (!key) {
    return null
  }

  const defaultModel = getGoalDefaultOpenAIModel(options.goal)
  const shellOpenAIRequest = resolveProviderRequest({
    model: processEnv.OPENAI_MODEL,
    baseUrl: processEnv.OPENAI_BASE_URL,
    fallbackModel: defaultModel,
  })
  const useShellOpenAIConfig = shellOpenAIRequest.transport === 'chat_completions'

  return {
    OPENAI_BASE_URL:
      options.baseUrl ||
      (useShellOpenAIConfig ? processEnv.OPENAI_BASE_URL : undefined) ||
      DEFAULT_OPENAI_BASE_URL,
    OPENAI_MODEL:
      options.model ||
      (useShellOpenAIConfig ? processEnv.OPENAI_MODEL : undefined) ||
      defaultModel,
    OPENAI_API_KEY: key,
  }
}

/**
 * Builds the persisted environment for a Codex profile.
 *
 * Credentials are resolved through `resolveCodexApiCredentials`, with a usable
 * explicit or shell key injected as `CODEX_API_KEY` first.
 *
 * @returns The profile environment, or `null` unless both an API key and an
 *   account id are resolved. `CODEX_API_KEY` is only persisted when it came
 *   from `options.apiKey` or the shell variable.
 */
export function buildCodexProfileEnv(options: {
  model?: string | null
  baseUrl?: string | null
  apiKey?: string | null
  processEnv?: NodeJS.ProcessEnv
}): ProfileEnv | null {
  const processEnv = options.processEnv ?? process.env
  const key = firstUsableApiKey(options.apiKey, processEnv.CODEX_API_KEY)
  const credentialEnv = key
    ? ({ ...processEnv, CODEX_API_KEY: key } as NodeJS.ProcessEnv)
    : processEnv
  const credentials = resolveCodexApiCredentials(credentialEnv)
  if (!credentials.apiKey || !credentials.accountId) {
    return null
  }

  const env: ProfileEnv = {
    OPENAI_BASE_URL: options.baseUrl || DEFAULT_CODEX_BASE_URL,
    OPENAI_MODEL: options.model || 'codexplan',
  }

  if (key) {
    env.CODEX_API_KEY = key
  }

  env.CHATGPT_ACCOUNT_ID = credentials.accountId

  return env
}

/** Wraps a profile and its environment with the current time as `createdAt`. */
export function createProfileFile(
  profile: ProviderProfile,
  env: ProfileEnv,
): ProfileFile {
  return {
    profile,
    env,
    createdAt: new Date().toISOString(),
  }
}

/** Picks `ollama` when a recommended local model exists, otherwise `openai`. */
export function selectAutoProfile(
  recommendedOllamaModel: string | null,
): ProviderProfile {
  return recommendedOllamaModel ? 'ollama' : 'openai'
}

/**
 * Builds the environment used to launch the CLI for `options.profile`.
 *
 * The result starts as a copy of the process environment. Persisted values are
 * only consulted when the persisted profile matches the one being launched,
 * and variables belonging to other providers are removed so they cannot leak
 * into the launched process.
 *
 * @param options.processEnv Defaults to `process.env`.
 * @param options.getOllamaChatBaseUrl Defaults to `http://localhost:11434/v1`.
 * @param options.resolveOllamaDefaultModel Defaults to `llama3.1:8b`; only
 *   called when no matching persisted Ollama model exists.
 */
export async function buildLaunchEnv(options: {
  profile: ProviderProfile
  persisted: ProfileFile | null
  goal: RecommendationGoal
  processEnv?: NodeJS.ProcessEnv
  getOllamaChatBaseUrl?: (baseUrl?: string) => string
  resolveOllamaDefaultModel?: (goal: RecommendationGoal) => Promise<string>
}): Promise<NodeJS.ProcessEnv> {
  const processEnv = options.processEnv ?? process.env
  // A profile persisted for a different provider contributes nothing.
  const persistedEnv: ProfileEnv =
    options.persisted?.profile === options.profile
      ? options.persisted.env ?? {}
      : {}

  const shellGeminiKey = firstUsableApiKey(
    processEnv.GEMINI_API_KEY,
    processEnv.GOOGLE_API_KEY,
  )
  const persistedGeminiKey = sanitizeApiKey(persistedEnv.GEMINI_API_KEY)

  if (options.profile === 'gemini') {
    const env: NodeJS.ProcessEnv = {
      ...processEnv,
      CLAUDE_CODE_USE_GEMINI: '1',
    }

    delete env.CLAUDE_CODE_USE_OPENAI

    env.GEMINI_MODEL =
      processEnv.GEMINI_MODEL ||
      persistedEnv.GEMINI_MODEL ||
      DEFAULT_GEMINI_MODEL
    env.GEMINI_BASE_URL =
      processEnv.GEMINI_BASE_URL ||
      persistedEnv.GEMINI_BASE_URL ||
      DEFAULT_GEMINI_BASE_URL

    const geminiKey = shellGeminiKey || persistedGeminiKey
    if (geminiKey) {
      env.GEMINI_API_KEY = geminiKey
    } else {
      delete env.GEMINI_API_KEY
    }

    delete env.GOOGLE_API_KEY
    delete env.OPENAI_BASE_URL
    delete env.OPENAI_MODEL
    delete env.OPENAI_API_KEY
    delete env.CODEX_API_KEY
    delete env.CHATGPT_ACCOUNT_ID
    delete env.CODEX_ACCOUNT_ID

    return env
  }

  // Every remaining profile is launched through the OpenAI-compatible path.
  const env: NodeJS.ProcessEnv = {
    ...processEnv,
    CLAUDE_CODE_USE_OPENAI: '1',
  }

  delete env.CLAUDE_CODE_USE_GEMINI
  delete env.GEMINI_API_KEY
  delete env.GEMINI_MODEL
  delete env.GEMINI_BASE_URL
  delete env.GOOGLE_API_KEY

  if (options.profile === 'ollama') {
    const getOllamaBaseUrl =
      options.getOllamaChatBaseUrl ?? (() => 'http://localhost:11434/v1')
    const resolveOllamaModel =
      options.resolveOllamaDefaultModel ?? (async () => 'llama3.1:8b')

    // Shell OPENAI_* values are deliberately ignored here: only a matching
    // persisted Ollama profile or the resolver may choose the endpoint/model.
    env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || getOllamaBaseUrl()
    env.OPENAI_MODEL =
      persistedEnv.OPENAI_MODEL ||
      (await resolveOllamaModel(options.goal))

    delete env.OPENAI_API_KEY
    delete env.CODEX_API_KEY
    delete env.CHATGPT_ACCOUNT_ID
    delete env.CODEX_ACCOUNT_ID

    return env
  }

  if (options.profile === 'codex') {
    // A persisted base URL that is not a Codex endpoint is replaced by the default.
    env.OPENAI_BASE_URL =
      persistedEnv.OPENAI_BASE_URL && isCodexBaseUrl(persistedEnv.OPENAI_BASE_URL)
        ? persistedEnv.OPENAI_BASE_URL
        : DEFAULT_CODEX_BASE_URL
    env.OPENAI_MODEL = persistedEnv.OPENAI_MODEL || 'codexplan'
    delete env.OPENAI_API_KEY

    const codexKey = firstUsableApiKey(
      processEnv.CODEX_API_KEY,
      persistedEnv.CODEX_API_KEY,
    )
    const liveCodexCredentials = resolveCodexApiCredentials(processEnv)
    // Shell ids win, then the id resolved from live credentials, then persisted ids.
    const codexAccountId =
      processEnv.CHATGPT_ACCOUNT_ID ||
      processEnv.CODEX_ACCOUNT_ID ||
      liveCodexCredentials.accountId ||
      persistedEnv.CHATGPT_ACCOUNT_ID ||
      persistedEnv.CODEX_ACCOUNT_ID
    if (codexKey) {
      env.CODEX_API_KEY = codexKey
    } else {
      delete env.CODEX_API_KEY
    }

    if (codexAccountId) {
      env.CHATGPT_ACCOUNT_ID = codexAccountId
    } else {
      delete env.CHATGPT_ACCOUNT_ID
    }
    delete env.CODEX_ACCOUNT_ID

    return env
  }

  const defaultOpenAIModel = getGoalDefaultOpenAIModel(options.goal)
  const shellOpenAIRequest = resolveProviderRequest({
    model: processEnv.OPENAI_MODEL,
    baseUrl: processEnv.OPENAI_BASE_URL,
    fallbackModel: defaultOpenAIModel,
  })
  const persistedOpenAIRequest = resolveProviderRequest({
    model: persistedEnv.OPENAI_MODEL,
    baseUrl: persistedEnv.OPENAI_BASE_URL,
    fallbackModel: defaultOpenAIModel,
  })
  const useShellOpenAIConfig = shellOpenAIRequest.transport === 'chat_completions'
  const usePersistedOpenAIConfig =
    (!persistedEnv.OPENAI_MODEL && !persistedEnv.OPENAI_BASE_URL) ||
    persistedOpenAIRequest.transport === 'chat_completions'

  env.OPENAI_BASE_URL =
    (useShellOpenAIConfig ? processEnv.OPENAI_BASE_URL : undefined) ||
    (usePersistedOpenAIConfig ? persistedEnv.OPENAI_BASE_URL : undefined) ||
    DEFAULT_OPENAI_BASE_URL
  env.OPENAI_MODEL =
    (useShellOpenAIConfig ? processEnv.OPENAI_MODEL : undefined) ||
    (usePersistedOpenAIConfig ? persistedEnv.OPENAI_MODEL : undefined) ||
    defaultOpenAIModel
  // Prefer a usable key so a placeholder in the shell cannot shadow a real
  // persisted key. When neither is usable, fall back to the raw values so the
  // launched process sees exactly what it would have seen before.
  env.OPENAI_API_KEY =
    firstUsableApiKey(processEnv.OPENAI_API_KEY, persistedEnv.OPENAI_API_KEY) ||
    processEnv.OPENAI_API_KEY ||
    persistedEnv.OPENAI_API_KEY
  delete env.CODEX_API_KEY
  delete env.CHATGPT_ACCOUNT_ID
  delete env.CODEX_ACCOUNT_ID
  return env
}

// diff --git a/src/utils/providerRecommendation.test.ts b/src/utils/providerRecommendation.test.ts
// new file mode 100644
// index 00000000..f85777bf
// --- /dev/null
// +++ b/src/utils/providerRecommendation.test.ts
// @@ -0,0 +1,194 @@
import assert from 'node:assert/strict'
import test from 'node:test'

import {
  applyBenchmarkLatency,
  getGoalDefaultOpenAIModel,
  normalizeRecommendationGoal,
  rankOllamaModels,
  recommendOllamaModel,
  type OllamaModelDescriptor,
} from './providerRecommendation.ts'

/** Builds a model descriptor fixture with every optional field blank unless overridden. */
function model(
  name: string,
  overrides: Partial<OllamaModelDescriptor> = {},
): OllamaModelDescriptor {
  return {
    name,
    sizeBytes: null,
    family: null,
    families: [],
    parameterSize: null,
    quantizationLevel: null,
    ...overrides,
  }
}

test('normalizes recommendation goals safely', () => {
  assert.equal(normalizeRecommendationGoal('coding'), 'coding')
  assert.equal(normalizeRecommendationGoal(' LATENCY '), 'latency')
  assert.equal(normalizeRecommendationGoal('weird'), 'balanced')
  assert.equal(normalizeRecommendationGoal(undefined), 'balanced')
})

test('coding goal prefers coding-oriented ollama models', () => {
  const recommended = recommendOllamaModel(
    [
      model('llama3.1:8b', {
        parameterSize: '8B',
        quantizationLevel: 'Q4_K_M',
      }),
      model('qwen2.5-coder:7b', {
        parameterSize: '7B',
        quantizationLevel: 'Q4_K_M',
      }),
    ],
    'coding',
  )

  assert.equal(recommended?.name, 'qwen2.5-coder:7b')
})

test('latency goal prefers smaller models', () => {
  const recommended = recommendOllamaModel(
    [
      model('llama3.1:70b', {
        parameterSize: '70B',
        quantizationLevel: 'Q4_K_M',
      }),
      model('llama3.2:3b', {
        parameterSize: '3B',
        quantizationLevel: 'Q4_K_M',
      }),
    ],
    'latency',
  )

  assert.equal(recommended?.name, 'llama3.2:3b')
})

test('non-chat embedding models are heavily demoted', () => {
  const ranked = rankOllamaModels(
    [
      model('nomic-embed-text', { parameterSize: '0.5B' }),
      model('mistral:7b-instruct', {
        parameterSize: '7B',
        quantizationLevel: 'Q4_K_M',
      }),
    ],
    'balanced',
  )

  assert.equal(ranked[0]?.name, 'mistral:7b-instruct')
})

test('auto-pick ignores non-chat ollama models', () => {
  const recommended = recommendOllamaModel(
    [
      model('nomic-embed-text', { parameterSize: '0.5B' }),
      model('bge-reranker-v2', { parameterSize: '1.5B' }),
      model('whisper-large-v3', { parameterSize: '1.6B' }),
    ],
    'balanced',
  )

  assert.equal(recommended, null)
})

test('benchmark latency can reorder close recommendations', () => {
  const ranked = rankOllamaModels(
    [
      model('llama3.1:8b', {
        parameterSize: '8B',
        quantizationLevel: 'Q4_K_M',
      }),
      model('mistral:7b-instruct', {
        parameterSize: '7B',
        quantizationLevel: 'Q4_K_M',
      }),
    ],
    'latency',
  )

  const benchmarked = applyBenchmarkLatency(
    ranked,
    {
      'llama3.1:8b': 2000,
      'mistral:7b-instruct': 350,
    },
    'latency',
  )

  assert.equal(benchmarked[0]?.name, 'mistral:7b-instruct')
  assert.equal(benchmarked[0]?.benchmarkMs, 350)
})

test('unbenchmarked models stay behind benchmarked candidates', () => {
  const ranked = rankOllamaModels(
    [
      model('phi4-mini:4b', {
        parameterSize: '4B',
        quantizationLevel: 'Q4_K_M',
      }),
      model('mistral:7b-instruct', {
        parameterSize: '7B',
        quantizationLevel: 'Q4_K_M',
      }),
      model('llama3.1:8b', {
        parameterSize: '8B',
        quantizationLevel: 'Q4_K_M',
      }),
      model('qwen2.5:14b', {
        parameterSize: '14B',
        quantizationLevel: 'Q4_K_M',
      }),
    ],
    'latency',
  )

  const benchmarked = applyBenchmarkLatency(
    ranked,
    {
      'phi4-mini:4b': 2400,
      'mistral:7b-instruct': 2200,
      'llama3.1:8b': 2100,
    },
    'latency',
  )

  assert.ok(benchmarked.slice(0, 3).every(item => item.benchmarkMs !== null))
  assert.equal(benchmarked[3]?.name, 'qwen2.5:14b')
  assert.equal(benchmarked[3]?.benchmarkMs, null)
})

test('coding goal recognizes codestral and devstral families', () => {
  const ranked = rankOllamaModels(
    [
      model('mistral:7b-instruct', {
        parameterSize: '7B',
        quantizationLevel: 'Q4_K_M',
      }),
      model('codestral:22b', {
        parameterSize: '22B',
        quantizationLevel: 'Q4_K_M',
      }),
      model('devstral:24b', {
        parameterSize: '24B',
        quantizationLevel: 'Q4_K_M',
      }),
    ],
    'coding',
  )

  assert.deepEqual(ranked.slice(0, 2).map(item => item.name), [
    'devstral:24b',
    'codestral:22b',
  ])
})

test('goal defaults choose sensible openai models', () => {
  assert.equal(getGoalDefaultOpenAIModel('latency'), 'gpt-4o-mini')
  assert.equal(getGoalDefaultOpenAIModel('balanced'), 'gpt-4o')
  assert.equal(getGoalDefaultOpenAIModel('coding'), 'gpt-4o')
})

// diff --git a/src/utils/providerRecommendation.ts b/src/utils/providerRecommendation.ts
// new file mode 100644
// index 00000000..8bd1e2cd
// --- /dev/null
// +++ b/src/utils/providerRecommendation.ts
// @@ -0,0 +1,317 @@
/** What the user wants to optimise a model recommendation for. */
export type RecommendationGoal = 'latency' | 'balanced' | 'coding'

/** Metadata about an installed Ollama model; only `name` is required. */
export type OllamaModelDescriptor = {
  name: string
  sizeBytes?: number | null
  family?: string | null
  families?: string[]
  parameterSize?: string | null
  quantizationLevel?: string | null
}

/** A model descriptor with its score, the reasons behind it and a short summary. */
export type RankedOllamaModel = OllamaModelDescriptor & {
  score: number
  reasons: string[]
  summary: string
}

/** A ranked model plus its measured latency; `null` means it was not benchmarked. */
export type BenchmarkedOllamaModel = RankedOllamaModel & {
  benchmarkMs: number | null
}

// Substrings that mark a model as coding-oriented.
const CODING_HINTS = [
  'coder',
  'codellama',
  'codegemma',
  'codestral',
  'devstral',
  'starcoder',
  'deepseek-coder',
  'qwen2.5-coder',
  'qwen-coder',
]

// Substrings that mark a well-known general-purpose model family.
const GENERAL_HINTS = [
  'llama',
  'qwen',
  'mistral',
  'gemma',
  'phi',
  'deepseek',
]

// Substrings that mark chat/instruction tuning.
const INSTRUCT_HINTS = ['instruct', 'chat', 'assistant']
// Substrings that mark models which are not chat models (embedders, rerankers, speech).
const NON_CHAT_HINTS = ['embed', 'embedding', 'rerank', 'bge', 'whisper']

// Parameter-count suffixes such as "8b" / "0.5 B" and "135m", matched on lowercased text.
const BILLIONS_PATTERN = /(\d+(?:\.\d+)?)\s*b\b/
const MILLIONS_PATTERN = /(\d+(?:\.\d+)?)\s*m\b/

/** Joins every descriptive field of a model into one lowercased string for hint matching. */
function modelHaystack(model: OllamaModelDescriptor): string {
  return [
    model.name,
    model.family ?? '',
    ...(model.families ?? []),
    model.parameterSize ?? '',
    model.quantizationLevel ?? '',
  ]
    .join(' ')
    .toLowerCase()
}

/** True when `text` contains at least one of `needles` as a substring. */
function includesAny(text: string, needles: string[]): boolean {
  return needles.some(needle => text.includes(needle))
}

/** A model is considered chat-capable unless its metadata matches a non-chat hint. */
export function isViableOllamaChatModel(model: OllamaModelDescriptor): boolean {
  return !includesAny(modelHaystack(model), NON_CHAT_HINTS)
}

/**
 * Returns the first chat-capable model in `models`, preserving the caller's
 * ordering, or `null` when none qualifies.
 */
export function selectRecommendedOllamaModel<
  T extends OllamaModelDescriptor,
>(models: T[]): T | null {
  return models.find(isViableOllamaChatModel) ?? null
}

/**
 * Estimates the parameter count in billions.
 *
 * Looks for a billions suffix ("8b") in `parameterSize` followed by the name,
 * then for a millions suffix ("135m", converted to billions), and finally
 * falls back to the on-disk size in gigabytes as a rough proxy.
 *
 * @returns The estimate, or `null` when nothing usable is available.
 */
function inferParameterBillions(model: OllamaModelDescriptor): number | null {
  const text = `${model.parameterSize ?? ''} ${model.name}`.toLowerCase()

  const billions = text.match(BILLIONS_PATTERN)
  if (billions?.[1]) {
    return Number(billions[1])
  }

  const millions = text.match(MILLIONS_PATTERN)
  if (millions?.[1]) {
    return Number((Number(millions[1]) / 1000).toFixed(3))
  }

  if (typeof model.sizeBytes === 'number' && model.sizeBytes > 0) {
    return Number((model.sizeBytes / 1_000_000_000).toFixed(1))
  }
  return null
}

/** Lowercased text in which to look for a quantization tag (the name when no level is set). */
function quantizationBucket(model: OllamaModelDescriptor): string {
  return (model.quantizationLevel ?? model.name).toLowerCase()
}

/**
 * Scores a model's size for the given goal and appends the matching reason.
 *
 * Latency favours small models, coding favours the 14B-34B range, and
 * balanced favours 7B-14B. An unknown size scores 0.
 */
function scoreSizeTier(
  paramsB: number | null,
  goal: RecommendationGoal,
  reasons: string[],
): number {
  if (paramsB === null) {
    reasons.push('unknown size')
    return 0
  }

  if (goal === 'latency') {
    if (paramsB <= 4) {
      reasons.push('tiny model for low latency')
      return 32
    }
    if (paramsB <= 8) {
      reasons.push('small model for fast responses')
      return 26
    }
    if (paramsB <= 14) {
      reasons.push('mid-sized model with acceptable latency')
      return 16
    }
    if (paramsB <= 24) {
      reasons.push('larger model may be slower')
      return 8
    }
    reasons.push('large model likely slower locally')
    return paramsB <= 40 ? 0 : -8
  }

  if (goal === 'coding') {
    if (paramsB >= 7 && paramsB <= 14) {
      reasons.push('strong coding size tier')
      return 24
    }
    if (paramsB > 14 && paramsB <= 34) {
      reasons.push('large coding-capable size tier')
      return 28
    }
    if (paramsB > 34) {
      reasons.push('very large model with higher quality potential')
      return 18
    }
    reasons.push('compact model may trade off coding depth')
    return 12
  }

  if (paramsB >= 7 && paramsB <= 14) {
    reasons.push('great balanced size tier')
    return 26
  }
  if (paramsB >= 3 && paramsB < 7) {
    reasons.push('compact balanced size tier')
    return 18
  }
  if (paramsB > 14 && paramsB <= 24) {
    reasons.push('high quality balanced size tier')
    return 20
  }
  if (paramsB > 24) {
    reasons.push('large model for quality-first usage')
    return 10
  }
  reasons.push('very small model for general usage')
  return 8
}

/** Scores the quantization tag (Q4/Q5/Q8) for the goal and appends the matching reason. */
function scoreQuantization(
  model: OllamaModelDescriptor,
  goal: RecommendationGoal,
  reasons: string[],
): number {
  const quant = quantizationBucket(model)
  if (quant.includes('q4')) {
    reasons.push('efficient Q4 quantization')
    return goal === 'latency' ? 8 : 4
  }
  if (quant.includes('q5')) {
    reasons.push('balanced Q5 quantization')
    return goal === 'latency' ? 6 : 5
  }
  if (quant.includes('q8')) {
    reasons.push('higher quality Q8 quantization')
    return goal === 'latency' ? 2 : 5
  }
  return 0
}

/**
 * Sort comparator: higher score first, then a goal-specific size tie-break
 * (smaller for latency, larger for coding, closest to 14B for balanced).
 *
 * Models whose size cannot be inferred always lose the tie-break, and two
 * such models compare equal, so the comparator never yields `NaN`.
 */
function compareRankedModels(
  a: RankedOllamaModel | BenchmarkedOllamaModel,
  b: RankedOllamaModel | BenchmarkedOllamaModel,
  goal: RecommendationGoal,
): number {
  if (b.score !== a.score) {
    return b.score - a.score
  }

  const aSize = inferParameterBillions(a)
  const bSize = inferParameterBillions(b)

  if (aSize === null || bSize === null) {
    if (aSize === bSize) {
      return 0
    }
    return aSize === null ? 1 : -1
  }

  if (goal === 'latency') {
    return aSize - bSize
  }

  if (goal === 'coding') {
    return bSize - aSize
  }

  const target = 14
  return Math.abs(aSize - target) - Math.abs(bSize - target)
}

/**
 * Parses a user-supplied goal, ignoring case and surrounding whitespace.
 * Anything unrecognised, including `null`/`undefined`, becomes `balanced`.
 */
export function normalizeRecommendationGoal(
  goal: string | null | undefined,
): RecommendationGoal {
  const normalized = goal?.trim().toLowerCase()
  if (
    normalized === 'latency' ||
    normalized === 'balanced' ||
    normalized === 'coding'
  ) {
    return normalized
  }
  return 'balanced'
}

/** Default OpenAI model for a goal: `gpt-4o-mini` for latency, `gpt-4o` otherwise. */
export function getGoalDefaultOpenAIModel(goal: RecommendationGoal): string {
  switch (goal) {
    case 'latency':
      return 'gpt-4o-mini'
    case 'coding':
      return 'gpt-4o'
    case 'balanced':
    default:
      return 'gpt-4o'
  }
}

/**
 * Scores every model for `goal` and returns them best-first.
 *
 * Non-chat models are penalised rather than removed; use
 * `recommendOllamaModel` to obtain a chat-capable pick. The input array is
 * not modified.
 */
export function rankOllamaModels(
  models: OllamaModelDescriptor[],
  goal: RecommendationGoal,
): RankedOllamaModel[] {
  return models
    .map((model): RankedOllamaModel => {
      const haystack = modelHaystack(model)
      const reasons: string[] = []
      let score = 0

      if (includesAny(haystack, NON_CHAT_HINTS)) {
        score -= 40
        reasons.push('not a chat-first model')
      }

      if (includesAny(haystack, CODING_HINTS)) {
        score += goal === 'coding' ? 24 : goal === 'balanced' ? 10 : 4
        reasons.push('coding-oriented model family')
      }

      if (includesAny(haystack, GENERAL_HINTS)) {
        score += goal === 'latency' ? 4 : goal === 'coding' ? 6 : 8
        reasons.push('strong general-purpose model family')
      }

      if (includesAny(haystack, INSTRUCT_HINTS)) {
        score += goal === 'latency' ? 2 : 6
        reasons.push('chat/instruct tuned')
      }

      if (haystack.includes('vision') || haystack.includes('vl')) {
        score -= 2
        reasons.push('vision model adds extra overhead')
      }

      score += scoreSizeTier(inferParameterBillions(model), goal, reasons)
      score += scoreQuantization(model, goal, reasons)

      // The summary shows only the first three reasons, in the order they were recorded.
      const summary = reasons.slice(0, 3).join(', ')
      return {
        ...model,
        score,
        reasons,
        summary,
      }
    })
    .sort((a, b) => compareRankedModels(a, b, goal))
}

/** Returns the best-ranked chat-capable model for `goal`, or `null` when there is none. */
export function recommendOllamaModel(
  models: OllamaModelDescriptor[],
  goal: RecommendationGoal,
): RankedOllamaModel | null {
  return selectRecommendedOllamaModel(rankOllamaModels(models, goal))
}

/**
 * Folds measured latencies into already-ranked models.
 *
 * Each benchmarked model loses `latency / divisor` points, where the divisor
 * depends on the goal (120 for latency, 240 for balanced, 500 for coding).
 * When at least one model was benchmarked, benchmarked models are re-sorted
 * and placed ahead of the others, which keep their incoming order.
 *
 * @param benchmarkMs Latency in milliseconds keyed by model name. Missing,
 *   `null`, non-finite or negative entries count as "not benchmarked".
 * @returns New model objects; the input array and its items are not modified.
 */
export function applyBenchmarkLatency(
  models: RankedOllamaModel[],
  benchmarkMs: Record<string, number | null>,
  goal: RecommendationGoal,
): BenchmarkedOllamaModel[] {
  const divisor =
    goal === 'latency' ? 120 : goal === 'coding' ? 500 : 240

  const scoredModels = models.map((model): BenchmarkedOllamaModel => {
    const measured: unknown = benchmarkMs[model.name]
    // Reject NaN/Infinity/negative values (and inherited non-number properties)
    // so a failed measurement cannot turn the score into NaN or -Infinity.
    const latency =
      typeof measured === 'number' && Number.isFinite(measured) && measured >= 0
        ? measured
        : null
    const benchmarkPenalty = latency === null ? 0 : latency / divisor
    const reasons =
      latency === null
        ? model.reasons
        : [`benchmarked at ${Math.round(latency)}ms`, ...model.reasons]

    return {
      ...model,
      benchmarkMs: latency,
      reasons,
      summary: reasons.slice(0, 3).join(', '),
      score: Number((model.score - benchmarkPenalty).toFixed(2)),
    }
  })

  const benchmarkedModels = scoredModels.filter(model => model.benchmarkMs !== null)
  if (benchmarkedModels.length === 0) {
    return scoredModels.sort((a, b) => compareRankedModels(a, b, goal))
  }

  const unbenchmarkedModels = scoredModels.filter(model => model.benchmarkMs === null)
  benchmarkedModels.sort((a, b) => compareRankedModels(a, b, goal))
  return [...benchmarkedModels, ...unbenchmarkedModels]
}