fix: harden provider recommendation safety

This commit is contained in:
Vasanthdev2004
2026-04-01 11:55:24 +05:30
parent 174eb8ad3b
commit 8fe03cba57
10 changed files with 434 additions and 141 deletions

View File

@@ -183,10 +183,10 @@ Fix:
bun run profile:init -- --provider ollama --model llama3.1:8b bun run profile:init -- --provider ollama --model llama3.1:8b
``` ```
Or auto-pick a local profile: Or pick a local Ollama profile automatically by goal:
```powershell ```powershell
bun run profile:auto -- --goal balanced bun run profile:init -- --provider ollama --goal balanced
``` ```
## 6.5 Placeholder key (`SUA_CHAVE`) error ## 6.5 Placeholder key (`SUA_CHAVE`) error
@@ -220,14 +220,16 @@ bun run profile:fast # llama3.2:3b
bun run profile:code # qwen2.5-coder:7b bun run profile:code # qwen2.5-coder:7b
``` ```
Goal-based auto-selection: Goal-based local auto-selection:
```powershell ```powershell
bun run profile:auto -- --goal latency bun run profile:init -- --provider ollama --goal latency
bun run profile:auto -- --goal balanced bun run profile:init -- --provider ollama --goal balanced
bun run profile:auto -- --goal coding bun run profile:init -- --provider ollama --goal coding
``` ```
`profile:auto` is a best-available provider picker, not a local-only command. Use `--provider ollama` when you want to stay on a local model.
## 8. Practical Prompt Playbook (Copy/Paste) ## 8. Practical Prompt Playbook (Copy/Paste)
## 8.1 Code understanding ## 8.1 Code understanding

View File

@@ -187,7 +187,7 @@ bun run doctor:runtime:json
# persist a diagnostics report to reports/doctor-runtime.json # persist a diagnostics report to reports/doctor-runtime.json
bun run doctor:report bun run doctor:report
# full local hardening check (typecheck + smoke + runtime doctor) # full local hardening check (smoke + runtime doctor)
bun run hardening:check bun run hardening:check
# strict hardening (includes project-wide typecheck) # strict hardening (includes project-wide typecheck)
@@ -203,13 +203,13 @@ Notes:
Use profile launchers to avoid repeated environment setup: Use profile launchers to avoid repeated environment setup:
```bash ```bash
# one-time profile bootstrap (auto-detect ollama, otherwise openai) # one-time profile bootstrap (best available provider)
bun run profile:init bun run profile:init
# preview the best provider/model for your goal # preview the best provider/model for your goal
bun run profile:recommend -- --goal coding --benchmark bun run profile:recommend -- --goal coding --benchmark
# auto-apply the best available profile for your goal # auto-apply the best available provider/model for your goal
bun run profile:auto -- --goal latency bun run profile:auto -- --goal latency
# openai bootstrap with explicit key # openai bootstrap with explicit key
@@ -234,6 +234,9 @@ bun run dev:ollama
`profile:recommend` ranks installed Ollama models for `latency`, `balanced`, or `coding`, and `profile:auto` can persist the recommendation directly. `profile:recommend` ranks installed Ollama models for `latency`, `balanced`, or `coding`, and `profile:auto` can persist the recommendation directly.
If no profile exists yet, `dev:profile` now uses the same goal-aware defaults when picking the initial model. If no profile exists yet, `dev:profile` now uses the same goal-aware defaults when picking the initial model.
Use `--provider ollama` when you want a local-only path. Auto mode falls back to OpenAI when no viable local chat model is installed.
Goal-based Ollama selection only recommends among models that are already installed and reachable from Ollama.
`dev:openai` and `dev:ollama` run `doctor:runtime` first and only launch the app if checks pass. `dev:openai` and `dev:ollama` run `doctor:runtime` first and only launch the app if checks pass.
For `dev:ollama`, make sure Ollama is running locally before launch. For `dev:ollama`, make sure Ollama is running locally before launch.

View File

@@ -27,7 +27,7 @@
"dev:fast": "bun run profile:fast && bun run dev:ollama:fast", "dev:fast": "bun run profile:fast && bun run dev:ollama:fast",
"dev:code": "bun run profile:code && bun run dev:profile", "dev:code": "bun run profile:code && bun run dev:profile",
"start": "node dist/cli.mjs", "start": "node dist/cli.mjs",
"test:provider-recommendation": "node --test --experimental-strip-types src/utils/providerRecommendation.test.ts", "test:provider-recommendation": "node --test --experimental-strip-types src/utils/providerRecommendation.test.ts src/utils/providerProfile.test.ts",
"typecheck": "tsc --noEmit", "typecheck": "tsc --noEmit",
"smoke": "bun run build && node dist/cli.mjs --version", "smoke": "bun run build && node dist/cli.mjs --version",
"doctor:runtime": "bun run scripts/system-check.ts", "doctor:runtime": "bun run scripts/system-check.ts",

View File

@@ -6,24 +6,20 @@ import {
normalizeRecommendationGoal, normalizeRecommendationGoal,
recommendOllamaModel, recommendOllamaModel,
} from '../src/utils/providerRecommendation.ts' } from '../src/utils/providerRecommendation.ts'
import {
buildOllamaProfileEnv,
buildOpenAIProfileEnv,
createProfileFile,
selectAutoProfile,
type ProfileFile,
type ProviderProfile,
} from '../src/utils/providerProfile.ts'
import { import {
getOllamaChatBaseUrl, getOllamaChatBaseUrl,
hasLocalOllama, hasLocalOllama,
listOllamaModels, listOllamaModels,
} from './provider-discovery.ts' } from './provider-discovery.ts'
type ProviderProfile = 'openai' | 'ollama'
type ProfileFile = {
profile: ProviderProfile
env: {
OPENAI_BASE_URL?: string
OPENAI_MODEL?: string
OPENAI_API_KEY?: string
}
createdAt: string
}
function parseArg(name: string): string | null { function parseArg(name: string): string | null {
const args = process.argv.slice(2) const args = process.argv.slice(2)
const idx = args.indexOf(name) const idx = args.indexOf(name)
@@ -37,25 +33,16 @@ function parseProviderArg(): ProviderProfile | 'auto' {
return 'auto' return 'auto'
} }
function sanitizeApiKey(key: string | null): string | undefined {
if (!key || key === 'SUA_CHAVE') return undefined
return key
}
async function resolveOllamaModel( async function resolveOllamaModel(
argModel: string | null, argModel: string | null,
argBaseUrl: string | null, argBaseUrl: string | null,
goal: ReturnType<typeof normalizeRecommendationGoal>, goal: ReturnType<typeof normalizeRecommendationGoal>,
): Promise<string> { ) : Promise<string | null> {
if (argModel) return argModel if (argModel) return argModel
const discovered = await listOllamaModels(argBaseUrl || undefined) const discovered = await listOllamaModels(argBaseUrl || undefined)
const recommended = recommendOllamaModel(discovered, goal) const recommended = recommendOllamaModel(discovered, goal)
if (recommended) { return recommended?.name ?? null
return recommended.name
}
return process.env.OPENAI_MODEL || 'llama3.1:8b'
} }
async function main(): Promise<void> { async function main(): Promise<void> {
@@ -68,37 +55,57 @@ async function main(): Promise<void> {
) )
let selected: ProviderProfile let selected: ProviderProfile
let resolvedOllamaModel: string | null = null
if (provider === 'auto') { if (provider === 'auto') {
selected = (await hasLocalOllama(argBaseUrl || undefined)) ? 'ollama' : 'openai' if (await hasLocalOllama(argBaseUrl || undefined)) {
resolvedOllamaModel = await resolveOllamaModel(argModel, argBaseUrl, goal)
selected = selectAutoProfile(resolvedOllamaModel)
} else {
selected = 'openai'
}
} else { } else {
selected = provider selected = provider
} }
const env: ProfileFile['env'] = {} let env: ProfileFile['env']
if (selected === 'ollama') { if (selected === 'ollama') {
env.OPENAI_BASE_URL = getOllamaChatBaseUrl(argBaseUrl || undefined) resolvedOllamaModel ??= await resolveOllamaModel(argModel, argBaseUrl, goal)
env.OPENAI_MODEL = await resolveOllamaModel(argModel, argBaseUrl, goal) if (!resolvedOllamaModel) {
const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null) console.error('No viable Ollama chat model was discovered. Pull a chat model first or pass --model explicitly.')
if (key) env.OPENAI_API_KEY = key process.exit(1)
}
env = buildOllamaProfileEnv(
resolvedOllamaModel,
{
baseUrl: argBaseUrl,
getOllamaChatBaseUrl,
},
)
} else { } else {
env.OPENAI_BASE_URL = argBaseUrl || process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1' const builtEnv = buildOpenAIProfileEnv({
env.OPENAI_MODEL = goal,
model:
argModel || argModel ||
process.env.OPENAI_MODEL || process.env.OPENAI_MODEL ||
getGoalDefaultOpenAIModel(goal) getGoalDefaultOpenAIModel(goal),
const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null) apiKey: argApiKey || process.env.OPENAI_API_KEY || null,
if (!key) { processEnv: {
...process.env,
OPENAI_BASE_URL:
argBaseUrl || process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1',
},
})
if (!builtEnv) {
console.error('OpenAI profile requires a real API key. Use --api-key or set OPENAI_API_KEY.') console.error('OpenAI profile requires a real API key. Use --api-key or set OPENAI_API_KEY.')
process.exit(1) process.exit(1)
} }
env.OPENAI_API_KEY = key
env = builtEnv
} }
const profile: ProfileFile = { const profile = createProfileFile(selected, env)
profile: selected,
env,
createdAt: new Date().toISOString(),
}
const outputPath = resolve(process.cwd(), '.openclaude-profile.json') const outputPath = resolve(process.cwd(), '.openclaude-profile.json')
writeFileSync(outputPath, JSON.stringify(profile, null, 2), 'utf8') writeFileSync(outputPath, JSON.stringify(profile, null, 2), 'utf8')

View File

@@ -3,27 +3,21 @@ import { spawn } from 'node:child_process'
import { existsSync, readFileSync } from 'node:fs' import { existsSync, readFileSync } from 'node:fs'
import { resolve } from 'node:path' import { resolve } from 'node:path'
import { import {
getGoalDefaultOpenAIModel,
normalizeRecommendationGoal, normalizeRecommendationGoal,
recommendOllamaModel, recommendOllamaModel,
} from '../src/utils/providerRecommendation.ts' } from '../src/utils/providerRecommendation.ts'
import {
buildLaunchEnv,
selectAutoProfile,
type ProfileFile,
type ProviderProfile,
} from '../src/utils/providerProfile.ts'
import { import {
getOllamaChatBaseUrl, getOllamaChatBaseUrl,
hasLocalOllama, hasLocalOllama,
listOllamaModels, listOllamaModels,
} from './provider-discovery.ts' } from './provider-discovery.ts'
type ProviderProfile = 'openai' | 'ollama'
type ProfileFile = {
profile: ProviderProfile
env?: {
OPENAI_BASE_URL?: string
OPENAI_MODEL?: string
OPENAI_API_KEY?: string
}
}
type LaunchOptions = { type LaunchOptions = {
requestedProfile: ProviderProfile | 'auto' | null requestedProfile: ProviderProfile | 'auto' | null
passthroughArgs: string[] passthroughArgs: string[]
@@ -93,10 +87,10 @@ function loadPersistedProfile(): ProfileFile | null {
async function resolveOllamaDefaultModel( async function resolveOllamaDefaultModel(
goal: ReturnType<typeof normalizeRecommendationGoal>, goal: ReturnType<typeof normalizeRecommendationGoal>,
): Promise<string> { ): Promise<string | null> {
const models = await listOllamaModels() const models = await listOllamaModels()
const recommended = recommendOllamaModel(models, goal) const recommended = recommendOllamaModel(models, goal)
return recommended?.name || process.env.OPENAI_MODEL || 'llama3.1:8b' return recommended?.name ?? null
} }
function runCommand(command: string, env: NodeJS.ProcessEnv): Promise<number> { function runCommand(command: string, env: NodeJS.ProcessEnv): Promise<number> {
@@ -113,41 +107,6 @@ function runCommand(command: string, env: NodeJS.ProcessEnv): Promise<number> {
}) })
} }
async function buildEnv(
profile: ProviderProfile,
persisted: ProfileFile | null,
goal: ReturnType<typeof normalizeRecommendationGoal>,
): Promise<NodeJS.ProcessEnv> {
const persistedEnv = persisted?.env ?? {}
const env: NodeJS.ProcessEnv = {
...process.env,
CLAUDE_CODE_USE_OPENAI: '1',
}
if (profile === 'ollama') {
env.OPENAI_BASE_URL =
persistedEnv.OPENAI_BASE_URL ||
process.env.OPENAI_BASE_URL ||
getOllamaChatBaseUrl()
env.OPENAI_MODEL =
persistedEnv.OPENAI_MODEL ||
process.env.OPENAI_MODEL ||
await resolveOllamaDefaultModel(goal)
if (!process.env.OPENAI_API_KEY || process.env.OPENAI_API_KEY === 'SUA_CHAVE') {
delete env.OPENAI_API_KEY
}
return env
}
env.OPENAI_BASE_URL = process.env.OPENAI_BASE_URL || persistedEnv.OPENAI_BASE_URL || 'https://api.openai.com/v1'
env.OPENAI_MODEL =
process.env.OPENAI_MODEL ||
persistedEnv.OPENAI_MODEL ||
getGoalDefaultOpenAIModel(goal)
env.OPENAI_API_KEY = process.env.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY
return env
}
function applyFastFlags(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv { function applyFastFlags(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
env.CLAUDE_CODE_SIMPLE ??= '1' env.CLAUDE_CODE_SIMPLE ??= '1'
env.CLAUDE_CODE_DISABLE_THINKING ??= '1' env.CLAUDE_CODE_DISABLE_THINKING ??= '1'
@@ -181,18 +140,36 @@ async function main(): Promise<void> {
const persisted = loadPersistedProfile() const persisted = loadPersistedProfile()
let profile: ProviderProfile let profile: ProviderProfile
let resolvedOllamaModel: string | null = null
if (requestedProfile === 'auto') { if (requestedProfile === 'auto') {
if (persisted) { if (persisted) {
profile = persisted.profile profile = persisted.profile
} else if (await hasLocalOllama()) {
resolvedOllamaModel = await resolveOllamaDefaultModel(options.goal)
profile = selectAutoProfile(resolvedOllamaModel)
} else { } else {
profile = (await hasLocalOllama()) ? 'ollama' : 'openai' profile = 'openai'
} }
} else { } else {
profile = requestedProfile profile = requestedProfile
} }
const env = await buildEnv(profile, persisted, options.goal) if (profile === 'ollama' && persisted?.profile !== 'ollama') {
resolvedOllamaModel ??= await resolveOllamaDefaultModel(options.goal)
if (!resolvedOllamaModel) {
console.error('No viable Ollama chat model was discovered. Pull a chat model first or save one with `bun run profile:init -- --provider ollama --model <model>`.')
process.exit(1)
}
}
const env = await buildLaunchEnv({
profile,
persisted,
goal: options.goal,
getOllamaChatBaseUrl,
resolveOllamaDefaultModel: async () => resolvedOllamaModel || 'llama3.1:8b',
})
if (options.fast) { if (options.fast) {
applyFastFlags(env) applyFastFlags(env)
} }

View File

@@ -5,11 +5,21 @@ import { resolve } from 'node:path'
import { import {
applyBenchmarkLatency, applyBenchmarkLatency,
getGoalDefaultOpenAIModel, getGoalDefaultOpenAIModel,
isViableOllamaChatModel,
normalizeRecommendationGoal, normalizeRecommendationGoal,
rankOllamaModels, rankOllamaModels,
selectRecommendedOllamaModel,
type BenchmarkedOllamaModel, type BenchmarkedOllamaModel,
type RecommendationGoal, type RecommendationGoal,
} from '../src/utils/providerRecommendation.ts' } from '../src/utils/providerRecommendation.ts'
import {
buildOllamaProfileEnv,
buildOpenAIProfileEnv,
createProfileFile,
sanitizeApiKey,
type ProfileFile,
type ProviderProfile,
} from '../src/utils/providerProfile.ts'
import { import {
benchmarkOllamaModel, benchmarkOllamaModel,
getOllamaChatBaseUrl, getOllamaChatBaseUrl,
@@ -17,18 +27,6 @@ import {
listOllamaModels, listOllamaModels,
} from './provider-discovery.ts' } from './provider-discovery.ts'
type ProviderProfile = 'openai' | 'ollama'
type ProfileFile = {
profile: ProviderProfile
env: {
OPENAI_BASE_URL?: string
OPENAI_MODEL?: string
OPENAI_API_KEY?: string
}
createdAt: string
}
type CliOptions = { type CliOptions = {
apply: boolean apply: boolean
benchmark: boolean benchmark: boolean
@@ -90,11 +88,6 @@ function parseOptions(argv: string[]): CliOptions {
return options return options
} }
function sanitizeApiKey(key: string | undefined): string | undefined {
if (!key || key === 'SUA_CHAVE') return undefined
return key
}
function printHumanSummary(payload: { function printHumanSummary(payload: {
goal: RecommendationGoal goal: RecommendationGoal
recommendedProfile: ProviderProfile recommendedProfile: ProviderProfile
@@ -138,29 +131,27 @@ async function maybeApplyProfile(
goal: RecommendationGoal, goal: RecommendationGoal,
baseUrl: string | null, baseUrl: string | null,
): Promise<boolean> { ): Promise<boolean> {
const env: ProfileFile['env'] = {} let env: ProfileFile['env'] | null
if (profile === 'ollama') { if (profile === 'ollama') {
env.OPENAI_BASE_URL = getOllamaChatBaseUrl(baseUrl ?? undefined) env = buildOllamaProfileEnv(model, {
env.OPENAI_MODEL = model baseUrl,
const key = sanitizeApiKey(process.env.OPENAI_API_KEY) getOllamaChatBaseUrl,
if (key) env.OPENAI_API_KEY = key })
} else { } else {
const key = sanitizeApiKey(process.env.OPENAI_API_KEY) env = buildOpenAIProfileEnv({
if (!key) { goal,
model: model || getGoalDefaultOpenAIModel(goal),
apiKey: process.env.OPENAI_API_KEY,
processEnv: process.env,
})
if (!env) {
console.error('Cannot apply an OpenAI profile without OPENAI_API_KEY.') console.error('Cannot apply an OpenAI profile without OPENAI_API_KEY.')
return false return false
} }
env.OPENAI_BASE_URL =
process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1'
env.OPENAI_MODEL = model || getGoalDefaultOpenAIModel(goal)
env.OPENAI_API_KEY = key
} }
const profileFile: ProfileFile = { const profileFile = createProfileFile(profile, env)
profile,
env,
createdAt: new Date().toISOString(),
}
writeFileSync( writeFileSync(
resolve(process.cwd(), '.openclaude-profile.json'), resolve(process.cwd(), '.openclaude-profile.json'),
@@ -180,7 +171,9 @@ async function main(): Promise<void> {
: [] : []
const heuristicRanked = rankOllamaModels(ollamaModels, options.goal) const heuristicRanked = rankOllamaModels(ollamaModels, options.goal)
const benchmarkInput = options.benchmark ? heuristicRanked.slice(0, 3) : [] const benchmarkInput = options.benchmark
? heuristicRanked.filter(isViableOllamaChatModel).slice(0, 3)
: []
const benchmarkResults: Record<string, number | null> = {} const benchmarkResults: Record<string, number | null> = {}
for (const model of benchmarkInput) { for (const model of benchmarkInput) {
@@ -197,7 +190,7 @@ async function main(): Promise<void> {
benchmarkMs: null, benchmarkMs: null,
})) }))
const recommendedOllama = rankedModels[0] ?? null const recommendedOllama = selectRecommendedOllamaModel(rankedModels)
const openAIConfigured = Boolean(sanitizeApiKey(process.env.OPENAI_API_KEY)) const openAIConfigured = Boolean(sanitizeApiKey(process.env.OPENAI_API_KEY))
let recommendedProfile: ProviderProfile let recommendedProfile: ProviderProfile

View File

@@ -0,0 +1,92 @@
import assert from 'node:assert/strict'
import test from 'node:test'
import {
buildLaunchEnv,
buildOllamaProfileEnv,
selectAutoProfile,
type ProfileFile,
} from './providerProfile.ts'
/**
 * Test fixture: wraps a provider name and env map in a persisted-profile
 * shape with a fixed timestamp so the fixture is deterministic.
 */
function profile(profile: ProfileFile['profile'], env: ProfileFile['env']): ProfileFile {
  const fixedCreatedAt = '2026-04-01T00:00:00.000Z'
  const fixture: ProfileFile = { profile, env, createdAt: fixedCreatedAt }
  return fixture
}
// A persisted ollama profile must win over the injected defaults when the
// launch targets ollama as well.
test('matching persisted ollama env is reused for ollama launch', async () => {
  const persisted = profile('ollama', {
    OPENAI_BASE_URL: 'http://127.0.0.1:11435/v1',
    OPENAI_MODEL: 'mistral:7b-instruct',
  })

  const launchEnv = await buildLaunchEnv({
    profile: 'ollama',
    persisted,
    goal: 'balanced',
    processEnv: {},
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
  })

  assert.equal(launchEnv.OPENAI_BASE_URL, 'http://127.0.0.1:11435/v1')
  assert.equal(launchEnv.OPENAI_MODEL, 'mistral:7b-instruct')
})
// Neither a persisted openai profile nor OPENAI_* shell variables may leak
// into an ollama launch; only the injected ollama resolvers should be used.
test('ollama launch ignores mismatched persisted openai env and shell model fallback', async () => {
  const persistedOpenAI = profile('openai', {
    OPENAI_BASE_URL: 'https://api.openai.com/v1',
    OPENAI_MODEL: 'gpt-4o',
    OPENAI_API_KEY: 'sk-persisted',
  })
  const shellEnv = {
    OPENAI_BASE_URL: 'https://api.deepseek.com/v1',
    OPENAI_MODEL: 'gpt-4o-mini',
  }

  const launchEnv = await buildLaunchEnv({
    profile: 'ollama',
    persisted: persistedOpenAI,
    goal: 'coding',
    processEnv: shellEnv,
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'qwen2.5-coder:7b',
  })

  assert.equal(launchEnv.OPENAI_BASE_URL, 'http://localhost:11434/v1')
  assert.equal(launchEnv.OPENAI_MODEL, 'qwen2.5-coder:7b')
})
// A persisted ollama profile must not influence an openai launch: the base
// URL and model fall back to the openai defaults for the requested goal.
test('openai launch ignores mismatched persisted ollama env', async () => {
  const persistedOllama = profile('ollama', {
    OPENAI_BASE_URL: 'http://localhost:11434/v1',
    OPENAI_MODEL: 'llama3.1:8b',
  })
  const shellEnv = { OPENAI_API_KEY: 'sk-live' }

  const launchEnv = await buildLaunchEnv({
    profile: 'openai',
    persisted: persistedOllama,
    goal: 'latency',
    processEnv: shellEnv,
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
  })

  assert.equal(launchEnv.OPENAI_BASE_URL, 'https://api.openai.com/v1')
  assert.equal(launchEnv.OPENAI_MODEL, 'gpt-4o-mini')
  assert.equal(launchEnv.OPENAI_API_KEY, 'sk-live')
})
// The ollama profile env carries only a base URL and a model; no API key
// property may exist on it at all.
test('ollama profiles never persist openai api keys', () => {
  const ollamaEnv = buildOllamaProfileEnv('llama3.1:8b', {
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
  })
  const expectedEnv = {
    OPENAI_BASE_URL: 'http://localhost:11434/v1',
    OPENAI_MODEL: 'llama3.1:8b',
  }

  assert.deepEqual(ollamaEnv, expectedEnv)
  assert.equal('OPENAI_API_KEY' in ollamaEnv, false)
})
// Auto selection keys purely off whether a recommended ollama model exists.
test('auto profile falls back to openai when no viable ollama model exists', () => {
  const withoutModel = selectAutoProfile(null)
  const withModel = selectAutoProfile('qwen2.5-coder:7b')

  assert.equal(withoutModel, 'openai')
  assert.equal(withModel, 'ollama')
})

View File

@@ -0,0 +1,123 @@
import {
getGoalDefaultOpenAIModel,
type RecommendationGoal,
} from './providerRecommendation.ts'
/** The provider backends a profile can target. */
export type ProviderProfile = 'openai' | 'ollama'
/**
 * Environment values carried by a profile. Every field is optional; the
 * builders in this module omit OPENAI_API_KEY for ollama profiles.
 */
export type ProfileEnv = {
// Base URL of the OpenAI-compatible endpoint.
OPENAI_BASE_URL?: string
// Model name to request from that endpoint.
OPENAI_MODEL?: string
// API key; only set by the OpenAI profile builder.
OPENAI_API_KEY?: string
}
/** Shape of a provider profile record as produced by `createProfileFile`. */
export type ProfileFile = {
// Which provider the stored env belongs to.
profile: ProviderProfile
// Environment values to apply for that provider.
env: ProfileEnv
// ISO-8601 timestamp string recorded when the record was created.
createdAt: string
}
/**
 * Normalizes an API key candidate.
 *
 * Surrounding whitespace (for example a trailing newline from a copy/paste or
 * a `.env` file) is stripped before the checks, so a whitespace-only value or
 * a padded placeholder is not mistaken for a real key.
 *
 * @param key Raw key from a CLI flag, the environment, or a stored profile.
 * @returns The trimmed key, or `undefined` when the value is missing, blank,
 *   or the documentation placeholder `SUA_CHAVE`.
 */
export function sanitizeApiKey(
  key: string | null | undefined,
): string | undefined {
  const trimmed = key?.trim()
  if (!trimmed || trimmed === 'SUA_CHAVE') return undefined
  return trimmed
}
/**
 * Builds the env map for an ollama profile.
 *
 * Only the base URL and model are set; no API key is ever included.
 *
 * @param model Ollama model name to store as OPENAI_MODEL.
 * @param options.baseUrl Optional base URL override; `null` is treated the
 *   same as not provided.
 * @param options.getOllamaChatBaseUrl Resolver that turns the optional
 *   override into the chat endpoint base URL.
 * @returns A profile env with OPENAI_BASE_URL and OPENAI_MODEL.
 */
export function buildOllamaProfileEnv(
  model: string,
  options: {
    baseUrl?: string | null
    getOllamaChatBaseUrl: (baseUrl?: string) => string
  },
): ProfileEnv {
  // The resolver takes `string | undefined`, so collapse null to undefined.
  const requestedBaseUrl =
    options.baseUrl === null ? undefined : options.baseUrl
  const chatBaseUrl = options.getOllamaChatBaseUrl(requestedBaseUrl)

  const profileEnv: ProfileEnv = {
    OPENAI_BASE_URL: chatBaseUrl,
    OPENAI_MODEL: model,
  }
  return profileEnv
}
/**
 * Builds the env map for an OpenAI profile.
 *
 * @param options.goal Goal used to choose a default model when `model` is
 *   empty.
 * @param options.model Explicit model name; falls back to the goal default.
 * @param options.apiKey Explicit key; when null/undefined the key is read
 *   from `processEnv.OPENAI_API_KEY`.
 * @param options.processEnv Environment to read from; defaults to
 *   `process.env`.
 * @returns The profile env, or `null` when no usable API key is available
 *   (missing or placeholder).
 */
export function buildOpenAIProfileEnv(options: {
  goal: RecommendationGoal
  model?: string | null
  apiKey?: string | null
  processEnv?: NodeJS.ProcessEnv
}): ProfileEnv | null {
  const sourceEnv = options.processEnv ?? process.env
  const candidateKey = options.apiKey ?? sourceEnv.OPENAI_API_KEY
  const apiKey = sanitizeApiKey(candidateKey)

  // An OpenAI profile without a real key is not usable; signal that to the caller.
  if (!apiKey) {
    return null
  }

  const baseUrl = sourceEnv.OPENAI_BASE_URL || 'https://api.openai.com/v1'
  const model = options.model || getGoalDefaultOpenAIModel(options.goal)

  return {
    OPENAI_BASE_URL: baseUrl,
    OPENAI_MODEL: model,
    OPENAI_API_KEY: apiKey,
  }
}
/**
 * Wraps a provider name and env map in a profile record, stamped with the
 * current time.
 *
 * @param profile Provider the env belongs to.
 * @param env Environment values to store (kept by reference, not copied).
 * @returns A profile record whose `createdAt` is the current ISO-8601 time.
 */
export function createProfileFile(
  profile: ProviderProfile,
  env: ProfileEnv,
): ProfileFile {
  const createdAt = new Date().toISOString()
  const file: ProfileFile = { profile, env, createdAt }
  return file
}
/**
 * Chooses the provider for auto mode.
 *
 * @param recommendedOllamaModel Name of the recommended ollama model, or
 *   `null` when none was found.
 * @returns `'ollama'` when a non-empty model name is given, else `'openai'`.
 */
export function selectAutoProfile(
  recommendedOllamaModel: string | null,
): ProviderProfile {
  if (recommendedOllamaModel) {
    return 'ollama'
  }
  return 'openai'
}
/**
 * Builds the process environment used to launch the app for a provider.
 *
 * The result starts as a copy of `processEnv` with `CLAUDE_CODE_USE_OPENAI`
 * set to `'1'`. Persisted profile values are consulted only when the
 * persisted profile matches the requested provider.
 *
 * Ollama: base URL and model come from the matching persisted profile, else
 * from the injected resolvers. OPENAI_* shell values are not used for them.
 * A missing or placeholder shell API key is removed from the result.
 *
 * OpenAI: shell values win over persisted values, then built-in defaults.
 * For the API key, a real (non-placeholder) key is preferred from either
 * source, so a placeholder left in the shell no longer hides a real
 * persisted key. If neither source has a real key, the raw value is kept as
 * before so downstream checks still see what the user configured.
 *
 * @param options.profile Provider to launch.
 * @param options.persisted Previously saved profile, or `null`.
 * @param options.goal Goal used to pick default models.
 * @param options.processEnv Base environment; defaults to `process.env`.
 * @param options.getOllamaChatBaseUrl Resolver for the ollama base URL;
 *   defaults to `http://localhost:11434/v1`.
 * @param options.resolveOllamaDefaultModel Resolver for the ollama model when
 *   none is persisted; defaults to `llama3.1:8b`.
 * @returns The environment for the child process.
 */
export async function buildLaunchEnv(options: {
  profile: ProviderProfile
  persisted: ProfileFile | null
  goal: RecommendationGoal
  processEnv?: NodeJS.ProcessEnv
  getOllamaChatBaseUrl?: (baseUrl?: string) => string
  resolveOllamaDefaultModel?: (goal: RecommendationGoal) => Promise<string>
}): Promise<NodeJS.ProcessEnv> {
  const processEnv = options.processEnv ?? process.env
  // A profile saved for a different provider must not leak into this launch.
  const persistedEnv: ProfileEnv =
    options.persisted?.profile === options.profile
      ? options.persisted.env ?? {}
      : {}

  const env: NodeJS.ProcessEnv = {
    ...processEnv,
    CLAUDE_CODE_USE_OPENAI: '1',
  }

  if (options.profile === 'ollama') {
    const getOllamaBaseUrl =
      options.getOllamaChatBaseUrl ?? (() => 'http://localhost:11434/v1')
    const resolveOllamaModel =
      options.resolveOllamaDefaultModel ?? (async () => 'llama3.1:8b')

    env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || getOllamaBaseUrl()
    env.OPENAI_MODEL =
      persistedEnv.OPENAI_MODEL ||
      (await resolveOllamaModel(options.goal))

    // Drop a missing or placeholder key rather than forwarding it.
    if (!sanitizeApiKey(processEnv.OPENAI_API_KEY)) {
      delete env.OPENAI_API_KEY
    }

    return env
  }

  env.OPENAI_BASE_URL =
    processEnv.OPENAI_BASE_URL ||
    persistedEnv.OPENAI_BASE_URL ||
    'https://api.openai.com/v1'
  env.OPENAI_MODEL =
    processEnv.OPENAI_MODEL ||
    persistedEnv.OPENAI_MODEL ||
    getGoalDefaultOpenAIModel(options.goal)
  // Prefer a real key from either source; fall back to the raw values only
  // when no real key exists anywhere.
  env.OPENAI_API_KEY =
    sanitizeApiKey(processEnv.OPENAI_API_KEY) ??
    sanitizeApiKey(persistedEnv.OPENAI_API_KEY) ??
    (processEnv.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY)
  return env
}

View File

@@ -83,6 +83,19 @@ test('non-chat embedding models are heavily demoted', () => {
assert.equal(ranked[0]?.name, 'mistral:7b-instruct') assert.equal(ranked[0]?.name, 'mistral:7b-instruct')
}) })
// When every installed model is a non-chat model (embedding, reranker,
// speech), there is nothing to recommend.
test('auto-pick ignores non-chat ollama models', () => {
  const nonChatOnly = [
    model('nomic-embed-text', { parameterSize: '0.5B' }),
    model('bge-reranker-v2', { parameterSize: '1.5B' }),
    model('whisper-large-v3', { parameterSize: '1.6B' }),
  ]

  const recommended = recommendOllamaModel(nonChatOnly, 'balanced')

  assert.equal(recommended, null)
})
test('benchmark latency can reorder close recommendations', () => { test('benchmark latency can reorder close recommendations', () => {
const ranked = rankOllamaModels( const ranked = rankOllamaModels(
[ [
@@ -111,6 +124,69 @@ test('benchmark latency can reorder close recommendations', () => {
assert.equal(benchmarked[0]?.benchmarkMs, 350) assert.equal(benchmarked[0]?.benchmarkMs, 350)
}) })
// Models without a benchmark measurement must be listed after every
// benchmarked model, even if the latter were penalized for latency.
test('unbenchmarked models stay behind benchmarked candidates', () => {
  const candidates = [
    { name: 'phi4-mini:4b', parameterSize: '4B' },
    { name: 'mistral:7b-instruct', parameterSize: '7B' },
    { name: 'llama3.1:8b', parameterSize: '8B' },
    { name: 'qwen2.5:14b', parameterSize: '14B' },
  ].map(({ name, parameterSize }) =>
    model(name, { parameterSize, quantizationLevel: 'Q4_K_M' }),
  )
  const ranked = rankOllamaModels(candidates, 'latency')

  // Only three of the four models have a measurement.
  const measuredLatencies = {
    'phi4-mini:4b': 2400,
    'mistral:7b-instruct': 2200,
    'llama3.1:8b': 2100,
  }
  const benchmarked = applyBenchmarkLatency(ranked, measuredLatencies, 'latency')

  const leadingThree = benchmarked.slice(0, 3)
  assert.ok(leadingThree.every(item => item.benchmarkMs !== null))
  assert.equal(benchmarked[3]?.name, 'qwen2.5:14b')
  assert.equal(benchmarked[3]?.benchmarkMs, null)
})
// For the coding goal, the codestral and devstral families must rank ahead
// of a general-purpose instruct model.
test('coding goal recognizes codestral and devstral families', () => {
  const candidates = [
    { name: 'mistral:7b-instruct', parameterSize: '7B' },
    { name: 'codestral:22b', parameterSize: '22B' },
    { name: 'devstral:24b', parameterSize: '24B' },
  ].map(({ name, parameterSize }) =>
    model(name, { parameterSize, quantizationLevel: 'Q4_K_M' }),
  )

  const ranked = rankOllamaModels(candidates, 'coding')
  const topTwoNames = ranked.slice(0, 2).map(item => item.name)

  assert.deepEqual(topTwoNames, ['devstral:24b', 'codestral:22b'])
})
test('goal defaults choose sensible openai models', () => { test('goal defaults choose sensible openai models', () => {
assert.equal(getGoalDefaultOpenAIModel('latency'), 'gpt-4o-mini') assert.equal(getGoalDefaultOpenAIModel('latency'), 'gpt-4o-mini')
assert.equal(getGoalDefaultOpenAIModel('balanced'), 'gpt-4o') assert.equal(getGoalDefaultOpenAIModel('balanced'), 'gpt-4o')

View File

@@ -23,6 +23,8 @@ const CODING_HINTS = [
'coder', 'coder',
'codellama', 'codellama',
'codegemma', 'codegemma',
'codestral',
'devstral',
'starcoder', 'starcoder',
'deepseek-coder', 'deepseek-coder',
'qwen2.5-coder', 'qwen2.5-coder',
@@ -57,6 +59,16 @@ function includesAny(text: string, needles: string[]): boolean {
return needles.some(needle => text.includes(needle)) return needles.some(needle => text.includes(needle))
} }
/**
 * Reports whether a model can be used as a chat model.
 *
 * @returns `false` when the model's haystack text contains any of the
 *   non-chat hints, `true` otherwise.
 */
export function isViableOllamaChatModel(model: OllamaModelDescriptor): boolean {
  const haystack = modelHaystack(model)
  const looksNonChat = includesAny(haystack, NON_CHAT_HINTS)
  return !looksNonChat
}
/**
 * Picks the first chat-viable model from a list, preserving the list's order.
 *
 * @param models Candidates in the caller's order of preference.
 * @returns The first viable model, or `null` when none qualifies.
 */
export function selectRecommendedOllamaModel<
  T extends OllamaModelDescriptor,
>(models: T[]): T | null {
  const firstViable = models.find(candidate => isViableOllamaChatModel(candidate))
  return firstViable ?? null
}
function inferParameterBillions(model: OllamaModelDescriptor): number | null { function inferParameterBillions(model: OllamaModelDescriptor): number | null {
const text = `${model.parameterSize ?? ''} ${model.name}`.toLowerCase() const text = `${model.parameterSize ?? ''} ${model.name}`.toLowerCase()
const match = text.match(/(\d+(?:\.\d+)?)\s*b\b/) const match = text.match(/(\d+(?:\.\d+)?)\s*b\b/)
@@ -265,7 +277,7 @@ export function recommendOllamaModel(
models: OllamaModelDescriptor[], models: OllamaModelDescriptor[],
goal: RecommendationGoal, goal: RecommendationGoal,
): RankedOllamaModel | null { ): RankedOllamaModel | null {
return rankOllamaModels(models, goal)[0] ?? null return selectRecommendedOllamaModel(rankOllamaModels(models, goal))
} }
export function applyBenchmarkLatency( export function applyBenchmarkLatency(
@@ -276,7 +288,7 @@ export function applyBenchmarkLatency(
const divisor = const divisor =
goal === 'latency' ? 120 : goal === 'coding' ? 500 : 240 goal === 'latency' ? 120 : goal === 'coding' ? 500 : 240
return models const scoredModels = models
.map(model => { .map(model => {
const latency = benchmarkMs[model.name] ?? null const latency = benchmarkMs[model.name] ?? null
const benchmarkPenalty = latency === null ? 0 : latency / divisor const benchmarkPenalty = latency === null ? 0 : latency / divisor
@@ -293,5 +305,13 @@ export function applyBenchmarkLatency(
score: Number((model.score - benchmarkPenalty).toFixed(2)), score: Number((model.score - benchmarkPenalty).toFixed(2)),
} }
}) })
.sort((a, b) => compareRankedModels(a, b, goal))
const benchmarkedModels = scoredModels.filter(model => model.benchmarkMs !== null)
if (benchmarkedModels.length === 0) {
return scoredModels.sort((a, b) => compareRankedModels(a, b, goal))
}
const unbenchmarkedModels = scoredModels.filter(model => model.benchmarkMs === null)
benchmarkedModels.sort((a, b) => compareRankedModels(a, b, goal))
return [...benchmarkedModels, ...unbenchmarkedModels]
} }