fix: harden provider recommendation safety
src/utils/providerProfile.test.ts (new file, 92 lines)
@@ -0,0 +1,92 @@
import assert from 'node:assert/strict'
import test from 'node:test'

import {
  buildLaunchEnv,
  buildOllamaProfileEnv,
  selectAutoProfile,
  type ProfileFile,
} from './providerProfile.ts'

function profile(profile: ProfileFile['profile'], env: ProfileFile['env']): ProfileFile {
  return {
    profile,
    env,
    createdAt: '2026-04-01T00:00:00.000Z',
  }
}

test('matching persisted ollama env is reused for ollama launch', async () => {
  const env = await buildLaunchEnv({
    profile: 'ollama',
    persisted: profile('ollama', {
      OPENAI_BASE_URL: 'http://127.0.0.1:11435/v1',
      OPENAI_MODEL: 'mistral:7b-instruct',
    }),
    goal: 'balanced',
    processEnv: {},
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
  })

  assert.equal(env.OPENAI_BASE_URL, 'http://127.0.0.1:11435/v1')
  assert.equal(env.OPENAI_MODEL, 'mistral:7b-instruct')
})

test('ollama launch ignores mismatched persisted openai env and shell model fallback', async () => {
  const env = await buildLaunchEnv({
    profile: 'ollama',
    persisted: profile('openai', {
      OPENAI_BASE_URL: 'https://api.openai.com/v1',
      OPENAI_MODEL: 'gpt-4o',
      OPENAI_API_KEY: 'sk-persisted',
    }),
    goal: 'coding',
    processEnv: {
      OPENAI_BASE_URL: 'https://api.deepseek.com/v1',
      OPENAI_MODEL: 'gpt-4o-mini',
    },
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'qwen2.5-coder:7b',
  })

  assert.equal(env.OPENAI_BASE_URL, 'http://localhost:11434/v1')
  assert.equal(env.OPENAI_MODEL, 'qwen2.5-coder:7b')
})

test('openai launch ignores mismatched persisted ollama env', async () => {
  const env = await buildLaunchEnv({
    profile: 'openai',
    persisted: profile('ollama', {
      OPENAI_BASE_URL: 'http://localhost:11434/v1',
      OPENAI_MODEL: 'llama3.1:8b',
    }),
    goal: 'latency',
    processEnv: {
      OPENAI_API_KEY: 'sk-live',
    },
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
  })

  assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
  assert.equal(env.OPENAI_MODEL, 'gpt-4o-mini')
  assert.equal(env.OPENAI_API_KEY, 'sk-live')
})

test('ollama profiles never persist openai api keys', () => {
  const env = buildOllamaProfileEnv('llama3.1:8b', {
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
  })

  assert.deepEqual(env, {
    OPENAI_BASE_URL: 'http://localhost:11434/v1',
    OPENAI_MODEL: 'llama3.1:8b',
  })
  assert.equal('OPENAI_API_KEY' in env, false)
})

test('auto profile falls back to openai when no viable ollama model exists', () => {
  assert.equal(selectAutoProfile(null), 'openai')
  assert.equal(selectAutoProfile('qwen2.5-coder:7b'), 'ollama')
})
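The suite uses only the built-in `node:test` runner and strict asserts; with a Node release that supports type stripping it should run via something like `node --test src/utils/providerProfile.test.ts` (the exact command is a guess, not taken from the repository's scripts).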
src/utils/providerProfile.ts (new file, 123 lines)
@@ -0,0 +1,123 @@
import {
  getGoalDefaultOpenAIModel,
  type RecommendationGoal,
} from './providerRecommendation.ts'

export type ProviderProfile = 'openai' | 'ollama'

export type ProfileEnv = {
  OPENAI_BASE_URL?: string
  OPENAI_MODEL?: string
  OPENAI_API_KEY?: string
}

export type ProfileFile = {
  profile: ProviderProfile
  env: ProfileEnv
  createdAt: string
}

export function sanitizeApiKey(
  key: string | null | undefined,
): string | undefined {
  if (!key || key === 'SUA_CHAVE') return undefined
  return key
}

export function buildOllamaProfileEnv(
  model: string,
  options: {
    baseUrl?: string | null
    getOllamaChatBaseUrl: (baseUrl?: string) => string
  },
): ProfileEnv {
  return {
    OPENAI_BASE_URL: options.getOllamaChatBaseUrl(options.baseUrl ?? undefined),
    OPENAI_MODEL: model,
  }
}

export function buildOpenAIProfileEnv(options: {
  goal: RecommendationGoal
  model?: string | null
  apiKey?: string | null
  processEnv?: NodeJS.ProcessEnv
}): ProfileEnv | null {
  const processEnv = options.processEnv ?? process.env
  const key = sanitizeApiKey(options.apiKey ?? processEnv.OPENAI_API_KEY)
  if (!key) {
    return null
  }

  return {
    OPENAI_BASE_URL: processEnv.OPENAI_BASE_URL || 'https://api.openai.com/v1',
    OPENAI_MODEL: options.model || getGoalDefaultOpenAIModel(options.goal),
    OPENAI_API_KEY: key,
  }
}

export function createProfileFile(
  profile: ProviderProfile,
  env: ProfileEnv,
): ProfileFile {
  return {
    profile,
    env,
    createdAt: new Date().toISOString(),
  }
}

export function selectAutoProfile(
  recommendedOllamaModel: string | null,
): ProviderProfile {
  return recommendedOllamaModel ? 'ollama' : 'openai'
}

export async function buildLaunchEnv(options: {
  profile: ProviderProfile
  persisted: ProfileFile | null
  goal: RecommendationGoal
  processEnv?: NodeJS.ProcessEnv
  getOllamaChatBaseUrl?: (baseUrl?: string) => string
  resolveOllamaDefaultModel?: (goal: RecommendationGoal) => Promise<string>
}): Promise<NodeJS.ProcessEnv> {
  const processEnv = options.processEnv ?? process.env
  const persistedEnv =
    options.persisted?.profile === options.profile
      ? options.persisted.env ?? {}
      : {}

  const env: NodeJS.ProcessEnv = {
    ...processEnv,
    CLAUDE_CODE_USE_OPENAI: '1',
  }

  if (options.profile === 'ollama') {
    const getOllamaBaseUrl =
      options.getOllamaChatBaseUrl ?? (() => 'http://localhost:11434/v1')
    const resolveOllamaModel =
      options.resolveOllamaDefaultModel ?? (async () => 'llama3.1:8b')

    env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || getOllamaBaseUrl()
    env.OPENAI_MODEL =
      persistedEnv.OPENAI_MODEL ||
      (await resolveOllamaModel(options.goal))

    if (!processEnv.OPENAI_API_KEY || processEnv.OPENAI_API_KEY === 'SUA_CHAVE') {
      delete env.OPENAI_API_KEY
    }

    return env
  }

  env.OPENAI_BASE_URL =
    processEnv.OPENAI_BASE_URL ||
    persistedEnv.OPENAI_BASE_URL ||
    'https://api.openai.com/v1'
  env.OPENAI_MODEL =
    processEnv.OPENAI_MODEL ||
    persistedEnv.OPENAI_MODEL ||
    getGoalDefaultOpenAIModel(options.goal)
  env.OPENAI_API_KEY = processEnv.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY
  return env
}
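For orientation, a minimal usage sketch of buildLaunchEnv under the new rules, in an ESM context; the callback values below are stand-ins, and 'SUA_CHAVE' is the placeholder key the helper strips:

import { buildLaunchEnv, createProfileFile } from './providerProfile.ts'

// A profile persisted by an earlier ollama launch. It is reused only when
// the current launch profile matches; an 'openai' file never leaks into an
// 'ollama' launch, and vice versa.
const persisted = createProfileFile('ollama', {
  OPENAI_BASE_URL: 'http://127.0.0.1:11435/v1',
  OPENAI_MODEL: 'mistral:7b-instruct',
})

const env = await buildLaunchEnv({
  profile: 'ollama',
  persisted,
  goal: 'balanced',
  processEnv: { OPENAI_API_KEY: 'SUA_CHAVE' }, // placeholder key gets deleted
  getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
  resolveOllamaDefaultModel: async () => 'llama3.1:8b',
})

// env.OPENAI_BASE_URL === 'http://127.0.0.1:11435/v1'  (persisted value wins)
// env.OPENAI_MODEL === 'mistral:7b-instruct'
// 'OPENAI_API_KEY' in env === false                    (placeholder stripped)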
@@ -83,6 +83,19 @@ test('non-chat embedding models are heavily demoted', () => {
   assert.equal(ranked[0]?.name, 'mistral:7b-instruct')
 })

+test('auto-pick ignores non-chat ollama models', () => {
+  const recommended = recommendOllamaModel(
+    [
+      model('nomic-embed-text', { parameterSize: '0.5B' }),
+      model('bge-reranker-v2', { parameterSize: '1.5B' }),
+      model('whisper-large-v3', { parameterSize: '1.6B' }),
+    ],
+    'balanced',
+  )
+
+  assert.equal(recommended, null)
+})
+
 test('benchmark latency can reorder close recommendations', () => {
   const ranked = rankOllamaModels(
     [
@@ -111,6 +124,69 @@ test('benchmark latency can reorder close recommendations', () => {
   assert.equal(benchmarked[0]?.benchmarkMs, 350)
 })

+test('unbenchmarked models stay behind benchmarked candidates', () => {
+  const ranked = rankOllamaModels(
+    [
+      model('phi4-mini:4b', {
+        parameterSize: '4B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('mistral:7b-instruct', {
+        parameterSize: '7B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('llama3.1:8b', {
+        parameterSize: '8B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('qwen2.5:14b', {
+        parameterSize: '14B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+    ],
+    'latency',
+  )
+
+  const benchmarked = applyBenchmarkLatency(
+    ranked,
+    {
+      'phi4-mini:4b': 2400,
+      'mistral:7b-instruct': 2200,
+      'llama3.1:8b': 2100,
+    },
+    'latency',
+  )
+
+  assert.ok(benchmarked.slice(0, 3).every(item => item.benchmarkMs !== null))
+  assert.equal(benchmarked[3]?.name, 'qwen2.5:14b')
+  assert.equal(benchmarked[3]?.benchmarkMs, null)
+})
+
+test('coding goal recognizes codestral and devstral families', () => {
+  const ranked = rankOllamaModels(
+    [
+      model('mistral:7b-instruct', {
+        parameterSize: '7B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('codestral:22b', {
+        parameterSize: '22B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('devstral:24b', {
+        parameterSize: '24B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+    ],
+    'coding',
+  )
+
+  assert.deepEqual(ranked.slice(0, 2).map(item => item.name), [
+    'devstral:24b',
+    'codestral:22b',
+  ])
+})
+
 test('goal defaults choose sensible openai models', () => {
   assert.equal(getGoalDefaultOpenAIModel('latency'), 'gpt-4o-mini')
   assert.equal(getGoalDefaultOpenAIModel('balanced'), 'gpt-4o')
@@ -23,6 +23,8 @@ const CODING_HINTS = [
   'coder',
   'codellama',
   'codegemma',
+  'codestral',
+  'devstral',
   'starcoder',
   'deepseek-coder',
   'qwen2.5-coder',
@@ -57,6 +59,16 @@ function includesAny(text: string, needles: string[]): boolean {
   return needles.some(needle => text.includes(needle))
 }

+export function isViableOllamaChatModel(model: OllamaModelDescriptor): boolean {
+  return !includesAny(modelHaystack(model), NON_CHAT_HINTS)
+}
+
+export function selectRecommendedOllamaModel<
+  T extends OllamaModelDescriptor,
+>(models: T[]): T | null {
+  return models.find(isViableOllamaChatModel) ?? null
+}
+
 function inferParameterBillions(model: OllamaModelDescriptor): number | null {
   const text = `${model.parameterSize ?? ''} ${model.name}`.toLowerCase()
   const match = text.match(/(\d+(?:\.\d+)?)\s*b\b/)
@@ -265,7 +277,7 @@ export function recommendOllamaModel(
   models: OllamaModelDescriptor[],
   goal: RecommendationGoal,
 ): RankedOllamaModel | null {
-  return rankOllamaModels(models, goal)[0] ?? null
+  return selectRecommendedOllamaModel(rankOllamaModels(models, goal))
 }

 export function applyBenchmarkLatency(
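Combined with the guard above, the auto-pick can now come back empty instead of surfacing an embedding model; a small sketch (descriptor fields are inferred from the test fixtures, so treat them as assumptions):

import { recommendOllamaModel } from './providerRecommendation.ts'

// Previously `rankOllamaModels(...)[0]` could return a non-chat model when
// nothing else was installed. Now non-viable entries are skipped, the pick
// returns null, and selectAutoProfile(null) routes the launch to openai.
const recommended = recommendOllamaModel(
  [{ name: 'nomic-embed-text', parameterSize: '0.5B' }],
  'balanced',
)
// recommended === null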
@@ -276,7 +288,7 @@ export function applyBenchmarkLatency(
   const divisor =
     goal === 'latency' ? 120 : goal === 'coding' ? 500 : 240

-  return models
+  const scoredModels = models
     .map(model => {
       const latency = benchmarkMs[model.name] ?? null
       const benchmarkPenalty = latency === null ? 0 : latency / divisor
@@ -293,5 +305,13 @@ export function applyBenchmarkLatency(
         score: Number((model.score - benchmarkPenalty).toFixed(2)),
       }
     })
-    .sort((a, b) => compareRankedModels(a, b, goal))
+
+  const benchmarkedModels = scoredModels.filter(model => model.benchmarkMs !== null)
+  if (benchmarkedModels.length === 0) {
+    return scoredModels.sort((a, b) => compareRankedModels(a, b, goal))
+  }
+
+  const unbenchmarkedModels = scoredModels.filter(model => model.benchmarkMs === null)
+  benchmarkedModels.sort((a, b) => compareRankedModels(a, b, goal))
+  return [...benchmarkedModels, ...unbenchmarkedModels]
 }
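The net effect of the reordering, sketched with the fixture shapes from the tests above (latencies in milliseconds; descriptor fields are assumptions taken from those fixtures):

import {
  applyBenchmarkLatency,
  rankOllamaModels,
} from './providerRecommendation.ts'

const ranked = rankOllamaModels(
  [
    { name: 'llama3.1:8b', parameterSize: '8B', quantizationLevel: 'Q4_K_M' },
    { name: 'qwen2.5:14b', parameterSize: '14B', quantizationLevel: 'Q4_K_M' },
  ],
  'latency',
)

// Only llama3.1:8b has a measurement, so it is ordered ahead of the
// unbenchmarked qwen2.5:14b regardless of their raw scores.
const ordered = applyBenchmarkLatency(ranked, { 'llama3.1:8b': 2100 }, 'latency')
// ordered.map(item => item.name) => ['llama3.1:8b', 'qwen2.5:14b']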