diff --git a/PLAYBOOK.md b/PLAYBOOK.md
index dfdaec76..076c377f 100644
--- a/PLAYBOOK.md
+++ b/PLAYBOOK.md
@@ -183,10 +183,10 @@ Fix:
 bun run profile:init -- --provider ollama --model llama3.1:8b
 ```
 
-Or auto-pick a local profile:
+Or pick a local Ollama profile automatically by goal:
 
 ```powershell
-bun run profile:auto -- --goal balanced
+bun run profile:init -- --provider ollama --goal balanced
 ```
 
 ## 6.5 Placeholder key (`SUA_CHAVE`) error
@@ -220,14 +220,16 @@ bun run profile:fast    # llama3.2:3b
 bun run profile:code    # qwen2.5-coder:7b
 ```
 
-Goal-based auto-selection:
+Goal-based local auto-selection:
 
 ```powershell
-bun run profile:auto -- --goal latency
-bun run profile:auto -- --goal balanced
-bun run profile:auto -- --goal coding
+bun run profile:init -- --provider ollama --goal latency
+bun run profile:init -- --provider ollama --goal balanced
+bun run profile:init -- --provider ollama --goal coding
 ```
 
+`profile:auto` is a best-available provider picker, not a local-only command. Use `--provider ollama` when you want to stay on a local model.
+
 ## 8. Practical Prompt Playbook (Copy/Paste)
 
 ## 8.1 Code understanding
diff --git a/README.md b/README.md
index 358bf95d..cef1c5cb 100644
--- a/README.md
+++ b/README.md
@@ -187,7 +187,7 @@ bun run doctor:runtime:json
 # persist a diagnostics report to reports/doctor-runtime.json
 bun run doctor:report
 
-# full local hardening check (typecheck + smoke + runtime doctor)
+# full local hardening check (smoke + runtime doctor)
 bun run hardening:check
 
 # strict hardening (includes project-wide typecheck)
@@ -203,13 +203,13 @@ Notes:
 Use profile launchers to avoid repeated environment setup:
 
 ```bash
-# one-time profile bootstrap (auto-detect ollama, otherwise openai)
+# one-time profile bootstrap (best available provider)
 bun run profile:init
 
 # preview the best provider/model for your goal
 bun run profile:recommend -- --goal coding --benchmark
 
-# auto-apply the best available profile for your goal
+# auto-apply the best available provider/model for your goal
 bun run profile:auto -- --goal latency
 
 # openai bootstrap with explicit key
@@ -234,6 +234,9 @@ bun run dev:ollama
 ```
 
 `profile:recommend` ranks installed Ollama models for `latency`, `balanced`, or `coding`, and `profile:auto` can persist the recommendation directly. If no profile exists yet, `dev:profile` now uses the same goal-aware defaults when picking the initial model.
 
+Use `--provider ollama` when you want a local-only path. Auto mode falls back to OpenAI when no viable local chat model is installed.
+Goal-based Ollama selection only recommends among models that are already installed and reachable from Ollama.
+
 `dev:openai` and `dev:ollama` run `doctor:runtime` first and only launch the app if checks pass. For `dev:ollama`, make sure Ollama is running locally before launch.
 
diff --git a/package.json b/package.json
index ab44903f..6e28e367 100644
--- a/package.json
+++ b/package.json
@@ -27,7 +27,7 @@
     "dev:fast": "bun run profile:fast && bun run dev:ollama:fast",
     "dev:code": "bun run profile:code && bun run dev:profile",
     "start": "node dist/cli.mjs",
-    "test:provider-recommendation": "node --test --experimental-strip-types src/utils/providerRecommendation.test.ts",
+    "test:provider-recommendation": "node --test --experimental-strip-types src/utils/providerRecommendation.test.ts src/utils/providerProfile.test.ts",
     "typecheck": "tsc --noEmit",
     "smoke": "bun run build && node dist/cli.mjs --version",
     "doctor:runtime": "bun run scripts/system-check.ts",
diff --git a/scripts/provider-bootstrap.ts b/scripts/provider-bootstrap.ts
index 31915b39..7fc3ff55 100644
--- a/scripts/provider-bootstrap.ts
+++ b/scripts/provider-bootstrap.ts
@@ -6,24 +6,20 @@ import {
   normalizeRecommendationGoal,
   recommendOllamaModel,
 } from '../src/utils/providerRecommendation.ts'
+import {
+  buildOllamaProfileEnv,
+  buildOpenAIProfileEnv,
+  createProfileFile,
+  selectAutoProfile,
+  type ProfileFile,
+  type ProviderProfile,
+} from '../src/utils/providerProfile.ts'
 import {
   getOllamaChatBaseUrl,
   hasLocalOllama,
   listOllamaModels,
 } from './provider-discovery.ts'
 
-type ProviderProfile = 'openai' | 'ollama'
-
-type ProfileFile = {
-  profile: ProviderProfile
-  env: {
-    OPENAI_BASE_URL?: string
-    OPENAI_MODEL?: string
-    OPENAI_API_KEY?: string
-  }
-  createdAt: string
-}
-
 function parseArg(name: string): string | null {
   const args = process.argv.slice(2)
   const idx = args.indexOf(name)
@@ -37,25 +33,16 @@ function parseProviderArg(): ProviderProfile | 'auto' {
   return 'auto'
 }
 
-function sanitizeApiKey(key: string | null): string | undefined {
-  if (!key || key === 'SUA_CHAVE') return undefined
-  return key
-}
-
 async function resolveOllamaModel(
   argModel: string | null,
   argBaseUrl: string | null,
   goal: ReturnType<typeof normalizeRecommendationGoal>,
-): Promise<string> {
+): Promise<string | null> {
   if (argModel) return argModel
 
   const discovered = await listOllamaModels(argBaseUrl || undefined)
   const recommended = recommendOllamaModel(discovered, goal)
-  if (recommended) {
-    return recommended.name
-  }
-
-  return process.env.OPENAI_MODEL || 'llama3.1:8b'
+  return recommended?.name ?? null
 }
 
 async function main(): Promise<void> {
@@ -68,37 +55,57 @@
   )
 
   let selected: ProviderProfile
+  let resolvedOllamaModel: string | null = null
 
   if (provider === 'auto') {
-    selected = (await hasLocalOllama(argBaseUrl || undefined)) ? 'ollama' : 'openai'
+    if (await hasLocalOllama(argBaseUrl || undefined)) {
+      resolvedOllamaModel = await resolveOllamaModel(argModel, argBaseUrl, goal)
+      selected = selectAutoProfile(resolvedOllamaModel)
+    } else {
+      selected = 'openai'
+    }
   } else {
     selected = provider
   }
 
-  const env: ProfileFile['env'] = {}
+  let env: ProfileFile['env']
   if (selected === 'ollama') {
-    env.OPENAI_BASE_URL = getOllamaChatBaseUrl(argBaseUrl || undefined)
-    env.OPENAI_MODEL = await resolveOllamaModel(argModel, argBaseUrl, goal)
-    const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null)
-    if (key) env.OPENAI_API_KEY = key
+    resolvedOllamaModel ??= await resolveOllamaModel(argModel, argBaseUrl, goal)
+    if (!resolvedOllamaModel) {
+      console.error('No viable Ollama chat model was discovered. Pull a chat model first or pass --model explicitly.')
+      process.exit(1)
+    }
+
+    env = buildOllamaProfileEnv(
+      resolvedOllamaModel,
+      {
+        baseUrl: argBaseUrl,
+        getOllamaChatBaseUrl,
+      },
+    )
   } else {
-    env.OPENAI_BASE_URL = argBaseUrl || process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1'
-    env.OPENAI_MODEL =
-      argModel ||
-      process.env.OPENAI_MODEL ||
-      getGoalDefaultOpenAIModel(goal)
-    const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null)
-    if (!key) {
+    const builtEnv = buildOpenAIProfileEnv({
+      goal,
+      model:
+        argModel ||
+        process.env.OPENAI_MODEL ||
+        getGoalDefaultOpenAIModel(goal),
+      apiKey: argApiKey || process.env.OPENAI_API_KEY || null,
+      processEnv: {
+        ...process.env,
+        OPENAI_BASE_URL:
+          argBaseUrl || process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1',
+      },
+    })
+
+    if (!builtEnv) {
       console.error('OpenAI profile requires a real API key. Use --api-key or set OPENAI_API_KEY.')
      process.exit(1)
     }
-    env.OPENAI_API_KEY = key
+
+    env = builtEnv
   }
 
-  const profile: ProfileFile = {
-    profile: selected,
-    env,
-    createdAt: new Date().toISOString(),
-  }
+  const profile = createProfileFile(selected, env)
   const outputPath = resolve(process.cwd(), '.openclaude-profile.json')
   writeFileSync(outputPath, JSON.stringify(profile, null, 2), 'utf8')
diff --git a/scripts/provider-launch.ts b/scripts/provider-launch.ts
index 26666072..d4e321c4 100644
--- a/scripts/provider-launch.ts
+++ b/scripts/provider-launch.ts
@@ -3,27 +3,21 @@ import { spawn } from 'node:child_process'
 import { existsSync, readFileSync } from 'node:fs'
 import { resolve } from 'node:path'
 import {
-  getGoalDefaultOpenAIModel,
   normalizeRecommendationGoal,
   recommendOllamaModel,
 } from '../src/utils/providerRecommendation.ts'
+import {
+  buildLaunchEnv,
+  selectAutoProfile,
+  type ProfileFile,
+  type ProviderProfile,
+} from '../src/utils/providerProfile.ts'
 import {
   getOllamaChatBaseUrl,
   hasLocalOllama,
   listOllamaModels,
 } from './provider-discovery.ts'
 
-type ProviderProfile = 'openai' | 'ollama'
-
-type ProfileFile = {
-  profile: ProviderProfile
-  env?: {
-    OPENAI_BASE_URL?: string
-    OPENAI_MODEL?: string
-    OPENAI_API_KEY?: string
-  }
-}
-
 type LaunchOptions = {
   requestedProfile: ProviderProfile | 'auto' | null
   passthroughArgs: string[]
@@ -93,10 +87,10 @@ function loadPersistedProfile(): ProfileFile | null {
 
 async function resolveOllamaDefaultModel(
   goal: ReturnType<typeof normalizeRecommendationGoal>,
-): Promise<string> {
+): Promise<string | null> {
   const models = await listOllamaModels()
   const recommended = recommendOllamaModel(models, goal)
-  return recommended?.name || process.env.OPENAI_MODEL || 'llama3.1:8b'
+  return recommended?.name ?? null
 }
 
 function runCommand(command: string, env: NodeJS.ProcessEnv): Promise {
@@ -113,41 +107,6 @@
   })
 }
 
-async function buildEnv(
-  profile: ProviderProfile,
-  persisted: ProfileFile | null,
-  goal: ReturnType<typeof normalizeRecommendationGoal>,
-): Promise<NodeJS.ProcessEnv> {
-  const persistedEnv = persisted?.env ?? {}
-  const env: NodeJS.ProcessEnv = {
-    ...process.env,
-    CLAUDE_CODE_USE_OPENAI: '1',
-  }
-
-  if (profile === 'ollama') {
-    env.OPENAI_BASE_URL =
-      persistedEnv.OPENAI_BASE_URL ||
-      process.env.OPENAI_BASE_URL ||
-      getOllamaChatBaseUrl()
-    env.OPENAI_MODEL =
-      persistedEnv.OPENAI_MODEL ||
-      process.env.OPENAI_MODEL ||
-      await resolveOllamaDefaultModel(goal)
-    if (!process.env.OPENAI_API_KEY || process.env.OPENAI_API_KEY === 'SUA_CHAVE') {
-      delete env.OPENAI_API_KEY
-    }
-    return env
-  }
-
-  env.OPENAI_BASE_URL = process.env.OPENAI_BASE_URL || persistedEnv.OPENAI_BASE_URL || 'https://api.openai.com/v1'
-  env.OPENAI_MODEL =
-    process.env.OPENAI_MODEL ||
-    persistedEnv.OPENAI_MODEL ||
-    getGoalDefaultOpenAIModel(goal)
-  env.OPENAI_API_KEY = process.env.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY
-  return env
-}
-
 function applyFastFlags(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
   env.CLAUDE_CODE_SIMPLE ??= '1'
   env.CLAUDE_CODE_DISABLE_THINKING ??= '1'
@@ -181,18 +140,36 @@ async function main(): Promise<void> {
   const persisted = loadPersistedProfile()
 
   let profile: ProviderProfile
+  let resolvedOllamaModel: string | null = null
 
   if (requestedProfile === 'auto') {
     if (persisted) {
       profile = persisted.profile
+    } else if (await hasLocalOllama()) {
+      resolvedOllamaModel = await resolveOllamaDefaultModel(options.goal)
+      profile = selectAutoProfile(resolvedOllamaModel)
     } else {
-      profile = (await hasLocalOllama()) ? 'ollama' : 'openai'
+      profile = 'openai'
     }
   } else {
     profile = requestedProfile
   }
 
-  const env = await buildEnv(profile, persisted, options.goal)
+  if (profile === 'ollama' && persisted?.profile !== 'ollama') {
+    resolvedOllamaModel ??= await resolveOllamaDefaultModel(options.goal)
+    if (!resolvedOllamaModel) {
+      console.error('No viable Ollama chat model was discovered. Pull a chat model first or save one with `bun run profile:init -- --provider ollama --model <model>`.')
+      process.exit(1)
+    }
+  }
+
+  const env = await buildLaunchEnv({
+    profile,
+    persisted,
+    goal: options.goal,
+    getOllamaChatBaseUrl,
+    resolveOllamaDefaultModel: async () => resolvedOllamaModel || 'llama3.1:8b',
+  })
   if (options.fast) {
     applyFastFlags(env)
   }
diff --git a/scripts/provider-recommend.ts b/scripts/provider-recommend.ts
index 8cfdc883..eca811e6 100644
--- a/scripts/provider-recommend.ts
+++ b/scripts/provider-recommend.ts
@@ -5,11 +5,21 @@ import { resolve } from 'node:path'
 import {
   applyBenchmarkLatency,
   getGoalDefaultOpenAIModel,
+  isViableOllamaChatModel,
   normalizeRecommendationGoal,
   rankOllamaModels,
+  selectRecommendedOllamaModel,
   type BenchmarkedOllamaModel,
   type RecommendationGoal,
 } from '../src/utils/providerRecommendation.ts'
+import {
+  buildOllamaProfileEnv,
+  buildOpenAIProfileEnv,
+  createProfileFile,
+  sanitizeApiKey,
+  type ProfileFile,
+  type ProviderProfile,
+} from '../src/utils/providerProfile.ts'
 import {
   benchmarkOllamaModel,
   getOllamaChatBaseUrl,
@@ -17,18 +27,6 @@ import {
   listOllamaModels,
 } from './provider-discovery.ts'
 
-type ProviderProfile = 'openai' | 'ollama'
-
-type ProfileFile = {
-  profile: ProviderProfile
-  env: {
-    OPENAI_BASE_URL?: string
-    OPENAI_MODEL?: string
-    OPENAI_API_KEY?: string
-  }
-  createdAt: string
-}
-
 type CliOptions = {
   apply: boolean
   benchmark: boolean
@@ -90,11 +88,6 @@
   return options
 }
 
-function sanitizeApiKey(key: string | undefined): string | undefined {
-  if (!key || key === 'SUA_CHAVE') return undefined
-  return key
-}
-
 function printHumanSummary(payload: {
   goal: RecommendationGoal
   recommendedProfile: ProviderProfile
@@ -138,29 +131,27 @@ async function maybeApplyProfile(
   goal: RecommendationGoal,
   baseUrl: string | null,
 ): Promise<boolean> {
-  const env: ProfileFile['env'] = {}
+  let env: ProfileFile['env'] | null
 
   if (profile === 'ollama') {
-    env.OPENAI_BASE_URL = getOllamaChatBaseUrl(baseUrl ?? undefined)
-    env.OPENAI_MODEL = model
-    const key = sanitizeApiKey(process.env.OPENAI_API_KEY)
-    if (key) env.OPENAI_API_KEY = key
+    env = buildOllamaProfileEnv(model, {
+      baseUrl,
+      getOllamaChatBaseUrl,
+    })
   } else {
-    const key = sanitizeApiKey(process.env.OPENAI_API_KEY)
-    if (!key) {
+    env = buildOpenAIProfileEnv({
+      goal,
+      model: model || getGoalDefaultOpenAIModel(goal),
+      apiKey: process.env.OPENAI_API_KEY,
+      processEnv: process.env,
+    })
+
+    if (!env) {
       console.error('Cannot apply an OpenAI profile without OPENAI_API_KEY.')
       return false
     }
-    env.OPENAI_BASE_URL =
-      process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1'
-    env.OPENAI_MODEL = model || getGoalDefaultOpenAIModel(goal)
-    env.OPENAI_API_KEY = key
   }
 
-  const profileFile: ProfileFile = {
-    profile,
-    env,
-    createdAt: new Date().toISOString(),
-  }
+  const profileFile = createProfileFile(profile, env)
   writeFileSync(
     resolve(process.cwd(), '.openclaude-profile.json'),
@@ -180,7 +171,9 @@ async function main(): Promise<void> {
     : []
 
   const heuristicRanked = rankOllamaModels(ollamaModels, options.goal)
-  const benchmarkInput = options.benchmark ? heuristicRanked.slice(0, 3) : []
+  const benchmarkInput = options.benchmark
+    ? heuristicRanked.filter(isViableOllamaChatModel).slice(0, 3)
+    : []
 
   const benchmarkResults: Record<string, number | null> = {}
   for (const model of benchmarkInput) {
@@ -197,7 +190,7 @@ async function main(): Promise<void> {
       benchmarkMs: null,
     }))
 
-  const recommendedOllama = rankedModels[0] ?? null
+  const recommendedOllama = selectRecommendedOllamaModel(rankedModels)
 
   const openAIConfigured = Boolean(sanitizeApiKey(process.env.OPENAI_API_KEY))
   let recommendedProfile: ProviderProfile
diff --git a/src/utils/providerProfile.test.ts b/src/utils/providerProfile.test.ts
new file mode 100644
index 00000000..f549584f
--- /dev/null
+++ b/src/utils/providerProfile.test.ts
@@ -0,0 +1,92 @@
+import assert from 'node:assert/strict'
+import test from 'node:test'
+
+import {
+  buildLaunchEnv,
+  buildOllamaProfileEnv,
+  selectAutoProfile,
+  type ProfileFile,
+} from './providerProfile.ts'
+
+function profile(profile: ProfileFile['profile'], env: ProfileFile['env']): ProfileFile {
+  return {
+    profile,
+    env,
+    createdAt: '2026-04-01T00:00:00.000Z',
+  }
+}
+
+test('matching persisted ollama env is reused for ollama launch', async () => {
+  const env = await buildLaunchEnv({
+    profile: 'ollama',
+    persisted: profile('ollama', {
+      OPENAI_BASE_URL: 'http://127.0.0.1:11435/v1',
+      OPENAI_MODEL: 'mistral:7b-instruct',
+    }),
+    goal: 'balanced',
+    processEnv: {},
+    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
+    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
+  })
+
+  assert.equal(env.OPENAI_BASE_URL, 'http://127.0.0.1:11435/v1')
+  assert.equal(env.OPENAI_MODEL, 'mistral:7b-instruct')
+})
+
+test('ollama launch ignores mismatched persisted openai env and shell model fallback', async () => {
+  const env = await buildLaunchEnv({
+    profile: 'ollama',
+    persisted: profile('openai', {
+      OPENAI_BASE_URL: 'https://api.openai.com/v1',
+      OPENAI_MODEL: 'gpt-4o',
+      OPENAI_API_KEY: 'sk-persisted',
+    }),
+    goal: 'coding',
+    processEnv: {
+      OPENAI_BASE_URL: 'https://api.deepseek.com/v1',
+      OPENAI_MODEL: 'gpt-4o-mini',
+    },
+    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
+    resolveOllamaDefaultModel: async () => 'qwen2.5-coder:7b',
+  })
+
+  assert.equal(env.OPENAI_BASE_URL, 'http://localhost:11434/v1')
+  assert.equal(env.OPENAI_MODEL, 'qwen2.5-coder:7b')
+})
+
+test('openai launch ignores mismatched persisted ollama env', async () => {
+  const env = await buildLaunchEnv({
+    profile: 'openai',
+    persisted: profile('ollama', {
+      OPENAI_BASE_URL: 'http://localhost:11434/v1',
+      OPENAI_MODEL: 'llama3.1:8b',
+    }),
+    goal: 'latency',
+    processEnv: {
+      OPENAI_API_KEY: 'sk-live',
+    },
+    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
+    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
+  })
+
+  assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
+  assert.equal(env.OPENAI_MODEL, 'gpt-4o-mini')
+  assert.equal(env.OPENAI_API_KEY, 'sk-live')
+})
+
+test('ollama profiles never persist openai api keys', () => {
+  const env = buildOllamaProfileEnv('llama3.1:8b', {
+    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
+  })
+
+  assert.deepEqual(env, {
+    OPENAI_BASE_URL: 'http://localhost:11434/v1',
+    OPENAI_MODEL: 'llama3.1:8b',
+  })
+  assert.equal('OPENAI_API_KEY' in env, false)
+})
+
+test('auto profile falls back to openai when no viable ollama model exists', () => {
+  assert.equal(selectAutoProfile(null), 'openai')
+  assert.equal(selectAutoProfile('qwen2.5-coder:7b'), 'ollama')
+})
diff --git a/src/utils/providerProfile.ts b/src/utils/providerProfile.ts
new file mode 100644
index 00000000..2cd7188b
--- /dev/null
+++ b/src/utils/providerProfile.ts
@@ -0,0 +1,123 @@
+import {
+  getGoalDefaultOpenAIModel,
+  type RecommendationGoal,
+} from './providerRecommendation.ts'
+
+export type ProviderProfile = 'openai' | 'ollama'
+
+export type ProfileEnv = {
+  OPENAI_BASE_URL?: string
+  OPENAI_MODEL?: string
+  OPENAI_API_KEY?: string
+}
+
+export type ProfileFile = {
+  profile: ProviderProfile
+  env: ProfileEnv
+  createdAt: string
+}
+
+export function sanitizeApiKey(
+  key: string | null | undefined,
+): string | undefined {
+  if (!key || key === 'SUA_CHAVE') return undefined
+  return key
+}
+
+export function buildOllamaProfileEnv(
+  model: string,
+  options: {
+    baseUrl?: string | null
+    getOllamaChatBaseUrl: (baseUrl?: string) => string
+  },
+): ProfileEnv {
+  return {
+    OPENAI_BASE_URL: options.getOllamaChatBaseUrl(options.baseUrl ?? undefined),
+    OPENAI_MODEL: model,
+  }
+}
+
+export function buildOpenAIProfileEnv(options: {
+  goal: RecommendationGoal
+  model?: string | null
+  apiKey?: string | null
+  processEnv?: NodeJS.ProcessEnv
+}): ProfileEnv | null {
+  const processEnv = options.processEnv ?? process.env
+  const key = sanitizeApiKey(options.apiKey ?? processEnv.OPENAI_API_KEY)
+  if (!key) {
+    return null
+  }
+
+  return {
+    OPENAI_BASE_URL: processEnv.OPENAI_BASE_URL || 'https://api.openai.com/v1',
+    OPENAI_MODEL: options.model || getGoalDefaultOpenAIModel(options.goal),
+    OPENAI_API_KEY: key,
+  }
+}
+
+export function createProfileFile(
+  profile: ProviderProfile,
+  env: ProfileEnv,
+): ProfileFile {
+  return {
+    profile,
+    env,
+    createdAt: new Date().toISOString(),
+  }
+}
+
+export function selectAutoProfile(
+  recommendedOllamaModel: string | null,
+): ProviderProfile {
+  return recommendedOllamaModel ? 'ollama' : 'openai'
+}
+
+export async function buildLaunchEnv(options: {
+  profile: ProviderProfile
+  persisted: ProfileFile | null
+  goal: RecommendationGoal
+  processEnv?: NodeJS.ProcessEnv
+  getOllamaChatBaseUrl?: (baseUrl?: string) => string
+  resolveOllamaDefaultModel?: (goal: RecommendationGoal) => Promise<string>
+}): Promise<NodeJS.ProcessEnv> {
+  const processEnv = options.processEnv ?? process.env
+  const persistedEnv =
+    options.persisted?.profile === options.profile
+      ? options.persisted.env ?? {}
+      : {}
+
+  const env: NodeJS.ProcessEnv = {
+    ...processEnv,
+    CLAUDE_CODE_USE_OPENAI: '1',
+  }
+
+  if (options.profile === 'ollama') {
+    const getOllamaBaseUrl =
+      options.getOllamaChatBaseUrl ?? (() => 'http://localhost:11434/v1')
+    const resolveOllamaModel =
+      options.resolveOllamaDefaultModel ?? (async () => 'llama3.1:8b')
+
+    env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || getOllamaBaseUrl()
+    env.OPENAI_MODEL =
+      persistedEnv.OPENAI_MODEL ||
+      (await resolveOllamaModel(options.goal))
+
+    if (!processEnv.OPENAI_API_KEY || processEnv.OPENAI_API_KEY === 'SUA_CHAVE') {
+      delete env.OPENAI_API_KEY
+    }
+
+    return env
+  }
+
+  env.OPENAI_BASE_URL =
+    processEnv.OPENAI_BASE_URL ||
+    persistedEnv.OPENAI_BASE_URL ||
+    'https://api.openai.com/v1'
+  env.OPENAI_MODEL =
+    processEnv.OPENAI_MODEL ||
+    persistedEnv.OPENAI_MODEL ||
+    getGoalDefaultOpenAIModel(options.goal)
+  env.OPENAI_API_KEY = processEnv.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY
+  return env
+}
diff --git a/src/utils/providerRecommendation.test.ts b/src/utils/providerRecommendation.test.ts
index 986e403f..f85777bf 100644
--- a/src/utils/providerRecommendation.test.ts
+++ b/src/utils/providerRecommendation.test.ts
@@ -83,6 +83,19 @@ test('non-chat embedding models are heavily demoted', () => {
   assert.equal(ranked[0]?.name, 'mistral:7b-instruct')
 })
 
+test('auto-pick ignores non-chat ollama models', () => {
+  const recommended = recommendOllamaModel(
+    [
+      model('nomic-embed-text', { parameterSize: '0.5B' }),
+      model('bge-reranker-v2', { parameterSize: '1.5B' }),
+      model('whisper-large-v3', { parameterSize: '1.6B' }),
+    ],
+    'balanced',
+  )
+
+  assert.equal(recommended, null)
+})
+
 test('benchmark latency can reorder close recommendations', () => {
   const ranked = rankOllamaModels(
     [
@@ -111,6 +124,69 @@
   assert.equal(benchmarked[0]?.benchmarkMs, 350)
 })
 
+test('unbenchmarked models stay behind benchmarked candidates', () => {
+  const ranked = rankOllamaModels(
+    [
+      model('phi4-mini:4b', {
+        parameterSize: '4B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('mistral:7b-instruct', {
+        parameterSize: '7B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('llama3.1:8b', {
+        parameterSize: '8B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('qwen2.5:14b', {
+        parameterSize: '14B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+    ],
+    'latency',
+  )
+
+  const benchmarked = applyBenchmarkLatency(
+    ranked,
+    {
+      'phi4-mini:4b': 2400,
+      'mistral:7b-instruct': 2200,
+      'llama3.1:8b': 2100,
+    },
+    'latency',
+  )
+
+  assert.ok(benchmarked.slice(0, 3).every(item => item.benchmarkMs !== null))
+  assert.equal(benchmarked[3]?.name, 'qwen2.5:14b')
+  assert.equal(benchmarked[3]?.benchmarkMs, null)
+})
+
+test('coding goal recognizes codestral and devstral families', () => {
+  const ranked = rankOllamaModels(
+    [
+      model('mistral:7b-instruct', {
+        parameterSize: '7B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('codestral:22b', {
+        parameterSize: '22B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+      model('devstral:24b', {
+        parameterSize: '24B',
+        quantizationLevel: 'Q4_K_M',
+      }),
+    ],
+    'coding',
+  )
+
+  assert.deepEqual(ranked.slice(0, 2).map(item => item.name), [
+    'devstral:24b',
+    'codestral:22b',
+  ])
+})
+
 test('goal defaults choose sensible openai models', () => {
   assert.equal(getGoalDefaultOpenAIModel('latency'), 'gpt-4o-mini')
   assert.equal(getGoalDefaultOpenAIModel('balanced'), 'gpt-4o')
diff --git a/src/utils/providerRecommendation.ts b/src/utils/providerRecommendation.ts
index e49c37aa..8bd1e2cd 100644
--- a/src/utils/providerRecommendation.ts
+++ b/src/utils/providerRecommendation.ts
@@ -23,6 +23,8 @@ const CODING_HINTS = [
   'coder',
   'codellama',
   'codegemma',
+  'codestral',
+  'devstral',
   'starcoder',
   'deepseek-coder',
   'qwen2.5-coder',
@@ -57,6 +59,16 @@ function includesAny(text: string, needles: string[]): boolean {
   return needles.some(needle => text.includes(needle))
 }
 
+export function isViableOllamaChatModel(model: OllamaModelDescriptor): boolean {
+  return !includesAny(modelHaystack(model), NON_CHAT_HINTS)
+}
+
+export function selectRecommendedOllamaModel<
+  T extends OllamaModelDescriptor,
+>(models: T[]): T | null {
+  return models.find(isViableOllamaChatModel) ?? null
+}
+
 function inferParameterBillions(model: OllamaModelDescriptor): number | null {
   const text = `${model.parameterSize ?? ''} ${model.name}`.toLowerCase()
   const match = text.match(/(\d+(?:\.\d+)?)\s*b\b/)
@@ -265,7 +277,7 @@ export function recommendOllamaModel(
   models: OllamaModelDescriptor[],
   goal: RecommendationGoal,
 ): RankedOllamaModel | null {
-  return rankOllamaModels(models, goal)[0] ?? null
+  return selectRecommendedOllamaModel(rankOllamaModels(models, goal))
 }
 
 export function applyBenchmarkLatency(
@@ -276,7 +288,7 @@
   const divisor =
     goal === 'latency' ? 120 : goal === 'coding' ? 500 : 240
 
-  return models
+  const scoredModels = models
     .map(model => {
       const latency = benchmarkMs[model.name] ?? null
      const benchmarkPenalty = latency === null ? 0 : latency / divisor
@@ -293,5 +305,13 @@
         score: Number((model.score - benchmarkPenalty).toFixed(2)),
       }
     })
-    .sort((a, b) => compareRankedModels(a, b, goal))
+
+  const benchmarkedModels = scoredModels.filter(model => model.benchmarkMs !== null)
+  if (benchmarkedModels.length === 0) {
+    return scoredModels.sort((a, b) => compareRankedModels(a, b, goal))
+  }
+
+  const unbenchmarkedModels = scoredModels.filter(model => model.benchmarkMs === null)
+  benchmarkedModels.sort((a, b) => compareRankedModels(a, b, goal))
+  return [...benchmarkedModels, ...unbenchmarkedModels]
 }