fix: harden provider recommendation safety

2026-04-01 11:55:24 +05:30
parent 174eb8ad3b
commit 8fe03cba57
10 changed files with 434 additions and 141 deletions
--- a/PLAYBOOK.md
+++ b/PLAYBOOK.md
@@ -183,10 +183,10 @@ Fix:
 bun run profile:init -- --provider ollama --model llama3.1:8b
 ```
-Or auto-pick a local profile:
+Or pick a local Ollama profile automatically by goal:
 ```powershell
-bun run profile:auto -- --goal balanced
+bun run profile:init -- --provider ollama --goal balanced
 ```
 ## 6.5 Placeholder key (`SUA_CHAVE`) error
@@ -220,14 +220,16 @@ bun run profile:fast   # llama3.2:3b
 bun run profile:code   # qwen2.5-coder:7b
 ```
-Goal-based auto-selection:
+Goal-based local auto-selection:
 ```powershell
-bun run profile:auto -- --goal latency
+bun run profile:init -- --provider ollama --goal latency
-bun run profile:auto -- --goal balanced
+bun run profile:init -- --provider ollama --goal balanced
-bun run profile:auto -- --goal coding
+bun run profile:init -- --provider ollama --goal coding
 ```
 `profile:auto` is a best-available provider picker, not a local-only command. Use `--provider ollama` when you want to stay on a local model.
 ## 8. Practical Prompt Playbook (Copy/Paste)
 ## 8.1 Code understanding
--- a/README.md
+++ b/README.md
@@ -187,7 +187,7 @@ bun run doctor:runtime:json
 # persist a diagnostics report to reports/doctor-runtime.json
 bun run doctor:report
-# full local hardening check (typecheck + smoke + runtime doctor)
+# full local hardening check (smoke + runtime doctor)
 bun run hardening:check
 # strict hardening (includes project-wide typecheck)
@@ -203,13 +203,13 @@ Notes:
 Use profile launchers to avoid repeated environment setup:
 ```bash
-# one-time profile bootstrap (auto-detect ollama, otherwise openai)
+# one-time profile bootstrap (best available provider)
 bun run profile:init
 # preview the best provider/model for your goal
 bun run profile:recommend -- --goal coding --benchmark
-# auto-apply the best available profile for your goal
+# auto-apply the best available provider/model for your goal
 bun run profile:auto -- --goal latency
 # openai bootstrap with explicit key
@@ -234,6 +234,9 @@ bun run dev:ollama
 `profile:recommend` ranks installed Ollama models for `latency`, `balanced`, or `coding`, and `profile:auto` can persist the recommendation directly.
 If no profile exists yet, `dev:profile` now uses the same goal-aware defaults when picking the initial model.
 Use `--provider ollama` when you want a local-only path. Auto mode falls back to OpenAI when no viable local chat model is installed.
 Goal-based Ollama selection only recommends among models that are already installed and reachable from Ollama.
 `dev:openai` and `dev:ollama` run `doctor:runtime` first and only launch the app if checks pass.
 For `dev:ollama`, make sure Ollama is running locally before launch.
--- a/package.json
+++ b/package.json
@@ -27,7 +27,7 @@
    "dev:fast": "bun run profile:fast && bun run dev:ollama:fast",
    "dev:code": "bun run profile:code && bun run dev:profile",
    "start": "node dist/cli.mjs",
-    "test:provider-recommendation": "node --test --experimental-strip-types src/utils/providerRecommendation.test.ts",
+    "test:provider-recommendation": "node --test --experimental-strip-types src/utils/providerRecommendation.test.ts src/utils/providerProfile.test.ts",
    "typecheck": "tsc --noEmit",
    "smoke": "bun run build && node dist/cli.mjs --version",
    "doctor:runtime": "bun run scripts/system-check.ts",
--- a/scripts/provider-bootstrap.ts
+++ b/scripts/provider-bootstrap.ts
@@ -6,24 +6,20 @@ import {
  normalizeRecommendationGoal,
  recommendOllamaModel,
 } from '../src/utils/providerRecommendation.ts'
 import {
  buildOllamaProfileEnv,
  buildOpenAIProfileEnv,
  createProfileFile,
  selectAutoProfile,
  type ProfileFile,
  type ProviderProfile,
 } from '../src/utils/providerProfile.ts'
 import {
  getOllamaChatBaseUrl,
  hasLocalOllama,
  listOllamaModels,
 } from './provider-discovery.ts'
 /** Provider identifiers this script can select between. */
 type ProviderProfile = 'openai' | 'ollama'
 /**
  * Shape of the profile object this script serializes to
  * `.openclaude-profile.json`.
  */
 type ProfileFile = {
  // Which provider the saved settings belong to.
  profile: ProviderProfile
  // OpenAI-style connection settings; every entry is optional.
  env: {
    OPENAI_BASE_URL?: string
    OPENAI_MODEL?: string
    OPENAI_API_KEY?: string
  }
  // Populated from `new Date().toISOString()` when the profile is built.
  createdAt: string
 }
 function parseArg(name: string): string | null {
  const args = process.argv.slice(2)
  const idx = args.indexOf(name)
@@ -37,25 +33,16 @@ function parseProviderArg(): ProviderProfile | 'auto' {
  return 'auto'
 }
 /**
  * Normalizes a candidate API key, treating a missing value and the
  * `SUA_CHAVE` placeholder as "no key".
  *
  * @param key - Raw key value, or null when none was supplied.
  * @returns The key unchanged when it is usable, otherwise undefined.
  */
 function sanitizeApiKey(key: string | null): string | undefined {
  if (key && key !== 'SUA_CHAVE') {
    return key
  }
  return undefined
 }
 async function resolveOllamaModel(
  argModel: string | null,
  argBaseUrl: string | null,
  goal: ReturnType<typeof normalizeRecommendationGoal>,
-): Promise<string> {
+) : Promise<string | null> {
  if (argModel) return argModel
  const discovered = await listOllamaModels(argBaseUrl || undefined)
  const recommended = recommendOllamaModel(discovered, goal)
-  if (recommended) {
+  return recommended?.name ?? null
    return recommended.name
  }
  return process.env.OPENAI_MODEL || 'llama3.1:8b'
 }
 async function main(): Promise<void> {
@@ -68,37 +55,57 @@ async function main(): Promise<void> {
  )
  let selected: ProviderProfile
  let resolvedOllamaModel: string | null = null
  if (provider === 'auto') {
-    selected = (await hasLocalOllama(argBaseUrl || undefined)) ? 'ollama' : 'openai'
+    if (await hasLocalOllama(argBaseUrl || undefined)) {
      resolvedOllamaModel = await resolveOllamaModel(argModel, argBaseUrl, goal)
      selected = selectAutoProfile(resolvedOllamaModel)
    } else {
      selected = 'openai'
    }
  } else {
    selected = provider
  }
-  const env: ProfileFile['env'] = {}
+  let env: ProfileFile['env']
  if (selected === 'ollama') {
-    env.OPENAI_BASE_URL = getOllamaChatBaseUrl(argBaseUrl || undefined)
+    resolvedOllamaModel ??= await resolveOllamaModel(argModel, argBaseUrl, goal)
-    env.OPENAI_MODEL = await resolveOllamaModel(argModel, argBaseUrl, goal)
+    if (!resolvedOllamaModel) {
-    const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null)
+      console.error('No viable Ollama chat model was discovered. Pull a chat model first or pass --model explicitly.')
-    if (key) env.OPENAI_API_KEY = key
+      process.exit(1)
    }
    env = buildOllamaProfileEnv(
      resolvedOllamaModel,
      {
        baseUrl: argBaseUrl,
        getOllamaChatBaseUrl,
      },
    )
  } else {
-    env.OPENAI_BASE_URL = argBaseUrl || process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1'
+    const builtEnv = buildOpenAIProfileEnv({
-    env.OPENAI_MODEL =
+      goal,
      model:
        argModel ||
        process.env.OPENAI_MODEL ||
-      getGoalDefaultOpenAIModel(goal)
+        getGoalDefaultOpenAIModel(goal),
-    const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null)
+      apiKey: argApiKey || process.env.OPENAI_API_KEY || null,
-    if (!key) {
+      processEnv: {
        ...process.env,
        OPENAI_BASE_URL:
          argBaseUrl || process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1',
      },
    })
    if (!builtEnv) {
      console.error('OpenAI profile requires a real API key. Use --api-key or set OPENAI_API_KEY.')
      process.exit(1)
    }
-    env.OPENAI_API_KEY = key
+
    env = builtEnv
  }
-  const profile: ProfileFile = {
+  const profile = createProfileFile(selected, env)
    profile: selected,
    env,
    createdAt: new Date().toISOString(),
  }
  const outputPath = resolve(process.cwd(), '.openclaude-profile.json')
  writeFileSync(outputPath, JSON.stringify(profile, null, 2), 'utf8')
--- a/scripts/provider-launch.ts
+++ b/scripts/provider-launch.ts
@@ -3,27 +3,21 @@ import { spawn } from 'node:child_process'
 import { existsSync, readFileSync } from 'node:fs'
 import { resolve } from 'node:path'
 import {
  getGoalDefaultOpenAIModel,
  normalizeRecommendationGoal,
  recommendOllamaModel,
 } from '../src/utils/providerRecommendation.ts'
 import {
  buildLaunchEnv,
  selectAutoProfile,
  type ProfileFile,
  type ProviderProfile,
 } from '../src/utils/providerProfile.ts'
 import {
  getOllamaChatBaseUrl,
  hasLocalOllama,
  listOllamaModels,
 } from './provider-discovery.ts'
 /** Provider identifiers the launcher can run under. */
 type ProviderProfile = 'openai' | 'ollama'
 /**
  * Persisted profile as consumed by the launcher. `env` is optional here,
  * so readers must tolerate a profile without saved settings.
  */
 type ProfileFile = {
  // Which provider the saved settings belong to.
  profile: ProviderProfile
  // Saved OpenAI-style connection settings, when present.
  env?: {
    OPENAI_BASE_URL?: string
    OPENAI_MODEL?: string
    OPENAI_API_KEY?: string
  }
 }
 type LaunchOptions = {
  requestedProfile: ProviderProfile | 'auto' | null
  passthroughArgs: string[]
@@ -93,10 +87,10 @@ function loadPersistedProfile(): ProfileFile | null {
 async function resolveOllamaDefaultModel(
  goal: ReturnType<typeof normalizeRecommendationGoal>,
-): Promise<string> {
+): Promise<string | null> {
  const models = await listOllamaModels()
  const recommended = recommendOllamaModel(models, goal)
-  return recommended?.name || process.env.OPENAI_MODEL || 'llama3.1:8b'
+  return recommended?.name ?? null
 }
 function runCommand(command: string, env: NodeJS.ProcessEnv): Promise<number> {
@@ -113,41 +107,6 @@ function runCommand(command: string, env: NodeJS.ProcessEnv): Promise<number> {
  })
 }
 /**
  * Assembles the environment for launching the app under `profile`.
  *
  * The result starts as a copy of `process.env` with
  * `CLAUDE_CODE_USE_OPENAI` forced to `'1'`.
  *
  * - `ollama`: saved settings win over the shell, then a discovered
  *   default. A missing or `SUA_CHAVE` shell key is removed from the result.
  * - `openai`: the shell wins over saved settings, then built-in defaults.
  *
  * @param profile - Provider to launch.
  * @param persisted - Saved profile, or null when none exists.
  * @param goal - Recommendation goal used when a default model is needed.
  * @returns The environment to hand to the launched process.
  */
 async function buildEnv(
  profile: ProviderProfile,
  persisted: ProfileFile | null,
  goal: ReturnType<typeof normalizeRecommendationGoal>,
 ): Promise<NodeJS.ProcessEnv> {
  const saved = persisted?.env ?? {}
  const launchEnv: NodeJS.ProcessEnv = { ...process.env }
  launchEnv.CLAUDE_CODE_USE_OPENAI = '1'

  if (profile !== 'ollama') {
    // Hosted provider: shell values take precedence over saved ones.
    launchEnv.OPENAI_BASE_URL =
      process.env.OPENAI_BASE_URL ||
      saved.OPENAI_BASE_URL ||
      'https://api.openai.com/v1'
    launchEnv.OPENAI_MODEL =
      process.env.OPENAI_MODEL ||
      saved.OPENAI_MODEL ||
      getGoalDefaultOpenAIModel(goal)
    launchEnv.OPENAI_API_KEY = process.env.OPENAI_API_KEY || saved.OPENAI_API_KEY
    return launchEnv
  }

  // Local provider: saved values take precedence over the shell.
  launchEnv.OPENAI_BASE_URL =
    saved.OPENAI_BASE_URL ||
    process.env.OPENAI_BASE_URL ||
    getOllamaChatBaseUrl()
  launchEnv.OPENAI_MODEL =
    saved.OPENAI_MODEL ||
    process.env.OPENAI_MODEL ||
    (await resolveOllamaDefaultModel(goal))

  const shellKey = process.env.OPENAI_API_KEY
  if (!shellKey || shellKey === 'SUA_CHAVE') {
    delete launchEnv.OPENAI_API_KEY
  }
  return launchEnv
 }
 function applyFastFlags(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
  env.CLAUDE_CODE_SIMPLE ??= '1'
  env.CLAUDE_CODE_DISABLE_THINKING ??= '1'
@@ -181,18 +140,36 @@ async function main(): Promise<void> {
  const persisted = loadPersistedProfile()
  let profile: ProviderProfile
  let resolvedOllamaModel: string | null = null
  if (requestedProfile === 'auto') {
    if (persisted) {
      profile = persisted.profile
    } else if (await hasLocalOllama()) {
      resolvedOllamaModel = await resolveOllamaDefaultModel(options.goal)
      profile = selectAutoProfile(resolvedOllamaModel)
    } else {
-      profile = (await hasLocalOllama()) ? 'ollama' : 'openai'
+      profile = 'openai'
    }
  } else {
    profile = requestedProfile
  }
-  const env = await buildEnv(profile, persisted, options.goal)
+  if (profile === 'ollama' && persisted?.profile !== 'ollama') {
    resolvedOllamaModel ??= await resolveOllamaDefaultModel(options.goal)
    if (!resolvedOllamaModel) {
      console.error('No viable Ollama chat model was discovered. Pull a chat model first or save one with `bun run profile:init -- --provider ollama --model <model>`.')
      process.exit(1)
    }
  }
  const env = await buildLaunchEnv({
    profile,
    persisted,
    goal: options.goal,
    getOllamaChatBaseUrl,
    resolveOllamaDefaultModel: async () => resolvedOllamaModel || 'llama3.1:8b',
  })
  if (options.fast) {
    applyFastFlags(env)
  }
--- a/scripts/provider-recommend.ts
+++ b/scripts/provider-recommend.ts
@@ -5,11 +5,21 @@ import { resolve } from 'node:path'
 import {
  applyBenchmarkLatency,
  getGoalDefaultOpenAIModel,
  isViableOllamaChatModel,
  normalizeRecommendationGoal,
  rankOllamaModels,
  selectRecommendedOllamaModel,
  type BenchmarkedOllamaModel,
  type RecommendationGoal,
 } from '../src/utils/providerRecommendation.ts'
 import {
  buildOllamaProfileEnv,
  buildOpenAIProfileEnv,
  createProfileFile,
  sanitizeApiKey,
  type ProfileFile,
  type ProviderProfile,
 } from '../src/utils/providerProfile.ts'
 import {
  benchmarkOllamaModel,
  getOllamaChatBaseUrl,
@@ -17,18 +27,6 @@ import {
  listOllamaModels,
 } from './provider-discovery.ts'
 /** Provider identifiers a recommendation can resolve to. */
 type ProviderProfile = 'openai' | 'ollama'
 /**
  * Shape of the profile object this script writes to
  * `.openclaude-profile.json` when a recommendation is applied.
  */
 type ProfileFile = {
  // Which provider the saved settings belong to.
  profile: ProviderProfile
  // OpenAI-style connection settings; every entry is optional.
  env: {
    OPENAI_BASE_URL?: string
    OPENAI_MODEL?: string
    OPENAI_API_KEY?: string
  }
  // Populated from `new Date().toISOString()` when the profile is built.
  createdAt: string
 }
 type CliOptions = {
  apply: boolean
  benchmark: boolean
@@ -90,11 +88,6 @@ function parseOptions(argv: string[]): CliOptions {
  return options
 }
 /**
  * Filters out unusable API keys.
  *
  * @param key - Raw key value; may be undefined or empty.
  * @returns The key when it is non-empty and not the `SUA_CHAVE`
  *   placeholder, otherwise undefined.
  */
 function sanitizeApiKey(key: string | undefined): string | undefined {
  const isPlaceholder = key === 'SUA_CHAVE'
  return key && !isPlaceholder ? key : undefined
 }
 function printHumanSummary(payload: {
  goal: RecommendationGoal
  recommendedProfile: ProviderProfile
@@ -138,29 +131,27 @@ async function maybeApplyProfile(
  goal: RecommendationGoal,
  baseUrl: string | null,
 ): Promise<boolean> {
-  const env: ProfileFile['env'] = {}
+  let env: ProfileFile['env'] | null
  if (profile === 'ollama') {
-    env.OPENAI_BASE_URL = getOllamaChatBaseUrl(baseUrl ?? undefined)
+    env = buildOllamaProfileEnv(model, {
-    env.OPENAI_MODEL = model
+      baseUrl,
-    const key = sanitizeApiKey(process.env.OPENAI_API_KEY)
+      getOllamaChatBaseUrl,
-    if (key) env.OPENAI_API_KEY = key
+    })
  } else {
-    const key = sanitizeApiKey(process.env.OPENAI_API_KEY)
+    env = buildOpenAIProfileEnv({
-    if (!key) {
+      goal,
      model: model || getGoalDefaultOpenAIModel(goal),
      apiKey: process.env.OPENAI_API_KEY,
      processEnv: process.env,
    })
    if (!env) {
      console.error('Cannot apply an OpenAI profile without OPENAI_API_KEY.')
      return false
    }
    env.OPENAI_BASE_URL =
      process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1'
    env.OPENAI_MODEL = model || getGoalDefaultOpenAIModel(goal)
    env.OPENAI_API_KEY = key
  }
-  const profileFile: ProfileFile = {
+  const profileFile = createProfileFile(profile, env)
    profile,
    env,
    createdAt: new Date().toISOString(),
  }
  writeFileSync(
    resolve(process.cwd(), '.openclaude-profile.json'),
@@ -180,7 +171,9 @@ async function main(): Promise<void> {
    : []
  const heuristicRanked = rankOllamaModels(ollamaModels, options.goal)
-  const benchmarkInput = options.benchmark ? heuristicRanked.slice(0, 3) : []
+  const benchmarkInput = options.benchmark
    ? heuristicRanked.filter(isViableOllamaChatModel).slice(0, 3)
    : []
  const benchmarkResults: Record<string, number | null> = {}
  for (const model of benchmarkInput) {
@@ -197,7 +190,7 @@ async function main(): Promise<void> {
        benchmarkMs: null,
      }))
-  const recommendedOllama = rankedModels[0] ?? null
+  const recommendedOllama = selectRecommendedOllamaModel(rankedModels)
  const openAIConfigured = Boolean(sanitizeApiKey(process.env.OPENAI_API_KEY))
  let recommendedProfile: ProviderProfile
--- a/src/utils/providerProfile.test.ts
+++ b/src/utils/providerProfile.test.ts
@@ -0,0 +1,92 @@
 import assert from 'node:assert/strict'
 import test from 'node:test'
 import {
  buildLaunchEnv,
  buildOllamaProfileEnv,
  selectAutoProfile,
  type ProfileFile,
 } from './providerProfile.ts'
 /**
  * Test fixture factory: wraps a provider name and env settings in a
  * `ProfileFile` with a fixed creation timestamp so results are stable.
  */
 function profile(profile: ProfileFile['profile'], env: ProfileFile['env']): ProfileFile {
  const createdAt = '2026-04-01T00:00:00.000Z'
  return { profile, env, createdAt }
 }
 // A saved Ollama profile must take precedence over the discovery callbacks.
 test('matching persisted ollama env is reused for ollama launch', async () => {
  const savedOllama = profile('ollama', {
    OPENAI_BASE_URL: 'http://127.0.0.1:11435/v1',
    OPENAI_MODEL: 'mistral:7b-instruct',
  })

  const launchEnv = await buildLaunchEnv({
    profile: 'ollama',
    persisted: savedOllama,
    goal: 'balanced',
    processEnv: {},
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
  })

  assert.equal(launchEnv.OPENAI_BASE_URL, 'http://127.0.0.1:11435/v1')
  assert.equal(launchEnv.OPENAI_MODEL, 'mistral:7b-instruct')
 })
 // Neither a saved OpenAI profile nor OpenAI-flavoured shell variables may
 // leak into an Ollama launch; the discovery callbacks decide instead.
 test('ollama launch ignores mismatched persisted openai env and shell model fallback', async () => {
  const savedOpenAI = profile('openai', {
    OPENAI_BASE_URL: 'https://api.openai.com/v1',
    OPENAI_MODEL: 'gpt-4o',
    OPENAI_API_KEY: 'sk-persisted',
  })
  const shellEnv = {
    OPENAI_BASE_URL: 'https://api.deepseek.com/v1',
    OPENAI_MODEL: 'gpt-4o-mini',
  }

  const launchEnv = await buildLaunchEnv({
    profile: 'ollama',
    persisted: savedOpenAI,
    goal: 'coding',
    processEnv: shellEnv,
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'qwen2.5-coder:7b',
  })

  assert.equal(launchEnv.OPENAI_BASE_URL, 'http://localhost:11434/v1')
  assert.equal(launchEnv.OPENAI_MODEL, 'qwen2.5-coder:7b')
 })
 // A saved Ollama profile must not influence an OpenAI launch: base URL and
 // model fall back to the OpenAI defaults for the requested goal.
 test('openai launch ignores mismatched persisted ollama env', async () => {
  const savedOllama = profile('ollama', {
    OPENAI_BASE_URL: 'http://localhost:11434/v1',
    OPENAI_MODEL: 'llama3.1:8b',
  })
  const shellEnv = { OPENAI_API_KEY: 'sk-live' }

  const launchEnv = await buildLaunchEnv({
    profile: 'openai',
    persisted: savedOllama,
    goal: 'latency',
    processEnv: shellEnv,
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
    resolveOllamaDefaultModel: async () => 'llama3.1:8b',
  })

  assert.equal(launchEnv.OPENAI_BASE_URL, 'https://api.openai.com/v1')
  assert.equal(launchEnv.OPENAI_MODEL, 'gpt-4o-mini')
  assert.equal(launchEnv.OPENAI_API_KEY, 'sk-live')
 })
 // The Ollama profile env carries exactly a base URL and a model, and no
 // API key entry at all.
 test('ollama profiles never persist openai api keys', () => {
  const expected = {
    OPENAI_BASE_URL: 'http://localhost:11434/v1',
    OPENAI_MODEL: 'llama3.1:8b',
  }

  const profileEnv = buildOllamaProfileEnv('llama3.1:8b', {
    getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
  })

  assert.deepEqual(profileEnv, expected)
  assert.equal('OPENAI_API_KEY' in profileEnv, false)
 })
 // Auto selection: no recommended model means OpenAI, otherwise Ollama.
 test('auto profile falls back to openai when no viable ollama model exists', () => {
  const cases: Array<[string | null, ProfileFile['profile']]> = [
    [null, 'openai'],
    ['qwen2.5-coder:7b', 'ollama'],
  ]
  for (const [recommendedModel, expectedProfile] of cases) {
    assert.equal(selectAutoProfile(recommendedModel), expectedProfile)
  }
 })
--- a/src/utils/providerProfile.ts
+++ b/src/utils/providerProfile.ts
@@ -0,0 +1,123 @@
 import {
  getGoalDefaultOpenAIModel,
  type RecommendationGoal,
 } from './providerRecommendation.ts'
 /** Provider identifiers a profile can target. */
 export type ProviderProfile = 'openai' | 'ollama'
 /**
  * OpenAI-style connection settings stored in a profile. Every entry is
  * optional.
  */
 export type ProfileEnv = {
  OPENAI_BASE_URL?: string
  OPENAI_MODEL?: string
  OPENAI_API_KEY?: string
 }
 /** A provider choice plus its settings and a creation timestamp. */
 export type ProfileFile = {
  // Which provider the settings in `env` belong to.
  profile: ProviderProfile
  env: ProfileEnv
  // ISO-8601 string; `createProfileFile` fills it from `new Date().toISOString()`.
  createdAt: string
 }
 /**
  * Normalizes a candidate API key.
  *
  * @param key - Raw key; may be null, undefined, or empty.
  * @returns The key unchanged when it is non-empty and not the `SUA_CHAVE`
  *   placeholder, otherwise undefined.
  */
 export function sanitizeApiKey(
  key: string | null | undefined,
 ): string | undefined {
  const isUsable = Boolean(key) && key !== 'SUA_CHAVE'
  if (!isUsable) {
    return undefined
  }
  return key ?? undefined
 }
 /**
  * Builds the env section of an Ollama profile.
  *
  * The result holds only a base URL and a model; no API key entry is ever
  * included.
  *
  * @param model - Ollama model name to store.
  * @param options.baseUrl - Optional base URL override; null is treated as
  *   "not provided".
  * @param options.getOllamaChatBaseUrl - Resolver that turns the optional
  *   override into the chat base URL to store.
  * @returns The profile env for the Ollama provider.
  */
 export function buildOllamaProfileEnv(
  model: string,
  options: {
    baseUrl?: string | null
    getOllamaChatBaseUrl: (baseUrl?: string) => string
  },
 ): ProfileEnv {
  const requestedBaseUrl = options.baseUrl ?? undefined
  const env: ProfileEnv = {
    OPENAI_BASE_URL: options.getOllamaChatBaseUrl(requestedBaseUrl),
    OPENAI_MODEL: model,
  }
  return env
 }
 /**
  * Builds the env section of an OpenAI profile.
  *
  * @param options.goal - Goal used to pick a default model when `model` is
  *   not provided.
  * @param options.model - Explicit model name; falsy values fall back to the
  *   goal default.
  * @param options.apiKey - Explicit API key; when null or undefined the key
  *   is read from `processEnv.OPENAI_API_KEY`.
  * @param options.processEnv - Environment to read from; defaults to
  *   `process.env`.
  * @returns The profile env, or null when no usable API key is available.
  */
 export function buildOpenAIProfileEnv(options: {
  goal: RecommendationGoal
  model?: string | null
  apiKey?: string | null
  processEnv?: NodeJS.ProcessEnv
 }): ProfileEnv | null {
  const sourceEnv = options.processEnv ?? process.env
  const apiKey = sanitizeApiKey(options.apiKey ?? sourceEnv.OPENAI_API_KEY)

  if (apiKey) {
    const baseUrl = sourceEnv.OPENAI_BASE_URL || 'https://api.openai.com/v1'
    const model = options.model || getGoalDefaultOpenAIModel(options.goal)
    return {
      OPENAI_BASE_URL: baseUrl,
      OPENAI_MODEL: model,
      OPENAI_API_KEY: apiKey,
    }
  }
  return null
 }
 /**
  * Wraps a provider choice and its env settings in a `ProfileFile`.
  *
  * @param profile - Provider the settings belong to.
  * @param env - Settings to store; the same object is referenced, not copied.
  * @returns The profile, stamped with the current time as an ISO-8601 string.
  */
 export function createProfileFile(
  profile: ProviderProfile,
  env: ProfileEnv,
 ): ProfileFile {
  const createdAt = new Date().toISOString()
  return { profile, env, createdAt }
 }
 /**
  * Picks the provider for auto mode.
  *
  * @param recommendedOllamaModel - Name of the recommended Ollama model, or
  *   null when none was found.
  * @returns `'ollama'` when a non-empty model name is given, else `'openai'`.
  */
 export function selectAutoProfile(
  recommendedOllamaModel: string | null,
 ): ProviderProfile {
  if (recommendedOllamaModel) {
    return 'ollama'
  }
  return 'openai'
 }
 /**
  * Builds the process environment used to launch the app for a provider
  * profile.
  *
  * The result starts as a copy of `processEnv` with
  * `CLAUDE_CODE_USE_OPENAI` forced to `'1'`. Saved settings are only used
  * when they were saved for the same provider that is being launched.
  *
  * - `ollama`: base URL and model come from the matching saved profile,
  *   otherwise from the supplied callbacks. Shell `OPENAI_BASE_URL` and
  *   `OPENAI_MODEL` are not consulted. A missing or placeholder shell API
  *   key is removed from the result.
  * - `openai`: shell values win over saved values, then built-in defaults.
  *   For the API key, a real key from either source beats the `SUA_CHAVE`
  *   placeholder, so a placeholder in the shell no longer shadows a real
  *   saved key.
  *
  * @param options.profile - Provider to launch.
  * @param options.persisted - Saved profile, or null when none exists.
  * @param options.goal - Goal used when a default model must be chosen.
  * @param options.processEnv - Environment to read; defaults to `process.env`.
  * @param options.getOllamaChatBaseUrl - Resolver for the Ollama chat base
  *   URL; defaults to `http://localhost:11434/v1`.
  * @param options.resolveOllamaDefaultModel - Resolver for the Ollama model
  *   when none is saved; defaults to `llama3.1:8b`.
  * @returns The environment to hand to the launched process.
  */
 export async function buildLaunchEnv(options: {
  profile: ProviderProfile
  persisted: ProfileFile | null
  goal: RecommendationGoal
  processEnv?: NodeJS.ProcessEnv
  getOllamaChatBaseUrl?: (baseUrl?: string) => string
  resolveOllamaDefaultModel?: (goal: RecommendationGoal) => Promise<string>
 }): Promise<NodeJS.ProcessEnv> {
  const processEnv = options.processEnv ?? process.env
  // Settings saved for a different provider are ignored entirely.
  const persistedEnv =
    options.persisted?.profile === options.profile
      ? options.persisted.env ?? {}
      : {}
  // A missing value and the `SUA_CHAVE` placeholder both count as "no key".
  const usableKey = (key: string | undefined): string | undefined =>
    key && key !== 'SUA_CHAVE' ? key : undefined

  const env: NodeJS.ProcessEnv = {
    ...processEnv,
    CLAUDE_CODE_USE_OPENAI: '1',
  }

  if (options.profile === 'ollama') {
    const getOllamaBaseUrl =
      options.getOllamaChatBaseUrl ?? (() => 'http://localhost:11434/v1')
    const resolveOllamaModel =
      options.resolveOllamaDefaultModel ?? (async () => 'llama3.1:8b')

    env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || getOllamaBaseUrl()
    env.OPENAI_MODEL =
      persistedEnv.OPENAI_MODEL ||
      (await resolveOllamaModel(options.goal))

    // Drop an unusable key copied in from the shell; a real one is kept.
    if (!usableKey(processEnv.OPENAI_API_KEY)) {
      delete env.OPENAI_API_KEY
    }
    return env
  }

  env.OPENAI_BASE_URL =
    processEnv.OPENAI_BASE_URL ||
    persistedEnv.OPENAI_BASE_URL ||
    'https://api.openai.com/v1'
  env.OPENAI_MODEL =
    processEnv.OPENAI_MODEL ||
    persistedEnv.OPENAI_MODEL ||
    getGoalDefaultOpenAIModel(options.goal)
  // Prefer a real key (shell first, then saved). If neither source has one,
  // keep the previous result so a placeholder still reaches whatever
  // downstream check reports it.
  env.OPENAI_API_KEY =
    usableKey(processEnv.OPENAI_API_KEY) ??
    usableKey(persistedEnv.OPENAI_API_KEY) ??
    (processEnv.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY)
  return env
 }
--- a/src/utils/providerRecommendation.test.ts
+++ b/src/utils/providerRecommendation.test.ts
@@ -83,6 +83,19 @@ test('non-chat embedding models are heavily demoted', () => {
  assert.equal(ranked[0]?.name, 'mistral:7b-instruct')
 })
 // When every installed model is an embedding/reranker/speech model there
 // is nothing to recommend.
 test('auto-pick ignores non-chat ollama models', () => {
  const nonChatModels = [
    model('nomic-embed-text', { parameterSize: '0.5B' }),
    model('bge-reranker-v2', { parameterSize: '1.5B' }),
    model('whisper-large-v3', { parameterSize: '1.6B' }),
  ]

  const pick = recommendOllamaModel(nonChatModels, 'balanced')

  assert.equal(pick, null)
 })
 test('benchmark latency can reorder close recommendations', () => {
  const ranked = rankOllamaModels(
    [
@@ -111,6 +124,69 @@ test('benchmark latency can reorder close recommendations', () => {
  assert.equal(benchmarked[0]?.benchmarkMs, 350)
 })
 // Models without a measured latency must be listed after every model that
 // was actually benchmarked.
 test('unbenchmarked models stay behind benchmarked candidates', () => {
  const installed = [
    model('phi4-mini:4b', {
      parameterSize: '4B',
      quantizationLevel: 'Q4_K_M',
    }),
    model('mistral:7b-instruct', {
      parameterSize: '7B',
      quantizationLevel: 'Q4_K_M',
    }),
    model('llama3.1:8b', {
      parameterSize: '8B',
      quantizationLevel: 'Q4_K_M',
    }),
    model('qwen2.5:14b', {
      parameterSize: '14B',
      quantizationLevel: 'Q4_K_M',
    }),
  ]
  // Only three of the four models get a measurement.
  const measuredMs = {
    'phi4-mini:4b': 2400,
    'mistral:7b-instruct': 2200,
    'llama3.1:8b': 2100,
  }

  const heuristicOrder = rankOllamaModels(installed, 'latency')
  const finalOrder = applyBenchmarkLatency(heuristicOrder, measuredMs, 'latency')

  const leaders = finalOrder.slice(0, 3)
  const trailing = finalOrder[3]
  assert.ok(leaders.every(entry => entry.benchmarkMs !== null))
  assert.equal(trailing?.name, 'qwen2.5:14b')
  assert.equal(trailing?.benchmarkMs, null)
 })
 // For the coding goal, both code-focused families must outrank a general
 // instruct model.
 test('coding goal recognizes codestral and devstral families', () => {
  const installed = [
    model('mistral:7b-instruct', {
      parameterSize: '7B',
      quantizationLevel: 'Q4_K_M',
    }),
    model('codestral:22b', {
      parameterSize: '22B',
      quantizationLevel: 'Q4_K_M',
    }),
    model('devstral:24b', {
      parameterSize: '24B',
      quantizationLevel: 'Q4_K_M',
    }),
  ]

  const ordered = rankOllamaModels(installed, 'coding')
  const topTwoNames = ordered.slice(0, 2).map(entry => entry.name)

  assert.deepEqual(topTwoNames, [
    'devstral:24b',
    'codestral:22b',
  ])
 })
 test('goal defaults choose sensible openai models', () => {
  assert.equal(getGoalDefaultOpenAIModel('latency'), 'gpt-4o-mini')
  assert.equal(getGoalDefaultOpenAIModel('balanced'), 'gpt-4o')
--- a/src/utils/providerRecommendation.ts
+++ b/src/utils/providerRecommendation.ts
@@ -23,6 +23,8 @@ const CODING_HINTS = [
  'coder',
  'codellama',
  'codegemma',
  'codestral',
  'devstral',
  'starcoder',
  'deepseek-coder',
  'qwen2.5-coder',
@@ -57,6 +59,16 @@ function includesAny(text: string, needles: string[]): boolean {
  return needles.some(needle => text.includes(needle))
 }
 /**
  * Reports whether a model may be used as a chat model.
  *
  * @param model - Model descriptor to inspect.
  * @returns false when the model's haystack text contains any entry of
  *   `NON_CHAT_HINTS`, true otherwise.
  */
 export function isViableOllamaChatModel(model: OllamaModelDescriptor): boolean {
  const haystack = modelHaystack(model)
  const looksNonChat = includesAny(haystack, NON_CHAT_HINTS)
  return !looksNonChat
 }
 /**
  * Returns the first model in `models` that passes
  * `isViableOllamaChatModel`, preserving the caller's ordering.
  *
  * @param models - Candidates, in the order they should be considered.
  * @returns The first viable model, or null when none qualifies.
  */
 export function selectRecommendedOllamaModel<
  T extends OllamaModelDescriptor,
 >(models: T[]): T | null {
  const firstViable = models.find(candidate => isViableOllamaChatModel(candidate))
  return firstViable === undefined ? null : firstViable
 }
 function inferParameterBillions(model: OllamaModelDescriptor): number | null {
  const text = `${model.parameterSize ?? ''} ${model.name}`.toLowerCase()
  const match = text.match(/(\d+(?:\.\d+)?)\s*b\b/)
@@ -265,7 +277,7 @@ export function recommendOllamaModel(
  models: OllamaModelDescriptor[],
  goal: RecommendationGoal,
 ): RankedOllamaModel | null {
-  return rankOllamaModels(models, goal)[0] ?? null
+  return selectRecommendedOllamaModel(rankOllamaModels(models, goal))
 }
 export function applyBenchmarkLatency(
@@ -276,7 +288,7 @@ export function applyBenchmarkLatency(
  const divisor =
    goal === 'latency' ? 120 : goal === 'coding' ? 500 : 240
-  return models
+  const scoredModels = models
    .map(model => {
      const latency = benchmarkMs[model.name] ?? null
      const benchmarkPenalty = latency === null ? 0 : latency / divisor
@@ -293,5 +305,13 @@ export function applyBenchmarkLatency(
        score: Number((model.score - benchmarkPenalty).toFixed(2)),
      }
    })
-    .sort((a, b) => compareRankedModels(a, b, goal))
+
  const benchmarkedModels = scoredModels.filter(model => model.benchmarkMs !== null)
  if (benchmarkedModels.length === 0) {
    return scoredModels.sort((a, b) => compareRankedModels(a, b, goal))
  }
  const unbenchmarkedModels = scoredModels.filter(model => model.benchmarkMs === null)
  benchmarkedModels.sort((a, b) => compareRankedModels(a, b, goal))
  return [...benchmarkedModels, ...unbenchmarkedModels]
 }