Merge pull request #5 from Vasanthdev2004/codex/provider-profile-recommendations
feat: add intelligent provider profile recommendation
This commit is contained in:
28
PLAYBOOK.md
28
PLAYBOOK.md
@@ -37,6 +37,18 @@ If everything is healthy, OpenClaude starts directly.
|
||||
bun run profile:init -- --provider ollama --model llama3.1:8b
|
||||
```
|
||||
|
||||
Or let OpenClaude recommend the best local model for your goal:
|
||||
|
||||
```powershell
|
||||
bun run profile:init -- --provider ollama --goal coding
|
||||
```
|
||||
|
||||
Preview recommendations before saving:
|
||||
|
||||
```powershell
|
||||
bun run profile:recommend -- --goal coding --benchmark
|
||||
```
|
||||
|
||||
### 3.2 Confirm profile file
|
||||
|
||||
```powershell
|
||||
@@ -171,6 +183,12 @@ Fix:
|
||||
bun run profile:init -- --provider ollama --model llama3.1:8b
|
||||
```
|
||||
|
||||
Or pick a local Ollama profile automatically by goal:
|
||||
|
||||
```powershell
|
||||
bun run profile:init -- --provider ollama --goal balanced
|
||||
```
|
||||
|
||||
## 6.5 Placeholder key (`SUA_CHAVE`) error
|
||||
|
||||
Cause:
|
||||
@@ -202,6 +220,16 @@ bun run profile:fast # llama3.2:3b
|
||||
bun run profile:code # qwen2.5-coder:7b
|
||||
```
|
||||
|
||||
Goal-based local auto-selection:
|
||||
|
||||
```powershell
|
||||
bun run profile:init -- --provider ollama --goal latency
|
||||
bun run profile:init -- --provider ollama --goal balanced
|
||||
bun run profile:init -- --provider ollama --goal coding
|
||||
```
|
||||
|
||||
`profile:auto` is a best-available provider picker, not a local-only command. Use `--provider ollama` when you want to stay on a local model.
|
||||
|
||||
## 8. Practical Prompt Playbook (Copy/Paste)
|
||||
|
||||
## 8.1 Code understanding
|
||||
|
||||
21
README.md
21
README.md
@@ -209,7 +209,7 @@ bun run doctor:runtime:json
|
||||
# persist a diagnostics report to reports/doctor-runtime.json
|
||||
bun run doctor:report
|
||||
|
||||
# full local hardening check (typecheck + smoke + runtime doctor)
|
||||
# full local hardening check (smoke + runtime doctor)
|
||||
bun run hardening:check
|
||||
|
||||
# strict hardening (includes project-wide typecheck)
|
||||
@@ -226,9 +226,15 @@ Notes:
|
||||
Use profile launchers to avoid repeated environment setup:
|
||||
|
||||
```bash
|
||||
# one-time profile bootstrap (auto-detect ollama, otherwise openai)
|
||||
# one-time profile bootstrap (prefer viable local Ollama, otherwise OpenAI)
|
||||
bun run profile:init
|
||||
|
||||
# preview the best provider/model for your goal
|
||||
bun run profile:recommend -- --goal coding --benchmark
|
||||
|
||||
# auto-apply the best available local/openai provider/model for your goal
|
||||
bun run profile:auto -- --goal latency
|
||||
|
||||
# codex bootstrap (defaults to codexplan and ~/.codex/auth.json)
|
||||
bun run profile:codex
|
||||
|
||||
@@ -238,6 +244,9 @@ bun run profile:init -- --provider openai --api-key sk-...
|
||||
# ollama bootstrap with custom model
|
||||
bun run profile:init -- --provider ollama --model llama3.1:8b
|
||||
|
||||
# ollama bootstrap with intelligent model auto-selection
|
||||
bun run profile:init -- --provider ollama --goal coding
|
||||
|
||||
# codex bootstrap with a fast model alias
|
||||
bun run profile:init -- --provider codex --model codexspark
|
||||
|
||||
@@ -254,6 +263,14 @@ bun run dev:openai
|
||||
bun run dev:ollama
|
||||
```
|
||||
|
||||
`profile:recommend` ranks installed Ollama models for `latency`, `balanced`, or `coding`, and `profile:auto` can persist the recommendation directly.
|
||||
If no profile exists yet, `dev:profile` now uses the same goal-aware defaults when picking the initial model.
|
||||
|
||||
Use `--provider ollama` when you want a local-only path. Auto mode falls back to OpenAI when no viable local chat model is installed.
|
||||
Goal-based Ollama selection only recommends among models that are already installed and reachable from Ollama.
|
||||
|
||||
Use `profile:codex` or `--provider codex` when you want the ChatGPT Codex backend.
|
||||
|
||||
`dev:openai`, `dev:ollama`, and `dev:codex` run `doctor:runtime` first and only launch the app if checks pass.
|
||||
For `dev:ollama`, make sure Ollama is running locally before launch.
|
||||
|
||||
|
||||
@@ -22,12 +22,15 @@
|
||||
"dev:ollama": "bun run scripts/provider-launch.ts ollama",
|
||||
"dev:ollama:fast": "bun run scripts/provider-launch.ts ollama --fast --bare",
|
||||
"profile:init": "bun run scripts/provider-bootstrap.ts",
|
||||
"profile:recommend": "bun run scripts/provider-recommend.ts",
|
||||
"profile:auto": "bun run scripts/provider-recommend.ts --apply",
|
||||
"profile:codex": "bun run profile:init -- --provider codex --model codexplan",
|
||||
"profile:fast": "bun run profile:init -- --provider ollama --model llama3.2:3b",
|
||||
"profile:code": "bun run profile:init -- --provider ollama --model qwen2.5-coder:7b",
|
||||
"dev:fast": "bun run profile:fast && bun run dev:ollama:fast",
|
||||
"dev:code": "bun run profile:code && bun run dev:profile",
|
||||
"start": "node dist/cli.mjs",
|
||||
"test:provider-recommendation": "node --test --experimental-strip-types src/utils/providerRecommendation.test.ts src/utils/providerProfile.test.ts",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"smoke": "bun run build && node dist/cli.mjs --version",
|
||||
"test:provider": "bun test src/services/api/*.test.ts",
|
||||
|
||||
@@ -2,25 +2,28 @@
|
||||
import { writeFileSync } from 'node:fs'
|
||||
import { resolve } from 'node:path'
|
||||
import {
|
||||
DEFAULT_CODEX_BASE_URL,
|
||||
resolveCodexApiCredentials,
|
||||
} from '../src/services/api/providerConfig.js'
|
||||
|
||||
type ProviderProfile = 'openai' | 'ollama' | 'codex' | 'gemini'
|
||||
|
||||
type ProfileFile = {
|
||||
profile: ProviderProfile
|
||||
env: {
|
||||
OPENAI_BASE_URL?: string
|
||||
OPENAI_MODEL?: string
|
||||
OPENAI_API_KEY?: string
|
||||
CODEX_API_KEY?: string
|
||||
GEMINI_API_KEY?: string
|
||||
GEMINI_MODEL?: string
|
||||
GEMINI_BASE_URL?: string
|
||||
}
|
||||
createdAt: string
|
||||
}
|
||||
import {
|
||||
getGoalDefaultOpenAIModel,
|
||||
normalizeRecommendationGoal,
|
||||
recommendOllamaModel,
|
||||
} from '../src/utils/providerRecommendation.ts'
|
||||
import {
|
||||
buildCodexProfileEnv,
|
||||
buildGeminiProfileEnv,
|
||||
buildOllamaProfileEnv,
|
||||
buildOpenAIProfileEnv,
|
||||
createProfileFile,
|
||||
selectAutoProfile,
|
||||
type ProfileFile,
|
||||
type ProviderProfile,
|
||||
} from '../src/utils/providerProfile.ts'
|
||||
import {
|
||||
getOllamaChatBaseUrl,
|
||||
hasLocalOllama,
|
||||
listOllamaModels,
|
||||
} from './provider-discovery.ts'
|
||||
|
||||
function parseArg(name: string): string | null {
|
||||
const args = process.argv.slice(2)
|
||||
@@ -35,27 +38,16 @@ function parseProviderArg(): ProviderProfile | 'auto' {
|
||||
return 'auto'
|
||||
}
|
||||
|
||||
async function hasLocalOllama(): Promise<boolean> {
|
||||
const endpoint = 'http://localhost:11434/api/tags'
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort(), 1200)
|
||||
async function resolveOllamaModel(
|
||||
argModel: string | null,
|
||||
argBaseUrl: string | null,
|
||||
goal: ReturnType<typeof normalizeRecommendationGoal>,
|
||||
): Promise<string | null> {
|
||||
if (argModel) return argModel
|
||||
|
||||
try {
|
||||
const response = await fetch(endpoint, {
|
||||
method: 'GET',
|
||||
signal: controller.signal,
|
||||
})
|
||||
return response.ok
|
||||
} catch {
|
||||
return false
|
||||
} finally {
|
||||
clearTimeout(timeout)
|
||||
}
|
||||
}
|
||||
|
||||
function sanitizeApiKey(key: string | null): string | undefined {
|
||||
if (!key || key === 'SUA_CHAVE') return undefined
|
||||
return key
|
||||
const discovered = await listOllamaModels(argBaseUrl || undefined)
|
||||
const recommended = recommendOllamaModel(discovered, goal)
|
||||
return recommended?.name ?? null
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
@@ -63,69 +55,104 @@ async function main(): Promise<void> {
|
||||
const argModel = parseArg('--model')
|
||||
const argBaseUrl = parseArg('--base-url')
|
||||
const argApiKey = parseArg('--api-key')
|
||||
const goal = normalizeRecommendationGoal(
|
||||
parseArg('--goal') || process.env.OPENCLAUDE_PROFILE_GOAL,
|
||||
)
|
||||
|
||||
let selected: ProviderProfile
|
||||
let resolvedOllamaModel: string | null = null
|
||||
if (provider === 'auto') {
|
||||
selected = (await hasLocalOllama()) ? 'ollama' : 'openai'
|
||||
if (await hasLocalOllama(argBaseUrl || undefined)) {
|
||||
resolvedOllamaModel = await resolveOllamaModel(argModel, argBaseUrl, goal)
|
||||
selected = selectAutoProfile(resolvedOllamaModel)
|
||||
} else {
|
||||
selected = 'openai'
|
||||
}
|
||||
} else {
|
||||
selected = provider
|
||||
}
|
||||
|
||||
const env: ProfileFile['env'] = {}
|
||||
|
||||
let env: ProfileFile['env']
|
||||
if (selected === 'gemini') {
|
||||
env.GEMINI_MODEL = argModel || process.env.GEMINI_MODEL || 'gemini-2.0-flash'
|
||||
const key = sanitizeApiKey(argApiKey || process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY || null)
|
||||
if (!key) {
|
||||
const builtEnv = buildGeminiProfileEnv({
|
||||
model: argModel || null,
|
||||
baseUrl: argBaseUrl || null,
|
||||
apiKey: argApiKey || null,
|
||||
processEnv: process.env,
|
||||
})
|
||||
|
||||
if (!builtEnv) {
|
||||
console.error('Gemini profile requires an API key. Use --api-key or set GEMINI_API_KEY.')
|
||||
console.error('Get a free key at: https://aistudio.google.com/apikey')
|
||||
process.exit(1)
|
||||
}
|
||||
env.GEMINI_API_KEY = key
|
||||
if (argBaseUrl) env.GEMINI_BASE_URL = argBaseUrl
|
||||
|
||||
env = builtEnv
|
||||
} else if (selected === 'ollama') {
|
||||
env.OPENAI_BASE_URL = argBaseUrl || 'http://localhost:11434/v1'
|
||||
env.OPENAI_MODEL = argModel || process.env.OPENAI_MODEL || 'llama3.1:8b'
|
||||
const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null)
|
||||
if (key) env.OPENAI_API_KEY = key
|
||||
} else if (selected === 'codex') {
|
||||
env.OPENAI_BASE_URL =
|
||||
argBaseUrl || process.env.OPENAI_BASE_URL || DEFAULT_CODEX_BASE_URL
|
||||
env.OPENAI_MODEL = argModel || process.env.OPENAI_MODEL || 'codexplan'
|
||||
const key = sanitizeApiKey(argApiKey || process.env.CODEX_API_KEY || null)
|
||||
if (key) {
|
||||
env.CODEX_API_KEY = key
|
||||
} else {
|
||||
const credentials = resolveCodexApiCredentials(process.env)
|
||||
if (!credentials.apiKey) {
|
||||
const authHint = credentials.authPath
|
||||
? ` or make sure ${credentials.authPath} exists`
|
||||
: ''
|
||||
console.error(`Codex profile requires CODEX_API_KEY${authHint}.`)
|
||||
process.exit(1)
|
||||
}
|
||||
resolvedOllamaModel ??= await resolveOllamaModel(argModel, argBaseUrl, goal)
|
||||
if (!resolvedOllamaModel) {
|
||||
console.error('No viable Ollama chat model was discovered. Pull a chat model first or pass --model explicitly.')
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
env = buildOllamaProfileEnv(
|
||||
resolvedOllamaModel,
|
||||
{
|
||||
baseUrl: argBaseUrl,
|
||||
getOllamaChatBaseUrl,
|
||||
},
|
||||
)
|
||||
} else if (selected === 'codex') {
|
||||
const builtEnv = buildCodexProfileEnv({
|
||||
model: argModel,
|
||||
baseUrl: argBaseUrl,
|
||||
apiKey: argApiKey || process.env.CODEX_API_KEY || null,
|
||||
processEnv: process.env,
|
||||
})
|
||||
|
||||
if (!builtEnv) {
|
||||
const credentials = resolveCodexApiCredentials(
|
||||
argApiKey
|
||||
? { ...process.env, CODEX_API_KEY: argApiKey }
|
||||
: process.env,
|
||||
)
|
||||
const authHint = credentials.authPath
|
||||
? ` or make sure ${credentials.authPath} exists`
|
||||
: ''
|
||||
if (!credentials.apiKey) {
|
||||
console.error(`Codex profile requires CODEX_API_KEY${authHint}.`)
|
||||
} else {
|
||||
console.error('Codex profile requires CHATGPT_ACCOUNT_ID or an auth.json that includes it.')
|
||||
}
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
env = builtEnv
|
||||
} else {
|
||||
env.OPENAI_BASE_URL = argBaseUrl || process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1'
|
||||
env.OPENAI_MODEL = argModel || process.env.OPENAI_MODEL || 'gpt-4o'
|
||||
const key = sanitizeApiKey(argApiKey || process.env.OPENAI_API_KEY || null)
|
||||
if (!key) {
|
||||
const builtEnv = buildOpenAIProfileEnv({
|
||||
goal,
|
||||
model: argModel || null,
|
||||
baseUrl: argBaseUrl || null,
|
||||
apiKey: argApiKey || process.env.OPENAI_API_KEY || null,
|
||||
processEnv: process.env,
|
||||
})
|
||||
|
||||
if (!builtEnv) {
|
||||
console.error('OpenAI profile requires a real API key. Use --api-key or set OPENAI_API_KEY.')
|
||||
process.exit(1)
|
||||
}
|
||||
env.OPENAI_API_KEY = key
|
||||
|
||||
env = builtEnv
|
||||
}
|
||||
|
||||
const profile: ProfileFile = {
|
||||
profile: selected,
|
||||
env,
|
||||
createdAt: new Date().toISOString(),
|
||||
}
|
||||
const profile = createProfileFile(selected, env)
|
||||
|
||||
const outputPath = resolve(process.cwd(), '.openclaude-profile.json')
|
||||
writeFileSync(outputPath, JSON.stringify(profile, null, 2), 'utf8')
|
||||
|
||||
console.log(`Saved profile: ${selected}`)
|
||||
console.log(`Goal: ${goal}`)
|
||||
console.log(`Model: ${profile.env.GEMINI_MODEL || profile.env.OPENAI_MODEL || getGoalDefaultOpenAIModel(goal)}`)
|
||||
console.log(`Path: ${outputPath}`)
|
||||
console.log('Next: bun run dev:profile')
|
||||
}
|
||||
|
||||
129
scripts/provider-discovery.ts
Normal file
129
scripts/provider-discovery.ts
Normal file
@@ -0,0 +1,129 @@
|
||||
import type { OllamaModelDescriptor } from '../src/utils/providerRecommendation.ts'
|
||||
|
||||
export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434'
|
||||
|
||||
function withTimeoutSignal(timeoutMs: number): {
|
||||
signal: AbortSignal
|
||||
clear: () => void
|
||||
} {
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs)
|
||||
return {
|
||||
signal: controller.signal,
|
||||
clear: () => clearTimeout(timeout),
|
||||
}
|
||||
}
|
||||
|
||||
function trimTrailingSlash(value: string): string {
|
||||
return value.replace(/\/+$/, '')
|
||||
}
|
||||
|
||||
export function getOllamaApiBaseUrl(baseUrl?: string): string {
|
||||
const parsed = new URL(
|
||||
baseUrl || process.env.OLLAMA_BASE_URL || DEFAULT_OLLAMA_BASE_URL,
|
||||
)
|
||||
const pathname = trimTrailingSlash(parsed.pathname)
|
||||
parsed.pathname = pathname.endsWith('/v1')
|
||||
? pathname.slice(0, -3) || '/'
|
||||
: pathname || '/'
|
||||
parsed.search = ''
|
||||
parsed.hash = ''
|
||||
return trimTrailingSlash(parsed.toString())
|
||||
}
|
||||
|
||||
export function getOllamaChatBaseUrl(baseUrl?: string): string {
|
||||
return `${getOllamaApiBaseUrl(baseUrl)}/v1`
|
||||
}
|
||||
|
||||
export async function hasLocalOllama(baseUrl?: string): Promise<boolean> {
|
||||
const { signal, clear } = withTimeoutSignal(1200)
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
|
||||
method: 'GET',
|
||||
signal,
|
||||
})
|
||||
return response.ok
|
||||
} catch {
|
||||
return false
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
}
|
||||
|
||||
export async function listOllamaModels(
|
||||
baseUrl?: string,
|
||||
): Promise<OllamaModelDescriptor[]> {
|
||||
const { signal, clear } = withTimeoutSignal(5000)
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
|
||||
method: 'GET',
|
||||
signal,
|
||||
})
|
||||
if (!response.ok) {
|
||||
return []
|
||||
}
|
||||
|
||||
const data = await response.json() as {
|
||||
models?: Array<{
|
||||
name?: string
|
||||
size?: number
|
||||
details?: {
|
||||
family?: string
|
||||
families?: string[]
|
||||
parameter_size?: string
|
||||
quantization_level?: string
|
||||
}
|
||||
}>
|
||||
}
|
||||
|
||||
return (data.models ?? [])
|
||||
.filter(model => Boolean(model.name))
|
||||
.map(model => ({
|
||||
name: model.name!,
|
||||
sizeBytes: typeof model.size === 'number' ? model.size : null,
|
||||
family: model.details?.family ?? null,
|
||||
families: model.details?.families ?? [],
|
||||
parameterSize: model.details?.parameter_size ?? null,
|
||||
quantizationLevel: model.details?.quantization_level ?? null,
|
||||
}))
|
||||
} catch {
|
||||
return []
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
}
|
||||
|
||||
export async function benchmarkOllamaModel(
|
||||
modelName: string,
|
||||
baseUrl?: string,
|
||||
): Promise<number | null> {
|
||||
const start = Date.now()
|
||||
const { signal, clear } = withTimeoutSignal(20000)
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
signal,
|
||||
body: JSON.stringify({
|
||||
model: modelName,
|
||||
stream: false,
|
||||
messages: [{ role: 'user', content: 'Reply with OK.' }],
|
||||
options: {
|
||||
temperature: 0,
|
||||
num_predict: 8,
|
||||
},
|
||||
}),
|
||||
})
|
||||
if (!response.ok) {
|
||||
return null
|
||||
}
|
||||
await response.json()
|
||||
return Date.now() - start
|
||||
} catch {
|
||||
return null
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
}
|
||||
@@ -3,43 +3,51 @@ import { spawn } from 'node:child_process'
|
||||
import { existsSync, readFileSync } from 'node:fs'
|
||||
import { resolve } from 'node:path'
|
||||
import {
|
||||
DEFAULT_CODEX_BASE_URL,
|
||||
resolveCodexApiCredentials,
|
||||
} from '../src/services/api/providerConfig.js'
|
||||
|
||||
type ProviderProfile = 'openai' | 'ollama' | 'codex' | 'gemini'
|
||||
|
||||
type ProfileFile = {
|
||||
profile: ProviderProfile
|
||||
env?: {
|
||||
OPENAI_BASE_URL?: string
|
||||
OPENAI_MODEL?: string
|
||||
OPENAI_API_KEY?: string
|
||||
CODEX_API_KEY?: string
|
||||
GEMINI_API_KEY?: string
|
||||
GEMINI_MODEL?: string
|
||||
GEMINI_BASE_URL?: string
|
||||
}
|
||||
}
|
||||
import {
|
||||
normalizeRecommendationGoal,
|
||||
recommendOllamaModel,
|
||||
} from '../src/utils/providerRecommendation.ts'
|
||||
import {
|
||||
buildLaunchEnv,
|
||||
selectAutoProfile,
|
||||
type ProfileFile,
|
||||
type ProviderProfile,
|
||||
} from '../src/utils/providerProfile.ts'
|
||||
import {
|
||||
getOllamaChatBaseUrl,
|
||||
hasLocalOllama,
|
||||
listOllamaModels,
|
||||
} from './provider-discovery.ts'
|
||||
|
||||
type LaunchOptions = {
|
||||
requestedProfile: ProviderProfile | 'auto' | null
|
||||
passthroughArgs: string[]
|
||||
fast: boolean
|
||||
goal: ReturnType<typeof normalizeRecommendationGoal>
|
||||
}
|
||||
|
||||
function parseLaunchOptions(argv: string[]): LaunchOptions {
|
||||
let requestedProfile: ProviderProfile | 'auto' | null = 'auto'
|
||||
const passthroughArgs: string[] = []
|
||||
let fast = false
|
||||
let goal = normalizeRecommendationGoal(process.env.OPENCLAUDE_PROFILE_GOAL)
|
||||
|
||||
for (const arg of argv) {
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const arg = argv[i]!
|
||||
const lower = arg.toLowerCase()
|
||||
if (lower === '--fast') {
|
||||
fast = true
|
||||
continue
|
||||
}
|
||||
|
||||
if (lower === '--goal') {
|
||||
goal = normalizeRecommendationGoal(argv[i + 1] ?? null)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
if ((lower === 'auto' || lower === 'openai' || lower === 'ollama' || lower === 'codex' || lower === 'gemini') && requestedProfile === 'auto') {
|
||||
requestedProfile = lower as ProviderProfile | 'auto'
|
||||
continue
|
||||
@@ -62,6 +70,7 @@ function parseLaunchOptions(argv: string[]): LaunchOptions {
|
||||
requestedProfile,
|
||||
passthroughArgs,
|
||||
fast,
|
||||
goal,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,18 +88,12 @@ function loadPersistedProfile(): ProfileFile | null {
|
||||
}
|
||||
}
|
||||
|
||||
async function hasLocalOllama(): Promise<boolean> {
|
||||
const endpoint = 'http://localhost:11434/api/tags'
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort(), 1200)
|
||||
try {
|
||||
const response = await fetch(endpoint, { signal: controller.signal })
|
||||
return response.ok
|
||||
} catch {
|
||||
return false
|
||||
} finally {
|
||||
clearTimeout(timeout)
|
||||
}
|
||||
async function resolveOllamaDefaultModel(
|
||||
goal: ReturnType<typeof normalizeRecommendationGoal>,
|
||||
): Promise<string | null> {
|
||||
const models = await listOllamaModels()
|
||||
const recommended = recommendOllamaModel(models, goal)
|
||||
return recommended?.name ?? null
|
||||
}
|
||||
|
||||
function runCommand(command: string, env: NodeJS.ProcessEnv): Promise<number> {
|
||||
@@ -107,57 +110,6 @@ function runCommand(command: string, env: NodeJS.ProcessEnv): Promise<number> {
|
||||
})
|
||||
}
|
||||
|
||||
function buildEnv(profile: ProviderProfile, persisted: ProfileFile | null): NodeJS.ProcessEnv {
|
||||
const persistedEnv = persisted?.env ?? {}
|
||||
|
||||
if (profile === 'gemini') {
|
||||
const env: NodeJS.ProcessEnv = {
|
||||
...process.env,
|
||||
CLAUDE_CODE_USE_GEMINI: '1',
|
||||
}
|
||||
delete env.CLAUDE_CODE_USE_OPENAI
|
||||
env.GEMINI_MODEL = process.env.GEMINI_MODEL || persistedEnv.GEMINI_MODEL || 'gemini-2.0-flash'
|
||||
env.GEMINI_API_KEY = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY || persistedEnv.GEMINI_API_KEY
|
||||
if (persistedEnv.GEMINI_BASE_URL || process.env.GEMINI_BASE_URL) {
|
||||
env.GEMINI_BASE_URL = process.env.GEMINI_BASE_URL || persistedEnv.GEMINI_BASE_URL
|
||||
}
|
||||
return env
|
||||
}
|
||||
|
||||
const env: NodeJS.ProcessEnv = {
|
||||
...process.env,
|
||||
CLAUDE_CODE_USE_OPENAI: '1',
|
||||
}
|
||||
|
||||
if (profile === 'ollama') {
|
||||
env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || process.env.OPENAI_BASE_URL || 'http://localhost:11434/v1'
|
||||
env.OPENAI_MODEL = persistedEnv.OPENAI_MODEL || process.env.OPENAI_MODEL || 'llama3.1:8b'
|
||||
if (!process.env.OPENAI_API_KEY || process.env.OPENAI_API_KEY === 'SUA_CHAVE') {
|
||||
delete env.OPENAI_API_KEY
|
||||
}
|
||||
return env
|
||||
}
|
||||
|
||||
if (profile === 'codex') {
|
||||
env.OPENAI_BASE_URL =
|
||||
process.env.OPENAI_BASE_URL ||
|
||||
persistedEnv.OPENAI_BASE_URL ||
|
||||
DEFAULT_CODEX_BASE_URL
|
||||
env.OPENAI_MODEL =
|
||||
process.env.OPENAI_MODEL ||
|
||||
persistedEnv.OPENAI_MODEL ||
|
||||
'codexplan'
|
||||
env.CODEX_API_KEY =
|
||||
process.env.CODEX_API_KEY || persistedEnv.CODEX_API_KEY
|
||||
return env
|
||||
}
|
||||
|
||||
env.OPENAI_BASE_URL = process.env.OPENAI_BASE_URL || persistedEnv.OPENAI_BASE_URL || 'https://api.openai.com/v1'
|
||||
env.OPENAI_MODEL = process.env.OPENAI_MODEL || persistedEnv.OPENAI_MODEL || 'gpt-4o'
|
||||
env.OPENAI_API_KEY = process.env.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY
|
||||
return env
|
||||
}
|
||||
|
||||
function applyFastFlags(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
|
||||
env.CLAUDE_CODE_SIMPLE ??= '1'
|
||||
env.CLAUDE_CODE_DISABLE_THINKING ??= '1'
|
||||
@@ -193,24 +145,45 @@ async function main(): Promise<void> {
|
||||
const options = parseLaunchOptions(process.argv.slice(2))
|
||||
const requestedProfile = options.requestedProfile
|
||||
if (!requestedProfile) {
|
||||
console.error('Usage: bun run scripts/provider-launch.ts [openai|ollama|codex|gemini|auto] [--fast] [-- <cli args>]')
|
||||
console.error('Usage: bun run scripts/provider-launch.ts [openai|ollama|codex|gemini|auto] [--fast] [--goal <latency|balanced|coding>] [-- <cli args>]')
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
const persisted = loadPersistedProfile()
|
||||
let profile: ProviderProfile
|
||||
let resolvedOllamaModel: string | null = null
|
||||
|
||||
if (requestedProfile === 'auto') {
|
||||
if (persisted) {
|
||||
profile = persisted.profile
|
||||
} else if (await hasLocalOllama()) {
|
||||
resolvedOllamaModel = await resolveOllamaDefaultModel(options.goal)
|
||||
profile = selectAutoProfile(resolvedOllamaModel)
|
||||
} else {
|
||||
profile = (await hasLocalOllama()) ? 'ollama' : 'openai'
|
||||
profile = 'openai'
|
||||
}
|
||||
} else {
|
||||
profile = requestedProfile
|
||||
}
|
||||
|
||||
const env = buildEnv(profile, persisted)
|
||||
if (
|
||||
profile === 'ollama' &&
|
||||
(persisted?.profile !== 'ollama' || !persisted?.env?.OPENAI_MODEL)
|
||||
) {
|
||||
resolvedOllamaModel ??= await resolveOllamaDefaultModel(options.goal)
|
||||
if (!resolvedOllamaModel) {
|
||||
console.error('No viable Ollama chat model was discovered. Pull a chat model first or save one with `bun run profile:init -- --provider ollama --model <model>`.')
|
||||
process.exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
const env = await buildLaunchEnv({
|
||||
profile,
|
||||
persisted,
|
||||
goal: options.goal,
|
||||
getOllamaChatBaseUrl,
|
||||
resolveOllamaDefaultModel: async () => resolvedOllamaModel || 'llama3.1:8b',
|
||||
})
|
||||
if (options.fast) {
|
||||
applyFastFlags(env)
|
||||
}
|
||||
@@ -234,6 +207,11 @@ async function main(): Promise<void> {
|
||||
console.error(`CODEX_API_KEY is required for codex profile${authHint}. Run: bun run profile:init -- --provider codex --model codexplan`)
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
if (!credentials.accountId) {
|
||||
console.error('CHATGPT_ACCOUNT_ID is required for codex profile. Set CHATGPT_ACCOUNT_ID/CODEX_ACCOUNT_ID or use an auth.json that includes it.')
|
||||
process.exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
printSummary(profile, env)
|
||||
|
||||
270
scripts/provider-recommend.ts
Normal file
270
scripts/provider-recommend.ts
Normal file
@@ -0,0 +1,270 @@
|
||||
// @ts-nocheck
|
||||
import { writeFileSync } from 'node:fs'
|
||||
import { resolve } from 'node:path'
|
||||
|
||||
import {
|
||||
applyBenchmarkLatency,
|
||||
getGoalDefaultOpenAIModel,
|
||||
isViableOllamaChatModel,
|
||||
normalizeRecommendationGoal,
|
||||
rankOllamaModels,
|
||||
selectRecommendedOllamaModel,
|
||||
type BenchmarkedOllamaModel,
|
||||
type RecommendationGoal,
|
||||
} from '../src/utils/providerRecommendation.ts'
|
||||
import {
|
||||
buildOllamaProfileEnv,
|
||||
buildOpenAIProfileEnv,
|
||||
createProfileFile,
|
||||
sanitizeApiKey,
|
||||
type ProfileFile,
|
||||
type ProviderProfile,
|
||||
} from '../src/utils/providerProfile.ts'
|
||||
import {
|
||||
benchmarkOllamaModel,
|
||||
getOllamaChatBaseUrl,
|
||||
hasLocalOllama,
|
||||
listOllamaModels,
|
||||
} from './provider-discovery.ts'
|
||||
|
||||
type CliOptions = {
|
||||
apply: boolean
|
||||
benchmark: boolean
|
||||
goal: RecommendationGoal
|
||||
json: boolean
|
||||
provider: ProviderProfile | 'auto'
|
||||
baseUrl: string | null
|
||||
}
|
||||
|
||||
function parseOptions(argv: string[]): CliOptions {
|
||||
const options: CliOptions = {
|
||||
apply: false,
|
||||
benchmark: false,
|
||||
goal: normalizeRecommendationGoal(process.env.OPENCLAUDE_PROFILE_GOAL),
|
||||
json: false,
|
||||
provider: 'auto',
|
||||
baseUrl: null,
|
||||
}
|
||||
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const arg = argv[i]?.toLowerCase()
|
||||
if (!arg) continue
|
||||
|
||||
if (arg === '--apply') {
|
||||
options.apply = true
|
||||
continue
|
||||
}
|
||||
if (arg === '--benchmark') {
|
||||
options.benchmark = true
|
||||
continue
|
||||
}
|
||||
if (arg === '--json') {
|
||||
options.json = true
|
||||
continue
|
||||
}
|
||||
if (arg === '--goal') {
|
||||
options.goal = normalizeRecommendationGoal(argv[i + 1] ?? null)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if (arg === '--provider') {
|
||||
const provider = argv[i + 1]?.toLowerCase()
|
||||
if (
|
||||
provider === 'openai' ||
|
||||
provider === 'ollama' ||
|
||||
provider === 'auto'
|
||||
) {
|
||||
options.provider = provider
|
||||
}
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if (arg === '--base-url') {
|
||||
options.baseUrl = argv[i + 1] ?? null
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
return options
|
||||
}
|
||||
|
||||
function printHumanSummary(payload: {
|
||||
goal: RecommendationGoal
|
||||
recommendedProfile: ProviderProfile
|
||||
recommendedModel: string
|
||||
rankedModels: BenchmarkedOllamaModel[]
|
||||
benchmarked: boolean
|
||||
applied: boolean
|
||||
}): void {
|
||||
console.log(`Recommendation goal: ${payload.goal}`)
|
||||
console.log(`Recommended profile: ${payload.recommendedProfile}`)
|
||||
console.log(`Recommended model: ${payload.recommendedModel}`)
|
||||
|
||||
if (payload.rankedModels.length > 0) {
|
||||
console.log('\nRanked Ollama models:')
|
||||
for (const [index, model] of payload.rankedModels.slice(0, 5).entries()) {
|
||||
const benchmarkPart =
|
||||
payload.benchmarked && model.benchmarkMs !== null
|
||||
? ` | ${Math.round(model.benchmarkMs)}ms`
|
||||
: ''
|
||||
console.log(
|
||||
`${index + 1}. ${model.name} | score=${model.score}${benchmarkPart} | ${model.summary}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if (payload.applied) {
|
||||
console.log('\nSaved .openclaude-profile.json with the recommended profile.')
|
||||
console.log('Next: bun run dev:profile')
|
||||
} else {
|
||||
console.log(
|
||||
'\nTip: run `bun run profile:auto -- --goal ' +
|
||||
payload.goal +
|
||||
'` to apply this automatically.',
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async function maybeApplyProfile(
|
||||
profile: ProviderProfile,
|
||||
model: string,
|
||||
goal: RecommendationGoal,
|
||||
baseUrl: string | null,
|
||||
): Promise<boolean> {
|
||||
let env: ProfileFile['env'] | null
|
||||
if (profile === 'ollama') {
|
||||
env = buildOllamaProfileEnv(model, {
|
||||
baseUrl,
|
||||
getOllamaChatBaseUrl,
|
||||
})
|
||||
} else {
|
||||
env = buildOpenAIProfileEnv({
|
||||
goal,
|
||||
model: model || getGoalDefaultOpenAIModel(goal),
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
processEnv: process.env,
|
||||
})
|
||||
|
||||
if (!env) {
|
||||
console.error('Cannot apply an OpenAI profile without OPENAI_API_KEY.')
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
const profileFile = createProfileFile(profile, env)
|
||||
|
||||
writeFileSync(
|
||||
resolve(process.cwd(), '.openclaude-profile.json'),
|
||||
JSON.stringify(profileFile, null, 2),
|
||||
'utf8',
|
||||
)
|
||||
return true
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const options = parseOptions(process.argv.slice(2))
|
||||
const ollamaAvailable =
|
||||
options.provider !== 'openai' &&
|
||||
(await hasLocalOllama(options.baseUrl ?? undefined))
|
||||
const ollamaModels = ollamaAvailable
|
||||
? await listOllamaModels(options.baseUrl ?? undefined)
|
||||
: []
|
||||
|
||||
const heuristicRanked = rankOllamaModels(ollamaModels, options.goal)
|
||||
const benchmarkInput = options.benchmark
|
||||
? heuristicRanked.filter(isViableOllamaChatModel).slice(0, 3)
|
||||
: []
|
||||
|
||||
const benchmarkResults: Record<string, number | null> = {}
|
||||
for (const model of benchmarkInput) {
|
||||
benchmarkResults[model.name] = await benchmarkOllamaModel(
|
||||
model.name,
|
||||
options.baseUrl ?? undefined,
|
||||
)
|
||||
}
|
||||
|
||||
const rankedModels: BenchmarkedOllamaModel[] = options.benchmark
|
||||
? applyBenchmarkLatency(heuristicRanked, benchmarkResults, options.goal)
|
||||
: heuristicRanked.map(model => ({
|
||||
...model,
|
||||
benchmarkMs: null,
|
||||
}))
|
||||
|
||||
const recommendedOllama = selectRecommendedOllamaModel(rankedModels)
|
||||
const openAIConfigured = Boolean(sanitizeApiKey(process.env.OPENAI_API_KEY))
|
||||
|
||||
let recommendedProfile: ProviderProfile
|
||||
let recommendedModel: string
|
||||
|
||||
if (options.provider === 'openai') {
|
||||
recommendedProfile = 'openai'
|
||||
recommendedModel = getGoalDefaultOpenAIModel(options.goal)
|
||||
} else if (options.provider === 'ollama') {
|
||||
if (!recommendedOllama) {
|
||||
console.error(
|
||||
'No Ollama models were discovered. Pull a model first or switch to --provider openai.',
|
||||
)
|
||||
process.exit(1)
|
||||
}
|
||||
recommendedProfile = 'ollama'
|
||||
recommendedModel = recommendedOllama.name
|
||||
} else if (recommendedOllama) {
|
||||
recommendedProfile = 'ollama'
|
||||
recommendedModel = recommendedOllama.name
|
||||
} else {
|
||||
recommendedProfile = 'openai'
|
||||
recommendedModel = getGoalDefaultOpenAIModel(options.goal)
|
||||
}
|
||||
|
||||
let applied = false
|
||||
if (options.apply) {
|
||||
applied = await maybeApplyProfile(
|
||||
recommendedProfile,
|
||||
recommendedModel,
|
||||
options.goal,
|
||||
options.baseUrl,
|
||||
)
|
||||
if (!applied) {
|
||||
process.exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
const payload = {
|
||||
goal: options.goal,
|
||||
provider: options.provider,
|
||||
ollamaAvailable,
|
||||
openAIConfigured,
|
||||
recommendedProfile,
|
||||
recommendedModel,
|
||||
benchmarked: options.benchmark,
|
||||
rankedModels,
|
||||
applied,
|
||||
}
|
||||
|
||||
if (options.json) {
|
||||
console.log(JSON.stringify(payload, null, 2))
|
||||
return
|
||||
}
|
||||
|
||||
printHumanSummary({
|
||||
goal: options.goal,
|
||||
recommendedProfile,
|
||||
recommendedModel,
|
||||
rankedModels,
|
||||
benchmarked: options.benchmark,
|
||||
applied,
|
||||
})
|
||||
|
||||
if (!recommendedOllama && !openAIConfigured) {
|
||||
console.log(
|
||||
'\nNo local Ollama model was detected and OPENAI_API_KEY is unset.',
|
||||
)
|
||||
console.log(
|
||||
'Next steps: `ollama pull qwen2.5-coder:7b` or set OPENAI_API_KEY.',
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
await main()
|
||||
|
||||
export {}
|
||||
383
src/utils/providerProfile.test.ts
Normal file
383
src/utils/providerProfile.test.ts
Normal file
@@ -0,0 +1,383 @@
|
||||
import assert from 'node:assert/strict'
|
||||
import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import test from 'node:test'
|
||||
|
||||
import {
|
||||
buildCodexProfileEnv,
|
||||
buildGeminiProfileEnv,
|
||||
buildLaunchEnv,
|
||||
buildOllamaProfileEnv,
|
||||
buildOpenAIProfileEnv,
|
||||
selectAutoProfile,
|
||||
type ProfileFile,
|
||||
} from './providerProfile.ts'
|
||||
|
||||
function profile(profile: ProfileFile['profile'], env: ProfileFile['env']): ProfileFile {
|
||||
return {
|
||||
profile,
|
||||
env,
|
||||
createdAt: '2026-04-01T00:00:00.000Z',
|
||||
}
|
||||
}
|
||||
|
||||
const missingCodexAuthPath = join(tmpdir(), 'openclaude-missing-codex-auth.json')
|
||||
|
||||
test('matching persisted ollama env is reused for ollama launch', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'ollama',
|
||||
persisted: profile('ollama', {
|
||||
OPENAI_BASE_URL: 'http://127.0.0.1:11435/v1',
|
||||
OPENAI_MODEL: 'mistral:7b-instruct',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {},
|
||||
getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
|
||||
resolveOllamaDefaultModel: async () => 'llama3.1:8b',
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'http://127.0.0.1:11435/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'mistral:7b-instruct')
|
||||
})
|
||||
|
||||
test('ollama launch ignores mismatched persisted openai env and shell model fallback', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'ollama',
|
||||
persisted: profile('openai', {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o',
|
||||
OPENAI_API_KEY: 'sk-persisted',
|
||||
}),
|
||||
goal: 'coding',
|
||||
processEnv: {
|
||||
OPENAI_BASE_URL: 'https://api.deepseek.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o-mini',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_live',
|
||||
},
|
||||
getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
|
||||
resolveOllamaDefaultModel: async () => 'qwen2.5-coder:7b',
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'http://localhost:11434/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'qwen2.5-coder:7b')
|
||||
assert.equal(env.OPENAI_API_KEY, undefined)
|
||||
assert.equal(env.CODEX_API_KEY, undefined)
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, undefined)
|
||||
})
|
||||
|
||||
test('openai launch ignores mismatched persisted ollama env', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'openai',
|
||||
persisted: profile('ollama', {
|
||||
OPENAI_BASE_URL: 'http://localhost:11434/v1',
|
||||
OPENAI_MODEL: 'llama3.1:8b',
|
||||
}),
|
||||
goal: 'latency',
|
||||
processEnv: {
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_live',
|
||||
},
|
||||
getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
|
||||
resolveOllamaDefaultModel: async () => 'llama3.1:8b',
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'gpt-4o-mini')
|
||||
assert.equal(env.OPENAI_API_KEY, 'sk-live')
|
||||
assert.equal(env.CODEX_API_KEY, undefined)
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, undefined)
|
||||
})
|
||||
|
||||
test('openai launch ignores codex shell transport hints', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'openai',
|
||||
persisted: null,
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexplan',
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'gpt-4o')
|
||||
assert.equal(env.OPENAI_API_KEY, 'sk-live')
|
||||
})
|
||||
|
||||
test('openai launch ignores codex persisted transport hints', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'openai',
|
||||
persisted: profile('openai', {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexplan',
|
||||
OPENAI_API_KEY: 'sk-persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'gpt-4o')
|
||||
assert.equal(env.OPENAI_API_KEY, 'sk-live')
|
||||
})
|
||||
|
||||
test('matching persisted gemini env is reused for gemini launch', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'gemini',
|
||||
persisted: profile('gemini', {
|
||||
GEMINI_MODEL: 'gemini-2.5-flash',
|
||||
GEMINI_API_KEY: 'gem-persisted',
|
||||
GEMINI_BASE_URL: 'https://example.test/v1beta/openai',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {},
|
||||
})
|
||||
|
||||
assert.equal(env.CLAUDE_CODE_USE_GEMINI, '1')
|
||||
assert.equal(env.CLAUDE_CODE_USE_OPENAI, undefined)
|
||||
assert.equal(env.GEMINI_MODEL, 'gemini-2.5-flash')
|
||||
assert.equal(env.GEMINI_API_KEY, 'gem-persisted')
|
||||
assert.equal(env.GEMINI_BASE_URL, 'https://example.test/v1beta/openai')
|
||||
})
|
||||
|
||||
test('gemini launch ignores mismatched persisted openai env and strips other provider secrets', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'gemini',
|
||||
persisted: profile('openai', {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o',
|
||||
OPENAI_API_KEY: 'sk-persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
GEMINI_API_KEY: 'gem-live',
|
||||
GOOGLE_API_KEY: 'google-live',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o-mini',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_live',
|
||||
CLAUDE_CODE_USE_OPENAI: '1',
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.CLAUDE_CODE_USE_GEMINI, '1')
|
||||
assert.equal(env.CLAUDE_CODE_USE_OPENAI, undefined)
|
||||
assert.equal(env.GEMINI_MODEL, 'gemini-2.0-flash')
|
||||
assert.equal(env.GEMINI_API_KEY, 'gem-live')
|
||||
assert.equal(
|
||||
env.GEMINI_BASE_URL,
|
||||
'https://generativelanguage.googleapis.com/v1beta/openai',
|
||||
)
|
||||
assert.equal(env.GOOGLE_API_KEY, undefined)
|
||||
assert.equal(env.OPENAI_API_KEY, undefined)
|
||||
assert.equal(env.CODEX_API_KEY, undefined)
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, undefined)
|
||||
})
|
||||
|
||||
test('matching persisted codex env is reused for codex launch', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('codex', {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CODEX_API_KEY: 'codex-persisted',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://chatgpt.com/backend-api/codex')
|
||||
assert.equal(env.OPENAI_MODEL, 'codexspark')
|
||||
assert.equal(env.CODEX_API_KEY, 'codex-persisted')
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_persisted')
|
||||
})
|
||||
|
||||
test('codex launch normalizes poisoned persisted base urls', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('codex', {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://chatgpt.com/backend-api/codex')
|
||||
assert.equal(env.OPENAI_MODEL, 'codexspark')
|
||||
})
|
||||
|
||||
test('codex launch ignores mismatched persisted openai env', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('openai', {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o',
|
||||
OPENAI_API_KEY: 'sk-persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o-mini',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_live',
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://chatgpt.com/backend-api/codex')
|
||||
assert.equal(env.OPENAI_MODEL, 'codexplan')
|
||||
assert.equal(env.OPENAI_API_KEY, undefined)
|
||||
assert.equal(env.CODEX_API_KEY, 'codex-live')
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_live')
|
||||
})
|
||||
|
||||
test('codex launch ignores placeholder codex env keys', async () => {
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('codex', {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CODEX_API_KEY: 'codex-persisted',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
CODEX_API_KEY: 'SUA_CHAVE',
|
||||
CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.CODEX_API_KEY, 'codex-persisted')
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_persisted')
|
||||
})
|
||||
|
||||
test('codex launch prefers auth account id over stale persisted value', async () => {
|
||||
const codexHome = mkdtempSync(join(tmpdir(), 'openclaude-codex-'))
|
||||
try {
|
||||
writeFileSync(
|
||||
join(codexHome, 'auth.json'),
|
||||
JSON.stringify({
|
||||
access_token: 'codex-live',
|
||||
account_id: 'acct_auth',
|
||||
}),
|
||||
'utf8',
|
||||
)
|
||||
|
||||
const env = await buildLaunchEnv({
|
||||
profile: 'codex',
|
||||
persisted: profile('codex', {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_persisted',
|
||||
}),
|
||||
goal: 'balanced',
|
||||
processEnv: {
|
||||
CODEX_HOME: codexHome,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env.CHATGPT_ACCOUNT_ID, 'acct_auth')
|
||||
} finally {
|
||||
rmSync(codexHome, { recursive: true, force: true })
|
||||
}
|
||||
})
|
||||
|
||||
test('ollama profiles never persist openai api keys', () => {
|
||||
const env = buildOllamaProfileEnv('llama3.1:8b', {
|
||||
getOllamaChatBaseUrl: () => 'http://localhost:11434/v1',
|
||||
})
|
||||
|
||||
assert.deepEqual(env, {
|
||||
OPENAI_BASE_URL: 'http://localhost:11434/v1',
|
||||
OPENAI_MODEL: 'llama3.1:8b',
|
||||
})
|
||||
assert.equal('OPENAI_API_KEY' in env, false)
|
||||
})
|
||||
|
||||
test('codex profiles accept explicit codex credentials', () => {
|
||||
const env = buildCodexProfileEnv({
|
||||
model: 'codexspark',
|
||||
apiKey: 'codex-live',
|
||||
processEnv: {
|
||||
CHATGPT_ACCOUNT_ID: 'acct_123',
|
||||
},
|
||||
})
|
||||
|
||||
assert.deepEqual(env, {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexspark',
|
||||
CODEX_API_KEY: 'codex-live',
|
||||
CHATGPT_ACCOUNT_ID: 'acct_123',
|
||||
})
|
||||
})
|
||||
|
||||
test('codex profiles require a chatgpt account id', () => {
|
||||
const env = buildCodexProfileEnv({
|
||||
model: 'codexspark',
|
||||
apiKey: 'codex-live',
|
||||
processEnv: {
|
||||
CODEX_AUTH_JSON_PATH: missingCodexAuthPath,
|
||||
},
|
||||
})
|
||||
|
||||
assert.equal(env, null)
|
||||
})
|
||||
|
||||
test('gemini profiles accept google api key fallback', () => {
|
||||
const env = buildGeminiProfileEnv({
|
||||
processEnv: {
|
||||
GOOGLE_API_KEY: 'gem-live',
|
||||
},
|
||||
})
|
||||
|
||||
assert.deepEqual(env, {
|
||||
GEMINI_MODEL: 'gemini-2.0-flash',
|
||||
GEMINI_API_KEY: 'gem-live',
|
||||
})
|
||||
})
|
||||
|
||||
test('gemini profiles require a key', () => {
|
||||
const env = buildGeminiProfileEnv({
|
||||
processEnv: {},
|
||||
})
|
||||
|
||||
assert.equal(env, null)
|
||||
})
|
||||
|
||||
test('openai profiles ignore codex shell transport hints', () => {
|
||||
const env = buildOpenAIProfileEnv({
|
||||
goal: 'balanced',
|
||||
apiKey: 'sk-live',
|
||||
processEnv: {
|
||||
OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
|
||||
OPENAI_MODEL: 'codexplan',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
},
|
||||
})
|
||||
|
||||
assert.deepEqual(env, {
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
OPENAI_MODEL: 'gpt-4o',
|
||||
OPENAI_API_KEY: 'sk-live',
|
||||
})
|
||||
})
|
||||
|
||||
test('auto profile falls back to openai when no viable ollama model exists', () => {
|
||||
assert.equal(selectAutoProfile(null), 'openai')
|
||||
assert.equal(selectAutoProfile('qwen2.5-coder:7b'), 'ollama')
|
||||
})
|
||||
314
src/utils/providerProfile.ts
Normal file
314
src/utils/providerProfile.ts
Normal file
@@ -0,0 +1,314 @@
|
||||
import {
|
||||
DEFAULT_CODEX_BASE_URL,
|
||||
DEFAULT_OPENAI_BASE_URL,
|
||||
isCodexBaseUrl,
|
||||
resolveCodexApiCredentials,
|
||||
resolveProviderRequest,
|
||||
} from '../services/api/providerConfig.ts'
|
||||
import {
|
||||
getGoalDefaultOpenAIModel,
|
||||
type RecommendationGoal,
|
||||
} from './providerRecommendation.ts'
|
||||
|
||||
// OpenAI-compatible Gemini endpoint and the fallback model used when neither
// the caller nor the environment specifies one.
const DEFAULT_GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/openai'
const DEFAULT_GEMINI_MODEL = 'gemini-2.0-flash'

// The provider kinds a persisted profile may select.
export type ProviderProfile = 'openai' | 'ollama' | 'codex' | 'gemini'

// Environment variables a profile may persist. All fields are optional:
// each profile kind writes only the subset relevant to its provider.
export type ProfileEnv = {
  OPENAI_BASE_URL?: string
  OPENAI_MODEL?: string
  OPENAI_API_KEY?: string
  CODEX_API_KEY?: string
  CHATGPT_ACCOUNT_ID?: string
  CODEX_ACCOUNT_ID?: string
  GEMINI_API_KEY?: string
  GEMINI_MODEL?: string
  GEMINI_BASE_URL?: string
}

// On-disk shape of a persisted profile file.
export type ProfileFile = {
  profile: ProviderProfile
  env: ProfileEnv
  // ISO-8601 timestamp; written by createProfileFile.
  createdAt: string
}
|
||||
|
||||
export function sanitizeApiKey(
|
||||
key: string | null | undefined,
|
||||
): string | undefined {
|
||||
if (!key || key === 'SUA_CHAVE') return undefined
|
||||
return key
|
||||
}
|
||||
|
||||
export function buildOllamaProfileEnv(
|
||||
model: string,
|
||||
options: {
|
||||
baseUrl?: string | null
|
||||
getOllamaChatBaseUrl: (baseUrl?: string) => string
|
||||
},
|
||||
): ProfileEnv {
|
||||
return {
|
||||
OPENAI_BASE_URL: options.getOllamaChatBaseUrl(options.baseUrl ?? undefined),
|
||||
OPENAI_MODEL: model,
|
||||
}
|
||||
}
|
||||
|
||||
export function buildGeminiProfileEnv(options: {
|
||||
model?: string | null
|
||||
baseUrl?: string | null
|
||||
apiKey?: string | null
|
||||
processEnv?: NodeJS.ProcessEnv
|
||||
}): ProfileEnv | null {
|
||||
const processEnv = options.processEnv ?? process.env
|
||||
const key = sanitizeApiKey(
|
||||
options.apiKey ??
|
||||
processEnv.GEMINI_API_KEY ??
|
||||
processEnv.GOOGLE_API_KEY,
|
||||
)
|
||||
if (!key) {
|
||||
return null
|
||||
}
|
||||
|
||||
const env: ProfileEnv = {
|
||||
GEMINI_MODEL:
|
||||
options.model || processEnv.GEMINI_MODEL || DEFAULT_GEMINI_MODEL,
|
||||
GEMINI_API_KEY: key,
|
||||
}
|
||||
|
||||
const baseUrl = options.baseUrl || processEnv.GEMINI_BASE_URL
|
||||
if (baseUrl) {
|
||||
env.GEMINI_BASE_URL = baseUrl
|
||||
}
|
||||
|
||||
return env
|
||||
}
|
||||
|
||||
export function buildOpenAIProfileEnv(options: {
|
||||
goal: RecommendationGoal
|
||||
model?: string | null
|
||||
baseUrl?: string | null
|
||||
apiKey?: string | null
|
||||
processEnv?: NodeJS.ProcessEnv
|
||||
}): ProfileEnv | null {
|
||||
const processEnv = options.processEnv ?? process.env
|
||||
const key = sanitizeApiKey(options.apiKey ?? processEnv.OPENAI_API_KEY)
|
||||
if (!key) {
|
||||
return null
|
||||
}
|
||||
|
||||
const defaultModel = getGoalDefaultOpenAIModel(options.goal)
|
||||
const shellOpenAIRequest = resolveProviderRequest({
|
||||
model: processEnv.OPENAI_MODEL,
|
||||
baseUrl: processEnv.OPENAI_BASE_URL,
|
||||
fallbackModel: defaultModel,
|
||||
})
|
||||
const useShellOpenAIConfig = shellOpenAIRequest.transport === 'chat_completions'
|
||||
|
||||
return {
|
||||
OPENAI_BASE_URL:
|
||||
options.baseUrl ||
|
||||
(useShellOpenAIConfig ? processEnv.OPENAI_BASE_URL : undefined) ||
|
||||
DEFAULT_OPENAI_BASE_URL,
|
||||
OPENAI_MODEL:
|
||||
options.model ||
|
||||
(useShellOpenAIConfig ? processEnv.OPENAI_MODEL : undefined) ||
|
||||
defaultModel,
|
||||
OPENAI_API_KEY: key,
|
||||
}
|
||||
}
|
||||
|
||||
export function buildCodexProfileEnv(options: {
|
||||
model?: string | null
|
||||
baseUrl?: string | null
|
||||
apiKey?: string | null
|
||||
processEnv?: NodeJS.ProcessEnv
|
||||
}): ProfileEnv | null {
|
||||
const processEnv = options.processEnv ?? process.env
|
||||
const key = sanitizeApiKey(options.apiKey ?? processEnv.CODEX_API_KEY)
|
||||
const credentialEnv = key
|
||||
? ({ ...processEnv, CODEX_API_KEY: key } as NodeJS.ProcessEnv)
|
||||
: processEnv
|
||||
const credentials = resolveCodexApiCredentials(credentialEnv)
|
||||
if (!credentials.apiKey || !credentials.accountId) {
|
||||
return null
|
||||
}
|
||||
|
||||
const env: ProfileEnv = {
|
||||
OPENAI_BASE_URL: options.baseUrl || DEFAULT_CODEX_BASE_URL,
|
||||
OPENAI_MODEL: options.model || 'codexplan',
|
||||
}
|
||||
|
||||
if (key) {
|
||||
env.CODEX_API_KEY = key
|
||||
}
|
||||
|
||||
env.CHATGPT_ACCOUNT_ID = credentials.accountId
|
||||
|
||||
return env
|
||||
}
|
||||
|
||||
export function createProfileFile(
|
||||
profile: ProviderProfile,
|
||||
env: ProfileEnv,
|
||||
): ProfileFile {
|
||||
return {
|
||||
profile,
|
||||
env,
|
||||
createdAt: new Date().toISOString(),
|
||||
}
|
||||
}
|
||||
|
||||
export function selectAutoProfile(
|
||||
recommendedOllamaModel: string | null,
|
||||
): ProviderProfile {
|
||||
return recommendedOllamaModel ? 'ollama' : 'openai'
|
||||
}
|
||||
|
||||
/**
 * Builds the process environment for launching the selected provider
 * profile.
 *
 * Starts from a copy of the shell environment, then — per profile — strips
 * every other provider's credentials/transport hints and fills in base URL,
 * model, and credentials from (in varying precedence) the shell env, the
 * persisted profile file, and goal-based defaults. Persisted values are
 * honored only when the persisted file's profile matches the requested one.
 *
 * The injected getOllamaChatBaseUrl / resolveOllamaDefaultModel hooks exist
 * so tests can avoid touching a real Ollama daemon.
 */
export async function buildLaunchEnv(options: {
  profile: ProviderProfile
  persisted: ProfileFile | null
  goal: RecommendationGoal
  processEnv?: NodeJS.ProcessEnv
  getOllamaChatBaseUrl?: (baseUrl?: string) => string
  resolveOllamaDefaultModel?: (goal: RecommendationGoal) => Promise<string>
}): Promise<NodeJS.ProcessEnv> {
  const processEnv = options.processEnv ?? process.env
  // Only trust persisted env vars when they were written by the same profile
  // kind we are launching; a mismatched file is ignored entirely.
  const persistedEnv =
    options.persisted?.profile === options.profile
      ? options.persisted.env ?? {}
      : {}

  // Placeholder-filtered Gemini keys, shell taking precedence over persisted.
  const shellGeminiKey = sanitizeApiKey(
    processEnv.GEMINI_API_KEY ?? processEnv.GOOGLE_API_KEY,
  )
  const persistedGeminiKey = sanitizeApiKey(persistedEnv.GEMINI_API_KEY)

  if (options.profile === 'gemini') {
    const env: NodeJS.ProcessEnv = {
      ...processEnv,
      CLAUDE_CODE_USE_GEMINI: '1',
    }

    delete env.CLAUDE_CODE_USE_OPENAI

    // Shell values win over persisted ones; defaults fill the rest.
    env.GEMINI_MODEL =
      processEnv.GEMINI_MODEL ||
      persistedEnv.GEMINI_MODEL ||
      DEFAULT_GEMINI_MODEL
    env.GEMINI_BASE_URL =
      processEnv.GEMINI_BASE_URL ||
      persistedEnv.GEMINI_BASE_URL ||
      DEFAULT_GEMINI_BASE_URL

    const geminiKey = shellGeminiKey || persistedGeminiKey
    if (geminiKey) {
      env.GEMINI_API_KEY = geminiKey
    } else {
      delete env.GEMINI_API_KEY
    }

    // Strip every other provider's secrets/transport hints from the launch env.
    delete env.GOOGLE_API_KEY
    delete env.OPENAI_BASE_URL
    delete env.OPENAI_MODEL
    delete env.OPENAI_API_KEY
    delete env.CODEX_API_KEY
    delete env.CHATGPT_ACCOUNT_ID
    delete env.CODEX_ACCOUNT_ID

    return env
  }

  // All remaining profiles (openai / ollama / codex) use the OpenAI-compatible
  // transport flag and must not carry Gemini state.
  const env: NodeJS.ProcessEnv = {
    ...processEnv,
    CLAUDE_CODE_USE_OPENAI: '1',
  }

  delete env.CLAUDE_CODE_USE_GEMINI
  delete env.GEMINI_API_KEY
  delete env.GEMINI_MODEL
  delete env.GEMINI_BASE_URL
  delete env.GOOGLE_API_KEY

  if (options.profile === 'ollama') {
    const getOllamaBaseUrl =
      options.getOllamaChatBaseUrl ?? (() => 'http://localhost:11434/v1')
    const resolveOllamaModel =
      options.resolveOllamaDefaultModel ?? (async () => 'llama3.1:8b')

    // Persisted values win for ollama; otherwise discover base URL / model.
    env.OPENAI_BASE_URL = persistedEnv.OPENAI_BASE_URL || getOllamaBaseUrl()
    env.OPENAI_MODEL =
      persistedEnv.OPENAI_MODEL ||
      (await resolveOllamaModel(options.goal))

    // Local Ollama needs no credentials; scrub all of them.
    delete env.OPENAI_API_KEY
    delete env.CODEX_API_KEY
    delete env.CHATGPT_ACCOUNT_ID
    delete env.CODEX_ACCOUNT_ID

    return env
  }

  if (options.profile === 'codex') {
    // Reject persisted base URLs that are not actually codex endpoints
    // (normalizes "poisoned" profile files back to the default).
    env.OPENAI_BASE_URL =
      persistedEnv.OPENAI_BASE_URL && isCodexBaseUrl(persistedEnv.OPENAI_BASE_URL)
        ? persistedEnv.OPENAI_BASE_URL
        : DEFAULT_CODEX_BASE_URL
    env.OPENAI_MODEL = persistedEnv.OPENAI_MODEL || 'codexplan'
    delete env.OPENAI_API_KEY

    // Key: live shell value beats persisted; both placeholder-filtered.
    const codexKey =
      sanitizeApiKey(processEnv.CODEX_API_KEY) ||
      sanitizeApiKey(persistedEnv.CODEX_API_KEY)
    const liveCodexCredentials = resolveCodexApiCredentials(processEnv)
    // Account id: shell vars, then live auth.json credentials, then the
    // (possibly stale) persisted values — in that order.
    const codexAccountId =
      processEnv.CHATGPT_ACCOUNT_ID ||
      processEnv.CODEX_ACCOUNT_ID ||
      liveCodexCredentials.accountId ||
      persistedEnv.CHATGPT_ACCOUNT_ID ||
      persistedEnv.CODEX_ACCOUNT_ID
    if (codexKey) {
      env.CODEX_API_KEY = codexKey
    } else {
      delete env.CODEX_API_KEY
    }

    if (codexAccountId) {
      env.CHATGPT_ACCOUNT_ID = codexAccountId
    } else {
      delete env.CHATGPT_ACCOUNT_ID
    }
    delete env.CODEX_ACCOUNT_ID

    return env
  }

  // Default branch: plain OpenAI profile. Shell/persisted OPENAI_* values are
  // honored only when they resolve to a chat-completions transport, so codex
  // transport hints cannot leak into an openai launch.
  const defaultOpenAIModel = getGoalDefaultOpenAIModel(options.goal)
  const shellOpenAIRequest = resolveProviderRequest({
    model: processEnv.OPENAI_MODEL,
    baseUrl: processEnv.OPENAI_BASE_URL,
    fallbackModel: defaultOpenAIModel,
  })
  const persistedOpenAIRequest = resolveProviderRequest({
    model: persistedEnv.OPENAI_MODEL,
    baseUrl: persistedEnv.OPENAI_BASE_URL,
    fallbackModel: defaultOpenAIModel,
  })
  const useShellOpenAIConfig = shellOpenAIRequest.transport === 'chat_completions'
  // An empty persisted config is trivially acceptable; otherwise it must also
  // resolve to chat-completions.
  const usePersistedOpenAIConfig =
    (!persistedEnv.OPENAI_MODEL && !persistedEnv.OPENAI_BASE_URL) ||
    persistedOpenAIRequest.transport === 'chat_completions'

  env.OPENAI_BASE_URL =
    (useShellOpenAIConfig ? processEnv.OPENAI_BASE_URL : undefined) ||
    (usePersistedOpenAIConfig ? persistedEnv.OPENAI_BASE_URL : undefined) ||
    DEFAULT_OPENAI_BASE_URL
  env.OPENAI_MODEL =
    (useShellOpenAIConfig ? processEnv.OPENAI_MODEL : undefined) ||
    (usePersistedOpenAIConfig ? persistedEnv.OPENAI_MODEL : undefined) ||
    defaultOpenAIModel
  env.OPENAI_API_KEY = processEnv.OPENAI_API_KEY || persistedEnv.OPENAI_API_KEY
  delete env.CODEX_API_KEY
  delete env.CHATGPT_ACCOUNT_ID
  delete env.CODEX_ACCOUNT_ID
  return env
}
|
||||
194
src/utils/providerRecommendation.test.ts
Normal file
194
src/utils/providerRecommendation.test.ts
Normal file
@@ -0,0 +1,194 @@
|
||||
import assert from 'node:assert/strict'
|
||||
import test from 'node:test'
|
||||
|
||||
import {
|
||||
applyBenchmarkLatency,
|
||||
getGoalDefaultOpenAIModel,
|
||||
normalizeRecommendationGoal,
|
||||
rankOllamaModels,
|
||||
recommendOllamaModel,
|
||||
type OllamaModelDescriptor,
|
||||
} from './providerRecommendation.ts'
|
||||
|
||||
function model(
|
||||
name: string,
|
||||
overrides: Partial<OllamaModelDescriptor> = {},
|
||||
): OllamaModelDescriptor {
|
||||
return {
|
||||
name,
|
||||
sizeBytes: null,
|
||||
family: null,
|
||||
families: [],
|
||||
parameterSize: null,
|
||||
quantizationLevel: null,
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
test('normalizes recommendation goals safely', () => {
|
||||
assert.equal(normalizeRecommendationGoal('coding'), 'coding')
|
||||
assert.equal(normalizeRecommendationGoal(' LATENCY '), 'latency')
|
||||
assert.equal(normalizeRecommendationGoal('weird'), 'balanced')
|
||||
assert.equal(normalizeRecommendationGoal(undefined), 'balanced')
|
||||
})
|
||||
|
||||
test('coding goal prefers coding-oriented ollama models', () => {
|
||||
const recommended = recommendOllamaModel(
|
||||
[
|
||||
model('llama3.1:8b', {
|
||||
parameterSize: '8B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('qwen2.5-coder:7b', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'coding',
|
||||
)
|
||||
|
||||
assert.equal(recommended?.name, 'qwen2.5-coder:7b')
|
||||
})
|
||||
|
||||
test('latency goal prefers smaller models', () => {
|
||||
const recommended = recommendOllamaModel(
|
||||
[
|
||||
model('llama3.1:70b', {
|
||||
parameterSize: '70B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('llama3.2:3b', {
|
||||
parameterSize: '3B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'latency',
|
||||
)
|
||||
|
||||
assert.equal(recommended?.name, 'llama3.2:3b')
|
||||
})
|
||||
|
||||
test('non-chat embedding models are heavily demoted', () => {
|
||||
const ranked = rankOllamaModels(
|
||||
[
|
||||
model('nomic-embed-text', { parameterSize: '0.5B' }),
|
||||
model('mistral:7b-instruct', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'balanced',
|
||||
)
|
||||
|
||||
assert.equal(ranked[0]?.name, 'mistral:7b-instruct')
|
||||
})
|
||||
|
||||
test('auto-pick ignores non-chat ollama models', () => {
|
||||
const recommended = recommendOllamaModel(
|
||||
[
|
||||
model('nomic-embed-text', { parameterSize: '0.5B' }),
|
||||
model('bge-reranker-v2', { parameterSize: '1.5B' }),
|
||||
model('whisper-large-v3', { parameterSize: '1.6B' }),
|
||||
],
|
||||
'balanced',
|
||||
)
|
||||
|
||||
assert.equal(recommended, null)
|
||||
})
|
||||
|
||||
test('benchmark latency can reorder close recommendations', () => {
|
||||
const ranked = rankOllamaModels(
|
||||
[
|
||||
model('llama3.1:8b', {
|
||||
parameterSize: '8B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('mistral:7b-instruct', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'latency',
|
||||
)
|
||||
|
||||
const benchmarked = applyBenchmarkLatency(
|
||||
ranked,
|
||||
{
|
||||
'llama3.1:8b': 2000,
|
||||
'mistral:7b-instruct': 350,
|
||||
},
|
||||
'latency',
|
||||
)
|
||||
|
||||
assert.equal(benchmarked[0]?.name, 'mistral:7b-instruct')
|
||||
assert.equal(benchmarked[0]?.benchmarkMs, 350)
|
||||
})
|
||||
|
||||
test('unbenchmarked models stay behind benchmarked candidates', () => {
|
||||
const ranked = rankOllamaModels(
|
||||
[
|
||||
model('phi4-mini:4b', {
|
||||
parameterSize: '4B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('mistral:7b-instruct', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('llama3.1:8b', {
|
||||
parameterSize: '8B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('qwen2.5:14b', {
|
||||
parameterSize: '14B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'latency',
|
||||
)
|
||||
|
||||
const benchmarked = applyBenchmarkLatency(
|
||||
ranked,
|
||||
{
|
||||
'phi4-mini:4b': 2400,
|
||||
'mistral:7b-instruct': 2200,
|
||||
'llama3.1:8b': 2100,
|
||||
},
|
||||
'latency',
|
||||
)
|
||||
|
||||
assert.ok(benchmarked.slice(0, 3).every(item => item.benchmarkMs !== null))
|
||||
assert.equal(benchmarked[3]?.name, 'qwen2.5:14b')
|
||||
assert.equal(benchmarked[3]?.benchmarkMs, null)
|
||||
})
|
||||
|
||||
test('coding goal recognizes codestral and devstral families', () => {
|
||||
const ranked = rankOllamaModels(
|
||||
[
|
||||
model('mistral:7b-instruct', {
|
||||
parameterSize: '7B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('codestral:22b', {
|
||||
parameterSize: '22B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
model('devstral:24b', {
|
||||
parameterSize: '24B',
|
||||
quantizationLevel: 'Q4_K_M',
|
||||
}),
|
||||
],
|
||||
'coding',
|
||||
)
|
||||
|
||||
assert.deepEqual(ranked.slice(0, 2).map(item => item.name), [
|
||||
'devstral:24b',
|
||||
'codestral:22b',
|
||||
])
|
||||
})
|
||||
|
||||
test('goal defaults choose sensible openai models', () => {
|
||||
assert.equal(getGoalDefaultOpenAIModel('latency'), 'gpt-4o-mini')
|
||||
assert.equal(getGoalDefaultOpenAIModel('balanced'), 'gpt-4o')
|
||||
assert.equal(getGoalDefaultOpenAIModel('coding'), 'gpt-4o')
|
||||
})
|
||||
317
src/utils/providerRecommendation.ts
Normal file
317
src/utils/providerRecommendation.ts
Normal file
@@ -0,0 +1,317 @@
|
||||
// The optimization target used to rank local models and pick defaults.
export type RecommendationGoal = 'latency' | 'balanced' | 'coding'

// Raw metadata for a locally installed Ollama model. Every descriptive
// field may be missing; ranking degrades gracefully when they are.
export type OllamaModelDescriptor = {
  name: string
  sizeBytes?: number | null
  family?: string | null
  families?: string[]
  parameterSize?: string | null
  quantizationLevel?: string | null
}

// A descriptor plus its heuristic score and human-readable rationale.
export type RankedOllamaModel = OllamaModelDescriptor & {
  score: number
  reasons: string[]
  summary: string
}

// A ranked model annotated with measured latency; null when not benchmarked.
export type BenchmarkedOllamaModel = RankedOllamaModel & {
  benchmarkMs: number | null
}

// Substrings marking coding-specialized model families.
const CODING_HINTS = [
  'coder',
  'codellama',
  'codegemma',
  'codestral',
  'devstral',
  'starcoder',
  'deepseek-coder',
  'qwen2.5-coder',
  'qwen-coder',
]

// Substrings marking mainstream general-purpose chat families.
const GENERAL_HINTS = [
  'llama',
  'qwen',
  'mistral',
  'gemma',
  'phi',
  'deepseek',
]

// Markers for instruction-tuned variants, and for artifacts that cannot
// chat at all (embeddings, rerankers, speech models).
const INSTRUCT_HINTS = ['instruct', 'chat', 'assistant']
const NON_CHAT_HINTS = ['embed', 'embedding', 'rerank', 'bge', 'whisper']
||||
|
||||
function modelHaystack(model: OllamaModelDescriptor): string {
|
||||
return [
|
||||
model.name,
|
||||
model.family ?? '',
|
||||
...(model.families ?? []),
|
||||
model.parameterSize ?? '',
|
||||
model.quantizationLevel ?? '',
|
||||
]
|
||||
.join(' ')
|
||||
.toLowerCase()
|
||||
}
|
||||
|
||||
function includesAny(text: string, needles: string[]): boolean {
|
||||
return needles.some(needle => text.includes(needle))
|
||||
}
|
||||
|
||||
export function isViableOllamaChatModel(model: OllamaModelDescriptor): boolean {
|
||||
return !includesAny(modelHaystack(model), NON_CHAT_HINTS)
|
||||
}
|
||||
|
||||
export function selectRecommendedOllamaModel<
|
||||
T extends OllamaModelDescriptor,
|
||||
>(models: T[]): T | null {
|
||||
return models.find(isViableOllamaChatModel) ?? null
|
||||
}
|
||||
|
||||
function inferParameterBillions(model: OllamaModelDescriptor): number | null {
|
||||
const text = `${model.parameterSize ?? ''} ${model.name}`.toLowerCase()
|
||||
const match = text.match(/(\d+(?:\.\d+)?)\s*b\b/)
|
||||
if (match?.[1]) {
|
||||
return Number(match[1])
|
||||
}
|
||||
if (typeof model.sizeBytes === 'number' && model.sizeBytes > 0) {
|
||||
return Number((model.sizeBytes / 1_000_000_000).toFixed(1))
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
function quantizationBucket(model: OllamaModelDescriptor): string {
|
||||
return (model.quantizationLevel ?? model.name).toLowerCase()
|
||||
}
|
||||
|
||||
function scoreSizeTier(
|
||||
paramsB: number | null,
|
||||
goal: RecommendationGoal,
|
||||
reasons: string[],
|
||||
): number {
|
||||
if (paramsB === null) {
|
||||
reasons.push('unknown size')
|
||||
return 0
|
||||
}
|
||||
|
||||
if (goal === 'latency') {
|
||||
if (paramsB <= 4) {
|
||||
reasons.push('tiny model for low latency')
|
||||
return 32
|
||||
}
|
||||
if (paramsB <= 8) {
|
||||
reasons.push('small model for fast responses')
|
||||
return 26
|
||||
}
|
||||
if (paramsB <= 14) {
|
||||
reasons.push('mid-sized model with acceptable latency')
|
||||
return 16
|
||||
}
|
||||
if (paramsB <= 24) {
|
||||
reasons.push('larger model may be slower')
|
||||
return 8
|
||||
}
|
||||
reasons.push('large model likely slower locally')
|
||||
return paramsB <= 40 ? 0 : -8
|
||||
}
|
||||
|
||||
if (goal === 'coding') {
|
||||
if (paramsB >= 7 && paramsB <= 14) {
|
||||
reasons.push('strong coding size tier')
|
||||
return 24
|
||||
}
|
||||
if (paramsB > 14 && paramsB <= 34) {
|
||||
reasons.push('large coding-capable size tier')
|
||||
return 28
|
||||
}
|
||||
if (paramsB > 34) {
|
||||
reasons.push('very large model with higher quality potential')
|
||||
return 18
|
||||
}
|
||||
reasons.push('compact model may trade off coding depth')
|
||||
return 12
|
||||
}
|
||||
|
||||
if (paramsB >= 7 && paramsB <= 14) {
|
||||
reasons.push('great balanced size tier')
|
||||
return 26
|
||||
}
|
||||
if (paramsB >= 3 && paramsB < 7) {
|
||||
reasons.push('compact balanced size tier')
|
||||
return 18
|
||||
}
|
||||
if (paramsB > 14 && paramsB <= 24) {
|
||||
reasons.push('high quality balanced size tier')
|
||||
return 20
|
||||
}
|
||||
if (paramsB > 24) {
|
||||
reasons.push('large model for quality-first usage')
|
||||
return 10
|
||||
}
|
||||
reasons.push('very small model for general usage')
|
||||
return 8
|
||||
}
|
||||
|
||||
function scoreQuantization(
|
||||
model: OllamaModelDescriptor,
|
||||
goal: RecommendationGoal,
|
||||
reasons: string[],
|
||||
): number {
|
||||
const quant = quantizationBucket(model)
|
||||
if (quant.includes('q4')) {
|
||||
reasons.push('efficient Q4 quantization')
|
||||
return goal === 'latency' ? 8 : 4
|
||||
}
|
||||
if (quant.includes('q5')) {
|
||||
reasons.push('balanced Q5 quantization')
|
||||
return goal === 'latency' ? 6 : 5
|
||||
}
|
||||
if (quant.includes('q8')) {
|
||||
reasons.push('higher quality Q8 quantization')
|
||||
return goal === 'latency' ? 2 : 5
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
function compareRankedModels(
|
||||
a: RankedOllamaModel | BenchmarkedOllamaModel,
|
||||
b: RankedOllamaModel | BenchmarkedOllamaModel,
|
||||
goal: RecommendationGoal,
|
||||
): number {
|
||||
if (b.score !== a.score) {
|
||||
return b.score - a.score
|
||||
}
|
||||
|
||||
const aSize = inferParameterBillions(a) ?? Number.POSITIVE_INFINITY
|
||||
const bSize = inferParameterBillions(b) ?? Number.POSITIVE_INFINITY
|
||||
|
||||
if (goal === 'latency') {
|
||||
return aSize - bSize
|
||||
}
|
||||
|
||||
if (goal === 'coding') {
|
||||
return bSize - aSize
|
||||
}
|
||||
|
||||
const target = 14
|
||||
return Math.abs(aSize - target) - Math.abs(bSize - target)
|
||||
}
|
||||
|
||||
export function normalizeRecommendationGoal(
|
||||
goal: string | null | undefined,
|
||||
): RecommendationGoal {
|
||||
const normalized = goal?.trim().toLowerCase()
|
||||
if (
|
||||
normalized === 'latency' ||
|
||||
normalized === 'balanced' ||
|
||||
normalized === 'coding'
|
||||
) {
|
||||
return normalized
|
||||
}
|
||||
return 'balanced'
|
||||
}
|
||||
|
||||
export function getGoalDefaultOpenAIModel(goal: RecommendationGoal): string {
|
||||
switch (goal) {
|
||||
case 'latency':
|
||||
return 'gpt-4o-mini'
|
||||
case 'coding':
|
||||
return 'gpt-4o'
|
||||
case 'balanced':
|
||||
default:
|
||||
return 'gpt-4o'
|
||||
}
|
||||
}
|
||||
|
||||
export function rankOllamaModels(
|
||||
models: OllamaModelDescriptor[],
|
||||
goal: RecommendationGoal,
|
||||
): RankedOllamaModel[] {
|
||||
return models
|
||||
.map(model => {
|
||||
const haystack = modelHaystack(model)
|
||||
const reasons: string[] = []
|
||||
let score = 0
|
||||
|
||||
if (includesAny(haystack, NON_CHAT_HINTS)) {
|
||||
score -= 40
|
||||
reasons.push('not a chat-first model')
|
||||
}
|
||||
|
||||
if (includesAny(haystack, CODING_HINTS)) {
|
||||
score += goal === 'coding' ? 24 : goal === 'balanced' ? 10 : 4
|
||||
reasons.push('coding-oriented model family')
|
||||
}
|
||||
|
||||
if (includesAny(haystack, GENERAL_HINTS)) {
|
||||
score += goal === 'latency' ? 4 : goal === 'coding' ? 6 : 8
|
||||
reasons.push('strong general-purpose model family')
|
||||
}
|
||||
|
||||
if (includesAny(haystack, INSTRUCT_HINTS)) {
|
||||
score += goal === 'latency' ? 2 : 6
|
||||
reasons.push('chat/instruct tuned')
|
||||
}
|
||||
|
||||
if (haystack.includes('vision') || haystack.includes('vl')) {
|
||||
score -= 2
|
||||
reasons.push('vision model adds extra overhead')
|
||||
}
|
||||
|
||||
score += scoreSizeTier(inferParameterBillions(model), goal, reasons)
|
||||
score += scoreQuantization(model, goal, reasons)
|
||||
|
||||
const summary = reasons.slice(0, 3).join(', ')
|
||||
return {
|
||||
...model,
|
||||
score,
|
||||
reasons,
|
||||
summary,
|
||||
}
|
||||
})
|
||||
.sort((a, b) => compareRankedModels(a, b, goal))
|
||||
}
|
||||
|
||||
export function recommendOllamaModel(
|
||||
models: OllamaModelDescriptor[],
|
||||
goal: RecommendationGoal,
|
||||
): RankedOllamaModel | null {
|
||||
return selectRecommendedOllamaModel(rankOllamaModels(models, goal))
|
||||
}
|
||||
|
||||
export function applyBenchmarkLatency(
|
||||
models: RankedOllamaModel[],
|
||||
benchmarkMs: Record<string, number | null>,
|
||||
goal: RecommendationGoal,
|
||||
): BenchmarkedOllamaModel[] {
|
||||
const divisor =
|
||||
goal === 'latency' ? 120 : goal === 'coding' ? 500 : 240
|
||||
|
||||
const scoredModels = models
|
||||
.map(model => {
|
||||
const latency = benchmarkMs[model.name] ?? null
|
||||
const benchmarkPenalty = latency === null ? 0 : latency / divisor
|
||||
const reasons =
|
||||
latency === null
|
||||
? model.reasons
|
||||
: [`benchmarked at ${Math.round(latency)}ms`, ...model.reasons]
|
||||
|
||||
return {
|
||||
...model,
|
||||
benchmarkMs: latency,
|
||||
reasons,
|
||||
summary: reasons.slice(0, 3).join(', '),
|
||||
score: Number((model.score - benchmarkPenalty).toFixed(2)),
|
||||
}
|
||||
})
|
||||
|
||||
const benchmarkedModels = scoredModels.filter(model => model.benchmarkMs !== null)
|
||||
if (benchmarkedModels.length === 0) {
|
||||
return scoredModels.sort((a, b) => compareRankedModels(a, b, goal))
|
||||
}
|
||||
|
||||
const unbenchmarkedModels = scoredModels.filter(model => model.benchmarkMs === null)
|
||||
benchmarkedModels.sort((a, b) => compareRankedModels(a, b, goal))
|
||||
return [...benchmarkedModels, ...unbenchmarkedModels]
|
||||
}
|
||||
Reference in New Issue
Block a user