feat(api): smart model routing primitive (cheap-for-simple, strong-for-hard) (#785)

Most everyday turns ("ok", "thanks", "yep go ahead", "what does that do?") get no measurable quality improvement from Opus-tier models over Haiku-tier, but cost ~10x more and stream slower. Smart routing opts a user into automatically routing obviously-simple turns to a cheaper model while keeping the strong model for anything non-trivial.

New module src/services/api/smartModelRouting.ts:
- routeModel(input, config) → { model, complexity, reason }
- Pure primitive: no env reads, no state, caller supplies everything.
- Config is opt-in (enabled: false by default).

Routes to strong (conservative) when ANY of:
- First turn of session (task-setup is worth the quality)
- Code fence or inline code span present
- Reasoning/planning keyword (plan, design, refactor, debug, architect, investigate, root cause, etc. — 20+ anchors)
- Multi-paragraph input
- Over char/word cutoff (defaults: 160 chars, 28 words; matches hermes)

Routes to simple only for clearly-trivial chatter.

Decision includes a reason string for a future UI indicator that shows which tier handled the turn.

Integration into query path is intentionally deferred to a follow-up PR so the heuristics can be reviewed and tuned in isolation first.

Co-authored-by: OpenClaude <openclaude@gitlawb.com>
2026-04-21 21:50:24 +08:00
parent b95d2221df
commit e908864da7
2 changed files with 406 additions and 0 deletions
--- a/src/services/api/smartModelRouting.test.ts
+++ b/src/services/api/smartModelRouting.test.ts
@@ -0,0 +1,191 @@
 import { describe, expect, test } from 'bun:test'
 import {
  routeModel,
  type SmartRoutingConfig,
 } from './smartModelRouting.ts'
 /**
 * Baseline opt-in configuration shared by every test in this file.
 * Individual tests spread it and override a single field at a time.
 */
 const ENABLED: SmartRoutingConfig = {
  strongModel: 'claude-opus-4-7',
  simpleModel: 'claude-haiku-4-5',
  enabled: true,
 }
 // Guard-rail cases: when routing is switched off, or the config cannot
 // express a real cheap/strong split, the decision must land on the strong
 // model.
 describe('routeModel — disabled / misconfigured', () => {
  const STRONG_MODEL = 'claude-opus-4-7'
  // Routes a trivial greeting through ENABLED with the given fields replaced.
  const routeGreeting = (overrides: Partial<SmartRoutingConfig>) =>
    routeModel({ userText: 'hi' }, { ...ENABLED, ...overrides })
  test('disabled config routes to strong', () => {
    const { model, complexity, reason } = routeGreeting({ enabled: false })
    expect(model).toBe(STRONG_MODEL)
    expect(complexity).toBe('strong')
    expect(reason).toContain('disabled')
  })
  test('missing simpleModel falls back to strong', () => {
    const { model, complexity } = routeGreeting({ simpleModel: '' })
    expect(model).toBe(STRONG_MODEL)
    expect(complexity).toBe('strong')
  })
  test('simpleModel === strongModel routes to strong (no-op)', () => {
    const { model, complexity } = routeGreeting({ simpleModel: STRONG_MODEL })
    expect(model).toBe(STRONG_MODEL)
    expect(complexity).toBe('strong')
  })
 })
 // Inputs that are plainly trivial chatter and should be sent to the cheap
 // model.
 describe('routeModel — simple path', () => {
  const SIMPLE_MODEL = 'claude-haiku-4-5'
  test('short greeting routes to simple', () => {
    const turn = { userText: 'thanks!', turnNumber: 5 }
    const { model, complexity } = routeModel(turn, ENABLED)
    expect(model).toBe(SIMPLE_MODEL)
    expect(complexity).toBe('simple')
  })
  test('empty input routes to simple', () => {
    // Whitespace-only text counts as empty.
    const turn = { userText: '   ' }
    const { model, complexity } = routeModel(turn, ENABLED)
    expect(model).toBe(SIMPLE_MODEL)
    expect(complexity).toBe('simple')
  })
  test('mid-length chatter routes to simple', () => {
    const turn = { userText: 'yep looks good, go ahead', turnNumber: 10 }
    const { complexity } = routeModel(turn, ENABLED)
    expect(complexity).toBe('simple')
  })
 })
 // Heuristics that escalate a turn to the strong model, plus the exact
 // boundary behaviour of the character cutoff.
 describe('routeModel — strong path', () => {
  // Routes `userText` as a mid-session turn (turn 5) so the first-turn rule
  // never masks the heuristic under test.
  const routeMidSession = (
    userText: string,
    overrides: Partial<SmartRoutingConfig> = {},
  ) => routeModel({ userText, turnNumber: 5 }, { ...ENABLED, ...overrides })
  // The module defaults, spelled out explicitly for the boundary tests.
  const EXPLICIT_DEFAULT_LIMITS = { simpleMaxChars: 160, simpleMaxWords: 28 }
  test('first turn always routes to strong, even when short', () => {
    const { model, complexity, reason } = routeModel(
      { userText: 'fix the bug', turnNumber: 1 },
      ENABLED,
    )
    expect(model).toBe('claude-opus-4-7')
    expect(complexity).toBe('strong')
    expect(reason).toContain('first turn')
  })
  test('code fence routes to strong', () => {
    const { complexity, reason } = routeMidSession(
      'change this:\n```\nfoo()\n```',
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('code')
  })
  test('inline code span routes to strong', () => {
    const { complexity } = routeMidSession('rename `foo` to `bar`')
    expect(complexity).toBe('strong')
  })
  test('reasoning keyword "plan" routes to strong even when short', () => {
    const { complexity, reason } = routeMidSession('plan the refactor')
    expect(complexity).toBe('strong')
    expect(reason).toContain('keyword')
  })
  test('reasoning keyword "debug" routes to strong', () => {
    const { complexity } = routeMidSession('debug the test')
    expect(complexity).toBe('strong')
  })
  test('"root cause" multi-word keyword routes to strong', () => {
    const { complexity } = routeMidSession('find the root cause')
    expect(complexity).toBe('strong')
  })
  test('multi-paragraph input routes to strong', () => {
    const { complexity, reason } = routeMidSession(
      'first thought.\n\nsecond thought.',
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('multi-paragraph')
  })
  test('over-long input routes to strong', () => {
    // ~300 chars, 100 words — exceeds both default limits.
    const overLong = 'ok '.repeat(100)
    const { complexity } = routeMidSession(overLong)
    expect(complexity).toBe('strong')
  })
  test('exactly at the boundary stays simple', () => {
    const atLimit = 'a'.repeat(160)
    const { complexity } = routeMidSession(atLimit, EXPLICIT_DEFAULT_LIMITS)
    expect(complexity).toBe('simple')
  })
  test('one char over the boundary routes to strong', () => {
    const pastLimit = 'a'.repeat(161)
    const { complexity, reason } = routeMidSession(
      pastLimit,
      EXPLICIT_DEFAULT_LIMITS,
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('160 chars')
  })
 })
 // Caller-supplied limits must replace the built-in defaults and show up in
 // the reason string.
 describe('routeModel — config overrides', () => {
  test('custom simpleMaxChars is honored', () => {
    const tightChars: SmartRoutingConfig = { ...ENABLED, simpleMaxChars: 10 }
    const { complexity, reason } = routeModel(
      { userText: 'abcdefghijklmnop', turnNumber: 5 },
      tightChars,
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('10 chars')
  })
  test('custom simpleMaxWords is honored', () => {
    const tightWords: SmartRoutingConfig = { ...ENABLED, simpleMaxWords: 3 }
    const { complexity, reason } = routeModel(
      { userText: 'one two three four five', turnNumber: 5 },
      tightWords,
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('3 words')
  })
 })
 // The reason text is intended for display, so its shape is pinned here.
 describe('routeModel — reason strings', () => {
  test('simple decisions include char + word counts', () => {
    const turn = { userText: 'sounds good', turnNumber: 5 }
    const { reason } = routeModel(turn, ENABLED)
    expect(reason).toMatch(/\d+ chars, \d+ words/)
  })
 })
--- a/src/services/api/smartModelRouting.ts
+++ b/src/services/api/smartModelRouting.ts
@@ -0,0 +1,215 @@
 /**
 * Smart model routing — cheap-for-simple, strong-for-hard.
 *
 * For everyday short chatter ("ok", "thanks", "what does this do?") the
 * incremental quality of Opus/GPT-5 over Haiku/Mini is negligible while the
 * cost and latency are an order of magnitude worse. Smart routing opts a
 * user into routing such "obviously simple" turns to a cheaper model while
 * keeping the strong model for the anything-non-trivial path.
 *
 * This module is a pure primitive: it takes a turn description (the user's
 * text + light context) and returns which model to use, based on config.
 * It never reads env vars or state directly — caller supplies everything.
 *
 * Off by default. Users opt in via settings.smartRouting.enabled. Intent:
 * make this a copy-paste-small config block rather than a hidden heuristic,
 * so the tradeoff is visible and the user controls it.
 */
 /**
 * Caller-supplied settings for routeModel. Nothing here is read from the
 * environment; the caller passes the whole object on every call.
 */
 export type SmartRoutingConfig = {
  /** Master switch. When false, routeModel always returns strongModel. */
  enabled: boolean
  /**
   * Model to use for turns classified as "simple". If this is empty or equal
   * to strongModel, routeModel returns strongModel instead.
   */
  simpleModel: string
  /** Model to use for turns classified as "strong" (or when unsure). */
  strongModel: string
  /**
   * Max characters in user input to qualify as "simple". Default 160.
   * Measured on the trimmed text; input exactly at the limit still qualifies.
   */
  simpleMaxChars?: number
  /**
   * Max whitespace-separated words to qualify as "simple". Default 28.
   * Input exactly at the limit still qualifies.
   */
  simpleMaxWords?: number
 }
 /** Result of routeModel: the model to call plus why it was chosen. */
 export type RoutingDecision = {
  /** Model identifier taken from the config (simpleModel or strongModel). */
  model: string
  /** Tier the turn was assigned to. */
  complexity: 'simple' | 'strong'
  /** Human-readable reason — useful for the UI indicator and debug logs. */
  reason: string
 }
 /** Description of the current turn, as supplied by the caller of routeModel. */
 export type RoutingInput = {
  /**
   * The user's message text for this turn. Surrounding whitespace is ignored;
   * whitespace-only text is treated as empty.
   */
  userText: string
  /**
   * Optional: how many tool-use blocks the assistant has emitted in the
   * recent conversation. High values correlate with "continue this work"
   * follow-ups that can still be cheap, UNLESS the user also typed code
   * or strong-keyword text.
   *
   * NOTE: routeModel in this module does not read this field; it is carried
   * on the input type only.
   */
  recentToolUses?: number
  /**
   * Optional: turn number within the current session (1-indexed). The first
   * turn is often task-setup and benefits from the strong model even if
   * short — a bare "build X" opens the whole task.
   *
   * Only the exact value 1 triggers the first-turn rule; when omitted, the
   * rule never fires.
   */
  turnNumber?: number
 }
 // Fallback limits used by routeModel when the config leaves
 // simpleMaxChars / simpleMaxWords unset. Input longer than either limit is
 // routed to the strong model.
 const DEFAULT_SIMPLE_MAX_CHARS = 160
 const DEFAULT_SIMPLE_MAX_WORDS = 28
 // Keywords and phrases that strongly suggest reasoning/planning/design work.
 // Entries are plain text (not regex fragments). Must include enough anchors
 // that short prompts like "plan the refactor" route to strong even under
 // the char/word cutoff.
 const STRONG_KEYWORDS = [
  'plan',
  'design',
  'architect',
  'architecture',
  'refactor',
  'debug',
  'investigate',
  'analyze',
  'analyse',
  'implement',
  'optimize',
  'optimise',
  'review',
  'audit',
  'diagnose',
  'root cause',
  'root-cause',
  'why does',
  'why is',
  'how should',
  'why did',
  'propose',
  'trace',
  'reproduce',
 ]
 /**
 * Case-insensitive, whole-word matcher built from STRONG_KEYWORDS.
 *
 * - Each keyword is split on spaces/hyphens and its parts are re-joined with
 *   `[-\s]+`, so "root cause", "root-cause", "root  cause" and a phrase
 *   wrapped across a line break all match.
 * - Each part is regex-escaped, so a future keyword containing a
 *   metacharacter cannot corrupt the pattern.
 * - The surrounding `\b` anchors mean inflected forms ("planning",
 *   "debugging") and longer words ("planet") do NOT match; add them to the
 *   list explicitly if they should.
 *
 * The regex has no `g`/`y` flag, so `.test()` is stateless.
 */
 const STRONG_KEYWORD_RE = new RegExp(
  '\\b(?:' +
    STRONG_KEYWORDS.map(keyword =>
      keyword
        .split(/[-\s]+/)
        .map(part => part.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('[-\\s]+'),
    ).join('|') +
    ')\\b',
  'i',
 )
 /**
 * Detects code in user text: a triple-backtick fence marker anywhere in the
 * text, or a single-line inline span wrapped in single backticks.
 *
 * A fence does not need to be closed to count: an opening fence with no
 * closing one (e.g. a truncated paste) is still treated as code, so such
 * input is never mistaken for trivial chatter.
 */
 const CODE_FENCE_RE = /```|`[^`\n]+`/
 /**
 * Counts whitespace-delimited words in `text`.
 * Empty or whitespace-only text yields 0.
 */
 function countWords(text: string): number {
  // Each maximal run of non-whitespace characters is one word.
  const words = text.match(/\S+/g)
  return words === null ? 0 : words.length
 }
 /**
 * True when `text` contains a paragraph break: two newlines separated by
 * nothing but optional whitespace.
 */
 function hasMultiParagraph(text: string): boolean {
  const paragraphBreak = /\n\s*\n/
  return text.search(paragraphBreak) !== -1
 }
 /** True when `text` matches CODE_FENCE_RE (a code fence or inline code span). */
 function hasCode(text: string): boolean {
  const firstCodeIndex = text.search(CODE_FENCE_RE)
  return firstCodeIndex !== -1
 }
 /** True when `text` contains any reasoning/planning keyword from STRONG_KEYWORD_RE. */
 function hasStrongKeyword(text: string): boolean {
  const keywordIndex = text.search(STRONG_KEYWORD_RE)
  return keywordIndex !== -1
 }
 /**
 * Decide whether a turn goes to the simple or the strong model.
 *
 * Rules are evaluated in order; the first one that applies wins:
 *  1. Routing disabled                     → strong
 *  2. simpleModel or strongModel missing   → whichever model IS configured
 *     (strong when both or only strong are present in the check's sense;
 *     the simple model only when it is the sole one configured, so the
 *     caller never receives an empty model id if one was supplied)
 *  3. simpleModel equals strongModel       → strong (routing is a no-op)
 *  4. Empty / whitespace-only text         → simple
 *  5. turnNumber === 1                     → strong
 *  6. Code fence or inline code            → strong
 *  7. Reasoning/planning keyword           → strong
 *  8. Multi-paragraph text                 → strong
 *  9. Trimmed length  > simpleMaxChars     → strong
 * 10. Word count      > simpleMaxWords     → strong
 * 11. Otherwise                            → simple
 *
 * @param input  The turn to classify. Only `userText` and `turnNumber` are
 *               consulted.
 * @param config Routing settings. Unset or NaN limits fall back to the
 *               module defaults, so a bad number can never disable the
 *               length cutoff.
 * @returns The chosen model, its tier, and a human-readable reason.
 */
 export function routeModel(
  input: RoutingInput,
  config: SmartRoutingConfig,
 ): RoutingDecision {
  const strong = (reason: string): RoutingDecision => ({
    model: config.strongModel,
    complexity: 'strong',
    reason,
  })
  const simple = (reason: string): RoutingDecision => ({
    model: config.simpleModel,
    complexity: 'simple',
    reason,
  })
  // `length > NaN` is always false, which would wave every input through as
  // "short"; treat NaN like an unset limit instead.
  const limitOr = (value: number | undefined, fallback: number): number =>
    value == null || Number.isNaN(value) ? fallback : value

  if (!config.enabled) {
    return strong('smart-routing disabled')
  }
  if (!config.simpleModel || !config.strongModel) {
    const reason = 'simpleModel or strongModel missing from config'
    // Only the simple model is configured: use it rather than returning an
    // empty model id. The tier reflects the model actually returned.
    if (!config.strongModel && config.simpleModel) {
      return simple(reason)
    }
    return strong(reason)
  }
  if (config.simpleModel === config.strongModel) {
    return strong('simpleModel equals strongModel')
  }

  const trimmed = (input.userText ?? '').trim()
  if (!trimmed) {
    // Empty input (e.g. resuming a tool-use chain) — cheap by default.
    return simple('empty user text')
  }
  // First turn of a session is task-setup — always use strong.
  if (input.turnNumber === 1) {
    return strong('first turn of session')
  }

  if (hasCode(trimmed)) {
    return strong('contains code block or inline code')
  }
  if (hasStrongKeyword(trimmed)) {
    return strong('contains reasoning/planning keyword')
  }
  if (hasMultiParagraph(trimmed)) {
    return strong('multi-paragraph input')
  }

  const maxChars = limitOr(config.simpleMaxChars, DEFAULT_SIMPLE_MAX_CHARS)
  if (trimmed.length > maxChars) {
    return strong(`input > ${maxChars} chars`)
  }
  const maxWords = limitOr(config.simpleMaxWords, DEFAULT_SIMPLE_MAX_WORDS)
  // Counted once and reused for both the cutoff and the reason string.
  const wordCount = countWords(trimmed)
  if (wordCount > maxWords) {
    return strong(`input > ${maxWords} words`)
  }
  return simple(`short (${trimmed.length} chars, ${wordCount} words)`)
 }