feat(api): smart model routing primitive (cheap-for-simple, strong-for-hard) (#785)

Most everyday turns ("ok", "thanks", "yep go ahead", "what does that do?") get no measurable quality improvement from Opus-tier models over Haiku-tier, but cost ~10x more and stream slower. Smart routing opts a user into automatically routing obviously-simple turns to a cheaper model while keeping the strong model for anything non-trivial. New module src/services/api/smartModelRouting.ts: - routeModel(input, config) → { model, complexity, reason } - Pure primitive: no env reads, no state, caller supplies everything. - Config is opt-in (enabled: false by default). Routes to strong (conservative) when ANY of: - First turn of session (task-setup is worth the quality) - Code fence or inline code span present - Reasoning/planning keyword (plan, design, refactor, debug, architect, investigate, root cause, etc. — 20+ anchors) - Multi-paragraph input - Over char/word cutoff (defaults: 160 chars, 28 words; matches hermes) Routes to simple only for clearly-trivial chatter. Decision includes a reason string for a future UI indicator that shows which tier handled the turn. Integration into query path is intentionally deferred to a follow-up PR so the heuristics can be reviewed and tuned in isolation first. Co-authored-by: OpenClaude <openclaude@gitlawb.com>
2026-04-21 21:50:24 +08:00
parent b95d2221df
commit e908864da7
2 changed files with 406 additions and 0 deletions
--- a/src/services/api/smartModelRouting.test.ts
+++ b/src/services/api/smartModelRouting.test.ts
@@ -0,0 +1,191 @@
+import { describe, expect, test } from 'bun:test'
+
+import {
+  routeModel,
+  type SmartRoutingConfig,
+} from './smartModelRouting.ts'
+
+const ENABLED: SmartRoutingConfig = { // baseline: routing on, two distinct model ids, default char/word limits
+  enabled: true,
+  simpleModel: 'claude-haiku-4-5',
+  strongModel: 'claude-opus-4-7',
+}
+
+describe('routeModel — disabled / misconfigured', () => { // 'hi' with no turnNumber would otherwise be simple, so "strong" proves a config guard fired
+  test('disabled config routes to strong', () => {
+    const decision = routeModel(
+      { userText: 'hi' },
+      { ...ENABLED, enabled: false },
+    )
+    expect(decision.model).toBe('claude-opus-4-7')
+    expect(decision.complexity).toBe('strong')
+    expect(decision.reason).toContain('disabled')
+  })
+
+  test('missing simpleModel falls back to strong', () => {
+    const decision = routeModel(
+      { userText: 'hi' },
+      { ...ENABLED, simpleModel: '' }, // empty string is falsy, so it counts as missing
+    )
+    expect(decision.model).toBe('claude-opus-4-7')
+    expect(decision.complexity).toBe('strong')
+  })
+
+  test('simpleModel === strongModel routes to strong (no-op)', () => {
+    const decision = routeModel(
+      { userText: 'hi' },
+      { ...ENABLED, simpleModel: 'claude-opus-4-7' }, // same id as ENABLED.strongModel
+    )
+    expect(decision.model).toBe('claude-opus-4-7')
+    expect(decision.complexity).toBe('strong')
+  })
+})
+
+describe('routeModel — simple path', () => { // turns expected to land on the cheap model
+  test('short greeting routes to simple', () => {
+    const decision = routeModel({ userText: 'thanks!', turnNumber: 5 }, ENABLED)
+    expect(decision.model).toBe('claude-haiku-4-5')
+    expect(decision.complexity).toBe('simple')
+  })
+
+  test('empty input routes to simple', () => {
+    const decision = routeModel({ userText: '   ' }, ENABLED) // whitespace-only text trims to ''
+    expect(decision.model).toBe('claude-haiku-4-5')
+    expect(decision.complexity).toBe('simple')
+  })
+
+  test('mid-length chatter routes to simple', () => {
+    const decision = routeModel(
+      { userText: 'yep looks good, go ahead', turnNumber: 10 }, // 24 chars, 5 words, no code or listed keyword
+      ENABLED,
+    )
+    expect(decision.complexity).toBe('simple')
+  })
+})
+
+describe('routeModel — strong path', () => { // turnNumber: 5 keeps the first-turn rule out of the way everywhere but the first test
+  test('first turn always routes to strong, even when short', () => {
+    const decision = routeModel(
+      { userText: 'fix the bug', turnNumber: 1 }, // no code and no listed keyword: only turnNumber === 1 applies
+      ENABLED,
+    )
+    expect(decision.model).toBe('claude-opus-4-7')
+    expect(decision.complexity).toBe('strong')
+    expect(decision.reason).toContain('first turn')
+  })
+
+  test('code fence routes to strong', () => {
+    const decision = routeModel(
+      {
+        userText: 'change this:\n```\nfoo()\n```',
+        turnNumber: 5,
+      },
+      ENABLED,
+    )
+    expect(decision.complexity).toBe('strong')
+    expect(decision.reason).toContain('code')
+  })
+
+  test('inline code span routes to strong', () => {
+    const decision = routeModel(
+      { userText: 'rename `foo` to `bar`', turnNumber: 5 },
+      ENABLED,
+    )
+    expect(decision.complexity).toBe('strong')
+  })
+
+  test('reasoning keyword "plan" routes to strong even when short', () => {
+    const decision = routeModel(
+      { userText: 'plan the refactor', turnNumber: 5 }, // both "plan" and "refactor" are listed keywords
+      ENABLED,
+    )
+    expect(decision.complexity).toBe('strong')
+    expect(decision.reason).toContain('keyword')
+  })
+
+  test('reasoning keyword "debug" routes to strong', () => {
+    const decision = routeModel(
+      { userText: 'debug the test', turnNumber: 5 },
+      ENABLED,
+    )
+    expect(decision.complexity).toBe('strong')
+  })
+
+  test('"root cause" multi-word keyword routes to strong', () => {
+    const decision = routeModel(
+      { userText: 'find the root cause', turnNumber: 5 },
+      ENABLED,
+    )
+    expect(decision.complexity).toBe('strong')
+  })
+
+  test('multi-paragraph input routes to strong', () => {
+    const decision = routeModel(
+      {
+        userText: 'first thought.\n\nsecond thought.',
+        turnNumber: 5,
+      },
+      ENABLED,
+    )
+    expect(decision.complexity).toBe('strong')
+    expect(decision.reason).toContain('multi-paragraph')
+  })
+
+  test('over-long input routes to strong', () => {
+    const long = 'ok '.repeat(100) // 300 chars raw, 299 after trim, 100 words — over both default limits
+    const decision = routeModel(
+      { userText: long, turnNumber: 5 },
+      ENABLED,
+    )
+    expect(decision.complexity).toBe('strong')
+  })
+
+  test('exactly at the boundary stays simple', () => { // companion to the next test; note it asserts "simple"
+    const text = 'a'.repeat(160) // one 160-char word: 160 is not > 160
+    const decision = routeModel(
+      { userText: text, turnNumber: 5 },
+      { ...ENABLED, simpleMaxChars: 160, simpleMaxWords: 28 },
+    )
+    expect(decision.complexity).toBe('simple')
+  })
+
+  test('one char over the boundary routes to strong', () => {
+    const text = 'a'.repeat(161)
+    const decision = routeModel(
+      { userText: text, turnNumber: 5 },
+      { ...ENABLED, simpleMaxChars: 160, simpleMaxWords: 28 },
+    )
+    expect(decision.complexity).toBe('strong')
+    expect(decision.reason).toContain('160 chars')
+  })
+})
+
+describe('routeModel — config overrides', () => { // each input is short by the defaults, so only the custom limit can trip
+  test('custom simpleMaxChars is honored', () => {
+    const decision = routeModel(
+      { userText: 'abcdefghijklmnop', turnNumber: 5 }, // 16 chars, 1 word
+      { ...ENABLED, simpleMaxChars: 10 },
+    )
+    expect(decision.complexity).toBe('strong')
+    expect(decision.reason).toContain('10 chars')
+  })
+
+  test('custom simpleMaxWords is honored', () => {
+    const decision = routeModel(
+      { userText: 'one two three four five', turnNumber: 5 }, // 5 words, 23 chars
+      { ...ENABLED, simpleMaxWords: 3 },
+    )
+    expect(decision.complexity).toBe('strong')
+    expect(decision.reason).toContain('3 words')
+  })
+})
+
+describe('routeModel — reason strings', () => { // pins the shape of the text a UI indicator or log would show
+  test('simple decisions include char + word counts', () => {
+    const decision = routeModel(
+      { userText: 'sounds good', turnNumber: 5 },
+      ENABLED,
+    )
+    expect(decision.reason).toMatch(/\d+ chars, \d+ words/) // here: "short (11 chars, 2 words)"
+  })
+})
--- a/src/services/api/smartModelRouting.ts
+++ b/src/services/api/smartModelRouting.ts
@@ -0,0 +1,215 @@
+/**
+ * Smart model routing — cheap-for-simple, strong-for-hard.
+ *
+ * For everyday short chatter ("ok", "thanks", "what does this do?") the
+ * incremental quality of Opus/GPT-5 over Haiku/Mini is negligible while the
+ * cost and latency are an order of magnitude worse. Smart routing opts a
+ * user into routing such "obviously simple" turns to a cheaper model while
+ * keeping the strong model for the anything-non-trivial path.
+ *
+ * This module is a pure primitive: it takes a turn description (the user's
+ * text + light context) and returns which model to use, based on config.
+ * It never reads env vars or state directly — caller supplies everything.
+ *
+ * Off by default. Users opt in via settings.smartRouting.enabled. Intent:
+ * make this a copy-paste-small config block rather than a hidden heuristic,
+ * so the tradeoff is visible and the user controls it.
+ */
+
+export type SmartRoutingConfig = { // caller-supplied settings consumed by routeModel
+  enabled: boolean // master switch; when false routeModel always returns strongModel
+  /** Model id returned for "simple" turns. If empty or equal to strongModel, every turn goes to strongModel. */
+  simpleModel: string
+  /** Model id returned for "strong" turns and on every fallback path (disabled / misconfigured). */
+  strongModel: string
+  /** Inclusive cap on the trimmed user-text length for "simple". Omitted -> 160. */
+  simpleMaxChars?: number
+  /** Inclusive cap on the whitespace-separated word count for "simple". Omitted -> 28. */
+  simpleMaxWords?: number
+}
+
+export type RoutingDecision = { // value returned by routeModel
+  model: string // model id to use for the turn: config.simpleModel or config.strongModel
+  complexity: 'simple' | 'strong' // tier the turn was classified into
+  /** Human-readable name of the rule that decided, e.g. 'first turn of session' — for a UI indicator and debug logs. */
+  reason: string
+}
+
+export type RoutingInput = { // description of one user turn, as passed to routeModel
+  /** The user's message text for this turn; routeModel trims it before classifying. */
+  userText: string
+  /**
+   * Optional: how many tool-use blocks the assistant has emitted in the
+   * recent conversation. Intended signal: high values suggest a cheap
+   * "continue this work" follow-up, unless the user also typed code or a
+   * strong keyword. NOTE: routeModel does not read this field at present.
+   */
+  recentToolUses?: number
+  /**
+   * Optional: turn number within the current session (1-indexed). Exactly 1
+   * routes any non-blank text to the strong model — a bare "build X" opens
+   * the whole task. When omitted, the first-turn rule never fires.
+   */
+  turnNumber?: number
+}
+
+const DEFAULT_SIMPLE_MAX_CHARS = 160 // used when config.simpleMaxChars is not supplied
+const DEFAULT_SIMPLE_MAX_WORDS = 28 // used when config.simpleMaxWords is not supplied
+
+// Keywords that strongly suggest reasoning/planning/design work. Matching is
+// case-insensitive on whole words only: "plan" fires, "planet" and
+// "planning" do not. Within a multi-word entry each space or hyphen accepts
+// any run of whitespace/hyphens, so "root  cause" or "why\ndoes" still hit.
+const STRONG_KEYWORDS = [
+  'plan',
+  'design',
+  'architect',
+  'architecture',
+  'refactor',
+  'debug',
+  'investigate',
+  'analyze',
+  'analyse',
+  'implement',
+  'optimize',
+  'optimise',
+  'review',
+  'audit',
+  'diagnose',
+  'root cause',
+  'root-cause',
+  'why does',
+  'why is',
+  'how should',
+  'why did',
+  'propose',
+  'trace',
+  'reproduce',
+]
+
+const STRONG_KEYWORD_RE = new RegExp(
+  `\\b(?:${STRONG_KEYWORDS.map(k => k.replace(/[-\s]+/g, '[-\\s]+')).join('|')})\\b`,
+  'i',
+)
+
+const CODE_FENCE_RE = /```|`[^`\n]+`/ // any ``` fence marker (closed or not), or a one-line `inline` span
+
+function countWords(text: string): number {
+  // Count maximal runs of non-whitespace; blank or whitespace-only text has none.
+  const tokens = text.match(/\S+/g)
+  return tokens === null ? 0 : tokens.length
+}
+
+function hasMultiParagraph(text: string): boolean { // true when two newlines are separated by nothing but whitespace
+  return text.search(/\n\s*\n/) !== -1
+}
+
+function hasCode(text: string): boolean { // true when CODE_FENCE_RE finds a match anywhere in the text
+  return text.search(CODE_FENCE_RE) >= 0
+}
+
+function hasStrongKeyword(text: string): boolean { // true when STRONG_KEYWORD_RE finds a match anywhere in the text
+  return text.match(STRONG_KEYWORD_RE) !== null
+}
+
+/**
+ * Decide which model should handle one user turn, using cheap heuristics.
+ *
+ * Pure function: reads only its arguments and module constants, no state.
+ *
+ * Checks run in a fixed order and the first match wins, so `reason` names
+ * the earliest rule that applied:
+ *   1. routing disabled                                         -> strong
+ *   2. a model id missing, or both ids equal                    -> strong
+ *   3. blank / whitespace-only user text                        -> simple
+ *   4. `turnNumber === 1`                                       -> strong
+ *   5. code fence or inline code span                           -> strong
+ *   6. reasoning/planning keyword                               -> strong
+ *   7. blank line separating paragraphs                         -> strong
+ *   8. trimmed length over the char limit                       -> strong
+ *   9. word count over the word limit                           -> strong
+ *  10. everything else                                          -> simple
+ *
+ * Both limits are inclusive (exactly-at-limit stays simple). A limit that is
+ * missing or NaN — e.g. a caller passing `Number(undefined)` — falls back
+ * to the built-in default: every `x > NaN` comparison is false, so a NaN
+ * limit would otherwise silently switch that length guard off.
+ *
+ * `input.recentToolUses` is accepted by the type but not read by any rule.
+ *
+ * NOTE(review): on the config-guard paths `model` is `config.strongModel`
+ * as given, which may itself be empty — confirm callers handle that.
+ *
+ * @param input - Turn to classify; only `userText` and `turnNumber` are read.
+ * @param config - Routing switch, model ids and optional length limits.
+ * @returns The chosen model id, its tier, and a human-readable reason.
+ */
+export function routeModel(
+  input: RoutingInput,
+  config: SmartRoutingConfig,
+): RoutingDecision {
+  // Decision builders keep each rule below down to a single statement.
+  const strong = (reason: string): RoutingDecision => ({
+    model: config.strongModel,
+    complexity: 'strong',
+    reason,
+  })
+  const simple = (reason: string): RoutingDecision => ({
+    model: config.simpleModel,
+    complexity: 'simple',
+    reason,
+  })
+  // `??` alone would let NaN through, so reject it explicitly.
+  const limitOr = (value: number | undefined, fallback: number): number =>
+    value == null || Number.isNaN(value) ? fallback : value
+
+  // --- Config guards: any doubt about the setup means "strong". ---
+  if (!config.enabled) {
+    return strong('smart-routing disabled')
+  }
+  if (!config.simpleModel || !config.strongModel) {
+    return strong('simpleModel or strongModel missing from config')
+  }
+  if (config.simpleModel === config.strongModel) {
+    return strong('simpleModel equals strongModel')
+  }
+
+  const trimmed = (input.userText ?? '').trim()
+
+  // Blank text (e.g. resuming a tool-use chain) is cheap by default. This
+  // runs before the first-turn rule, so a blank first turn is also simple.
+  if (!trimmed) {
+    return simple('empty user text')
+  }
+
+  // The first turn of a session is task setup — always use strong.
+  if (input.turnNumber === 1) {
+    return strong('first turn of session')
+  }
+
+  // --- Content guards: any sign of real work keeps the strong model. ---
+  if (hasCode(trimmed)) {
+    return strong('contains code block or inline code')
+  }
+  if (hasStrongKeyword(trimmed)) {
+    return strong('contains reasoning/planning keyword')
+  }
+  if (hasMultiParagraph(trimmed)) {
+    return strong('multi-paragraph input')
+  }
+
+  // --- Length guards: chars are checked first, so that reason wins. ---
+  const maxChars = limitOr(config.simpleMaxChars, DEFAULT_SIMPLE_MAX_CHARS)
+  const maxWords = limitOr(config.simpleMaxWords, DEFAULT_SIMPLE_MAX_WORDS)
+  const charCount = trimmed.length
+  const wordCount = countWords(trimmed) // computed once, reused in the reason
+
+  if (charCount > maxChars) {
+    return strong(`input > ${maxChars} chars`)
+  }
+  if (wordCount > maxWords) {
+    return strong(`input > ${maxWords} words`)
+  }
+
+  return simple(`short (${charCount} chars, ${wordCount} words)`)
+}