feat(api): smart model routing primitive (cheap-for-simple, strong-for-hard) (#785)
Most everyday turns ("ok", "thanks", "yep go ahead", "what does that do?")
get no measurable quality improvement from Opus-tier models over Haiku-tier,
but cost ~10x more and stream slower. Smart routing opts a user into
automatically routing obviously-simple turns to a cheaper model while
keeping the strong model for anything non-trivial.
New module src/services/api/smartModelRouting.ts:
- routeModel(input, config) → { model, complexity, reason }
- Pure primitive: no env reads, no state, caller supplies everything.
- Config is opt-in (enabled: false by default).
Routes to strong (conservative) when ANY of:
- First turn of session (task-setup is worth the quality)
- Code fence or inline code span present
- Reasoning/planning keyword (plan, design, refactor, debug, architect,
investigate, root cause, etc. — 20+ anchors)
- Multi-paragraph input
- Over char/word cutoff (defaults: 160 chars, 28 words; matches hermes)
Routes to simple only for clearly-trivial chatter.
Decision includes a reason string for a future UI indicator that shows
which tier handled the turn.
Integration into query path is intentionally deferred to a follow-up PR so
the heuristics can be reviewed and tuned in isolation first.
Co-authored-by: OpenClaude <openclaude@gitlawb.com>
This commit is contained in:
191
src/services/api/smartModelRouting.test.ts
Normal file
191
src/services/api/smartModelRouting.test.ts
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
import { describe, expect, test } from 'bun:test'
|
||||||
|
|
||||||
|
import {
|
||||||
|
routeModel,
|
||||||
|
type SmartRoutingConfig,
|
||||||
|
} from './smartModelRouting.ts'
|
||||||
|
|
||||||
|
// Baseline config shared by the tests: routing switched on with two distinct
// models, so every decision can be attributed to the heuristics alone.
const ENABLED: SmartRoutingConfig = {
  enabled: true,
  simpleModel: 'claude-haiku-4-5',
  strongModel: 'claude-opus-4-7',
}
|
||||||
|
|
||||||
|
// Guard-clause coverage: a config that cannot meaningfully route must always
// resolve to the strong model, even for a trivially short message.
describe('routeModel — disabled / misconfigured', () => {
  const greeting = { userText: 'hi' }

  test('disabled config routes to strong', () => {
    const { model, complexity, reason } = routeModel(greeting, {
      ...ENABLED,
      enabled: false,
    })
    expect(model).toBe('claude-opus-4-7')
    expect(complexity).toBe('strong')
    expect(reason).toContain('disabled')
  })

  test('missing simpleModel falls back to strong', () => {
    const { model, complexity } = routeModel(greeting, {
      ...ENABLED,
      simpleModel: '',
    })
    expect(model).toBe('claude-opus-4-7')
    expect(complexity).toBe('strong')
  })

  test('simpleModel === strongModel routes to strong (no-op)', () => {
    const { model, complexity } = routeModel(greeting, {
      ...ENABLED,
      simpleModel: 'claude-opus-4-7',
    })
    expect(model).toBe('claude-opus-4-7')
    expect(complexity).toBe('strong')
  })
})
|
||||||
|
|
||||||
|
// Inputs that trip none of the "strong" heuristics must land on the cheap model.
describe('routeModel — simple path', () => {
  test('short greeting routes to simple', () => {
    const { model, complexity } = routeModel(
      { userText: 'thanks!', turnNumber: 5 },
      ENABLED,
    )
    expect(model).toBe('claude-haiku-4-5')
    expect(complexity).toBe('simple')
  })

  test('empty input routes to simple', () => {
    // Whitespace-only text is treated as empty; no turnNumber is supplied.
    const { model, complexity } = routeModel({ userText: ' ' }, ENABLED)
    expect(model).toBe('claude-haiku-4-5')
    expect(complexity).toBe('simple')
  })

  test('mid-length chatter routes to simple', () => {
    const { complexity } = routeModel(
      { userText: 'yep looks good, go ahead', turnNumber: 10 },
      ENABLED,
    )
    expect(complexity).toBe('simple')
  })
})
|
||||||
|
|
||||||
|
// One test per "strong" trigger: first turn, code, keywords, paragraphs, and
// the character cutoff (including both sides of its boundary).
describe('routeModel — strong path', () => {
  // Routes `userText` as a given turn, optionally overriding config fields.
  const routeAt = (
    userText: string,
    turnNumber: number,
    overrides: Partial<SmartRoutingConfig> = {},
  ) => routeModel({ userText, turnNumber }, { ...ENABLED, ...overrides })

  test('first turn always routes to strong, even when short', () => {
    const { model, complexity, reason } = routeAt('fix the bug', 1)
    expect(model).toBe('claude-opus-4-7')
    expect(complexity).toBe('strong')
    expect(reason).toContain('first turn')
  })

  test('code fence routes to strong', () => {
    const { complexity, reason } = routeAt('change this:\n```\nfoo()\n```', 5)
    expect(complexity).toBe('strong')
    expect(reason).toContain('code')
  })

  test('inline code span routes to strong', () => {
    const { complexity } = routeAt('rename `foo` to `bar`', 5)
    expect(complexity).toBe('strong')
  })

  test('reasoning keyword "plan" routes to strong even when short', () => {
    const { complexity, reason } = routeAt('plan the refactor', 5)
    expect(complexity).toBe('strong')
    expect(reason).toContain('keyword')
  })

  test('reasoning keyword "debug" routes to strong', () => {
    const { complexity } = routeAt('debug the test', 5)
    expect(complexity).toBe('strong')
  })

  test('"root cause" multi-word keyword routes to strong', () => {
    const { complexity } = routeAt('find the root cause', 5)
    expect(complexity).toBe('strong')
  })

  test('multi-paragraph input routes to strong', () => {
    const { complexity, reason } = routeAt(
      'first thought.\n\nsecond thought.',
      5,
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('multi-paragraph')
  })

  test('over-long input routes to strong', () => {
    const chatter = 'ok '.repeat(100) // ~300 chars, 100 words
    const { complexity } = routeAt(chatter, 5)
    expect(complexity).toBe('strong')
  })

  test('exactly at the boundary stays simple', () => {
    const atLimit = 'a'.repeat(160)
    const { complexity } = routeAt(atLimit, 5, {
      simpleMaxChars: 160,
      simpleMaxWords: 28,
    })
    expect(complexity).toBe('simple')
  })

  test('one char over the boundary routes to strong', () => {
    const overLimit = 'a'.repeat(161)
    const { complexity, reason } = routeAt(overLimit, 5, {
      simpleMaxChars: 160,
      simpleMaxWords: 28,
    })
    expect(complexity).toBe('strong')
    expect(reason).toContain('160 chars')
  })
})
|
||||||
|
|
||||||
|
// Caller-supplied thresholds must replace the built-in defaults and show up in
// the reason string.
describe('routeModel — config overrides', () => {
  test('custom simpleMaxChars is honored', () => {
    const config: SmartRoutingConfig = { ...ENABLED, simpleMaxChars: 10 }
    const { complexity, reason } = routeModel(
      { userText: 'abcdefghijklmnop', turnNumber: 5 },
      config,
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('10 chars')
  })

  test('custom simpleMaxWords is honored', () => {
    const config: SmartRoutingConfig = { ...ENABLED, simpleMaxWords: 3 }
    const { complexity, reason } = routeModel(
      { userText: 'one two three four five', turnNumber: 5 },
      config,
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('3 words')
  })
})
|
||||||
|
|
||||||
|
// The reason string is meant for display, so its shape is pinned here.
describe('routeModel — reason strings', () => {
  test('simple decisions include char + word counts', () => {
    const { reason } = routeModel(
      { userText: 'sounds good', turnNumber: 5 },
      ENABLED,
    )
    expect(reason).toMatch(/\d+ chars, \d+ words/)
  })
})
|
||||||
215
src/services/api/smartModelRouting.ts
Normal file
215
src/services/api/smartModelRouting.ts
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
/**
 * Smart model routing — cheap-for-simple, strong-for-hard.
 *
 * For everyday short chatter ("ok", "thanks", "what does this do?") the
 * incremental quality of Opus/GPT-5 over Haiku/Mini is negligible, while the
 * cost and latency are an order of magnitude worse. Smart routing lets a user
 * opt in to sending such "obviously simple" turns to a cheaper model while
 * keeping the strong model for anything non-trivial.
 *
 * This module is a pure primitive: it takes a turn description (the user's
 * text plus light context) and a config, and returns which model to use.
 * It never reads env vars or state directly — the caller supplies everything.
 *
 * Off by default. Users opt in via settings.smartRouting.enabled. The intent
 * is a small, copy-pasteable config block rather than a hidden heuristic,
 * so the tradeoff is visible and the user controls it.
 */
|
||||||
|
|
||||||
|
/** Caller-supplied settings that control how `routeModel` picks a model. */
export type SmartRoutingConfig = {
  /** Master switch. When false, every turn is sent to `strongModel`. */
  enabled: boolean
  /** Model identifier used for turns classified as "simple". */
  simpleModel: string
  /** Model identifier used for turns classified as "strong" (or when unsure). */
  strongModel: string
  /** Largest trimmed input length, in characters, still considered "simple". Default 160. */
  simpleMaxChars?: number
  /** Largest whitespace-separated word count still considered "simple". Default 28. */
  simpleMaxWords?: number
}
|
||||||
|
|
||||||
|
/** Outcome of a single routing call. */
export type RoutingDecision = {
  /** Identifier of the model chosen for this turn. */
  model: string
  /** Tier the turn was classified into. */
  complexity: 'simple' | 'strong'
  /** Human-readable explanation, intended for a UI indicator and debug logs. */
  reason: string
}
|
||||||
|
|
||||||
|
/** Description of the turn being routed. */
export type RoutingInput = {
  /** The user's message text for this turn. */
  userText: string
  /**
   * Optional: number of tool-use blocks the assistant emitted in the recent
   * conversation. High values correlate with "continue this work" follow-ups
   * that can still be cheap, UNLESS the user also typed code or
   * strong-keyword text.
   *
   * Note: `routeModel` in this file does not read this field.
   */
  recentToolUses?: number
  /**
   * Optional: 1-indexed turn number within the current session. The first
   * turn is often task setup and benefits from the strong model even when
   * short — a bare "build X" opens the whole task.
   */
  turnNumber?: number
}
|
||||||
|
|
||||||
|
/** Character cap for a "simple" turn when `simpleMaxChars` is not configured. */
const DEFAULT_SIMPLE_MAX_CHARS = 160

/** Word cap for a "simple" turn when `simpleMaxWords` is not configured. */
const DEFAULT_SIMPLE_MAX_WORDS = 28
|
||||||
|
|
||||||
|
// Keywords that strongly suggest reasoning/planning/design work.
// Matching is whole-word and case-insensitive. The list must include enough
// anchors that short prompts like "plan the refactor" route to strong even
// under the char/word cutoff.
//
// Because matching is whole-word, inflected forms ("planning", "debugging")
// do not match; add them explicitly if they should route to strong.
const STRONG_KEYWORDS: readonly string[] = [
  'plan',
  'design',
  'architect',
  'architecture',
  'refactor',
  'debug',
  'investigate',
  'analyze',
  'analyse',
  'implement',
  'optimize',
  'optimise',
  'review',
  'audit',
  'diagnose',
  'root cause',
  'root-cause',
  'why does',
  'why is',
  'how should',
  'why did',
  'propose',
  'trace',
  'reproduce',
]

// Single alternation built once from STRONG_KEYWORDS.
// - Each word is regex-escaped, so a future keyword containing a
//   metacharacter cannot corrupt the pattern.
// - Words inside a multi-word keyword may be separated by any run of
//   whitespace or hyphens, so "root cause", "root-cause" and "root  cause"
//   (double space, tab, line break) all match.
// - Entries that collapse to the same pattern are emitted once.
const STRONG_KEYWORD_RE = (() => {
  const alternatives = STRONG_KEYWORDS.map(keyword =>
    keyword
      .split(/[-\s]+/)
      .map(word => word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
      .join('[-\\s]+'),
  )
  const unique = [...new Set(alternatives)]
  return new RegExp(`\\b(?:${unique.join('|')})\\b`, 'i')
})()
|
||||||
|
|
||||||
|
// Detects Markdown code in user text: either a triple-backtick fence marker
// (closed or not — an unterminated fence still introduces code), or a
// single-line inline span delimited by single backticks.
const CODE_FENCE_RE = /```|`[^`\n]+`/
|
||||||
|
|
||||||
|
/**
 * Counts whitespace-separated words in `text`.
 *
 * @param text - Raw text; leading/trailing whitespace is ignored.
 * @returns Number of maximal non-whitespace runs; 0 for empty or blank text.
 */
function countWords(text: string): number {
  return text.match(/\S+/g)?.length ?? 0
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `text` contains a paragraph break: two newlines separated
 * by nothing or only whitespace.
 */
function hasMultiParagraph(text: string): boolean {
  return text.search(/\n\s*\n/) !== -1
}
|
||||||
|
|
||||||
|
/** Reports whether `text` contains anything matched by CODE_FENCE_RE. */
function hasCode(text: string): boolean {
  return text.search(CODE_FENCE_RE) >= 0
}
|
||||||
|
|
||||||
|
/** Reports whether `text` contains at least one match of STRONG_KEYWORD_RE. */
function hasStrongKeyword(text: string): boolean {
  return STRONG_KEYWORD_RE.exec(text) !== null
}
|
||||||
|
|
||||||
|
/**
 * Decides whether a turn goes to the simple or the strong model.
 *
 * Checks run in this order and the first match wins:
 *   1. Routing disabled, a model name missing, or both names equal → strong.
 *   2. Empty / whitespace-only text → simple.
 *   3. `turnNumber === 1` → strong.
 *   4. Code fence or inline code → strong.
 *   5. Reasoning/planning keyword → strong.
 *   6. Multi-paragraph text → strong.
 *   7. Trimmed length above the char limit, then word count above the word
 *      limit → strong.
 *   8. Otherwise → simple.
 *
 * A limit that is missing, not a number, or NaN falls back to its default.
 * Without that, `length > NaN` would always be false and silently disable
 * the cutoff.
 *
 * @param input - The turn to classify; only `userText` and `turnNumber` are read.
 * @param config - Routing settings supplied by the caller.
 * @returns The chosen model, its tier, and a human-readable reason.
 */
export function routeModel(
  input: RoutingInput,
  config: SmartRoutingConfig,
): RoutingDecision {
  const strong = (reason: string): RoutingDecision => ({
    model: config.strongModel,
    complexity: 'strong',
    reason,
  })
  const simple = (reason: string): RoutingDecision => ({
    model: config.simpleModel,
    complexity: 'simple',
    reason,
  })
  // Zero and negative limits are left as given; only unusable values fall back.
  const limitOrDefault = (value: number | undefined, fallback: number): number =>
    typeof value === 'number' && !Number.isNaN(value) ? value : fallback

  // Config guards: anything that cannot route safely resolves to strong.
  if (!config.enabled) {
    return strong('smart-routing disabled')
  }
  if (!config.simpleModel || !config.strongModel) {
    return strong('simpleModel or strongModel missing from config')
  }
  if (config.simpleModel === config.strongModel) {
    return strong('simpleModel equals strongModel')
  }

  const trimmed = (input.userText ?? '').trim()

  if (!trimmed) {
    // Empty input (e.g. resuming a tool-use chain) — cheap by default.
    return simple('empty user text')
  }

  // First turn of a session is task-setup — always use strong.
  if (input.turnNumber === 1) {
    return strong('first turn of session')
  }

  if (hasCode(trimmed)) {
    return strong('contains code block or inline code')
  }
  if (hasStrongKeyword(trimmed)) {
    return strong('contains reasoning/planning keyword')
  }
  if (hasMultiParagraph(trimmed)) {
    return strong('multi-paragraph input')
  }

  const maxChars = limitOrDefault(config.simpleMaxChars, DEFAULT_SIMPLE_MAX_CHARS)
  const maxWords = limitOrDefault(config.simpleMaxWords, DEFAULT_SIMPLE_MAX_WORDS)

  if (trimmed.length > maxChars) {
    return strong(`input > ${maxChars} chars`)
  }

  // Counted once and reused for both the cutoff and the reason string.
  const wordCount = countWords(trimmed)
  if (wordCount > maxWords) {
    return strong(`input > ${maxWords} words`)
  }

  return simple(`short (${trimmed.length} chars, ${wordCount} words)`)
}
|
||||||
Reference in New Issue
Block a user