feat(api): smart model routing primitive (cheap-for-simple, strong-for-hard) (#785)
Most everyday turns ("ok", "thanks", "yep go ahead", "what does that do?")
get no measurable quality improvement from Opus-tier models over Haiku-tier,
but cost ~10x more and stream slower. Smart routing opts a user into
automatically routing obviously-simple turns to a cheaper model while
keeping the strong model for anything non-trivial.
New module src/services/api/smartModelRouting.ts:
- routeModel(input, config) → { model, complexity, reason }
- Pure primitive: no env reads, no state, caller supplies everything.
- Config is opt-in (enabled: false by default).
Routes to strong (conservative) when ANY of:
- First turn of session (task-setup is worth the quality)
- Code fence or inline code span present
- Reasoning/planning keyword (plan, design, refactor, debug, architect,
investigate, root cause, etc. — 20+ anchors)
- Multi-paragraph input
- Over char/word cutoff (defaults: 160 chars, 28 words; matches hermes)
Routes to simple only for clearly-trivial chatter.
Decision includes a reason string for a future UI indicator that shows
which tier handled the turn.
Integration into query path is intentionally deferred to a follow-up PR so
the heuristics can be reviewed and tuned in isolation first.
Co-authored-by: OpenClaude <openclaude@gitlawb.com>
This commit is contained in:
191
src/services/api/smartModelRouting.test.ts
Normal file
191
src/services/api/smartModelRouting.test.ts
Normal file
@@ -0,0 +1,191 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import {
|
||||
routeModel,
|
||||
type SmartRoutingConfig,
|
||||
} from './smartModelRouting.ts'
|
||||
|
||||
// Baseline opted-in configuration shared by every test below; individual
// tests spread it and override single fields.
const ENABLED: SmartRoutingConfig = {
  enabled: true,
  simpleModel: 'claude-haiku-4-5',
  strongModel: 'claude-opus-4-7',
}
|
||||
|
||||
// Guard rails: whenever routing is off or the config cannot distinguish two
// tiers, the decision must be the strong model.
describe('routeModel — disabled / misconfigured', () => {
  // A trivially simple turn, so only the config can force the strong tier.
  const greeting = { userText: 'hi' }

  test('disabled config routes to strong', () => {
    const { model, complexity, reason } = routeModel(greeting, {
      ...ENABLED,
      enabled: false,
    })
    expect(model).toBe('claude-opus-4-7')
    expect(complexity).toBe('strong')
    expect(reason).toContain('disabled')
  })

  test('missing simpleModel falls back to strong', () => {
    const { model, complexity } = routeModel(greeting, {
      ...ENABLED,
      simpleModel: '',
    })
    expect(model).toBe('claude-opus-4-7')
    expect(complexity).toBe('strong')
  })

  test('simpleModel === strongModel routes to strong (no-op)', () => {
    const { model, complexity } = routeModel(greeting, {
      ...ENABLED,
      simpleModel: 'claude-opus-4-7',
    })
    expect(model).toBe('claude-opus-4-7')
    expect(complexity).toBe('strong')
  })
})
|
||||
|
||||
// Clearly trivial chatter (and empty input) should land on the cheap model.
describe('routeModel — simple path', () => {
  test('short greeting routes to simple', () => {
    const { model, complexity } = routeModel(
      { userText: 'thanks!', turnNumber: 5 },
      ENABLED,
    )
    expect(model).toBe('claude-haiku-4-5')
    expect(complexity).toBe('simple')
  })

  test('empty input routes to simple', () => {
    // Whitespace-only text with no turn number supplied.
    const { model, complexity } = routeModel({ userText: ' ' }, ENABLED)
    expect(model).toBe('claude-haiku-4-5')
    expect(complexity).toBe('simple')
  })

  test('mid-length chatter routes to simple', () => {
    const { complexity } = routeModel(
      { userText: 'yep looks good, go ahead', turnNumber: 10 },
      ENABLED,
    )
    expect(complexity).toBe('simple')
  })
})
|
||||
|
||||
// Each heuristic that must escalate a turn to the strong model, plus the
// exact char-limit boundary.
describe('routeModel — strong path', () => {
  // All cases except the first-turn one run at turn 5, which keeps the
  // first-turn rule out of the picture.
  const midSession = (
    userText: string,
    overrides: Partial<SmartRoutingConfig> = {},
  ) => routeModel({ userText, turnNumber: 5 }, { ...ENABLED, ...overrides })

  test('first turn always routes to strong, even when short', () => {
    const { model, complexity, reason } = routeModel(
      { userText: 'fix the bug', turnNumber: 1 },
      ENABLED,
    )
    expect(model).toBe('claude-opus-4-7')
    expect(complexity).toBe('strong')
    expect(reason).toContain('first turn')
  })

  test('code fence routes to strong', () => {
    const { complexity, reason } = midSession('change this:\n```\nfoo()\n```')
    expect(complexity).toBe('strong')
    expect(reason).toContain('code')
  })

  test('inline code span routes to strong', () => {
    const { complexity } = midSession('rename `foo` to `bar`')
    expect(complexity).toBe('strong')
  })

  test('reasoning keyword "plan" routes to strong even when short', () => {
    const { complexity, reason } = midSession('plan the refactor')
    expect(complexity).toBe('strong')
    expect(reason).toContain('keyword')
  })

  test('reasoning keyword "debug" routes to strong', () => {
    const { complexity } = midSession('debug the test')
    expect(complexity).toBe('strong')
  })

  test('"root cause" multi-word keyword routes to strong', () => {
    const { complexity } = midSession('find the root cause')
    expect(complexity).toBe('strong')
  })

  test('multi-paragraph input routes to strong', () => {
    const { complexity, reason } = midSession(
      'first thought.\n\nsecond thought.',
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('multi-paragraph')
  })

  test('over-long input routes to strong', () => {
    // ~300 chars, 100 words: well past both default cutoffs.
    const { complexity } = midSession('ok '.repeat(100))
    expect(complexity).toBe('strong')
  })

  test('exactly at the boundary stays simple', () => {
    const { complexity } = midSession('a'.repeat(160), {
      simpleMaxChars: 160,
      simpleMaxWords: 28,
    })
    expect(complexity).toBe('simple')
  })

  test('one char over the boundary routes to strong', () => {
    const { complexity, reason } = midSession('a'.repeat(161), {
      simpleMaxChars: 160,
      simpleMaxWords: 28,
    })
    expect(complexity).toBe('strong')
    expect(reason).toContain('160 chars')
  })
})
|
||||
|
||||
// Caller-supplied cutoffs must replace the built-in defaults and show up in
// the reason string.
describe('routeModel — config overrides', () => {
  test('custom simpleMaxChars is honored', () => {
    const tightChars: SmartRoutingConfig = { ...ENABLED, simpleMaxChars: 10 }
    const { complexity, reason } = routeModel(
      { userText: 'abcdefghijklmnop', turnNumber: 5 },
      tightChars,
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('10 chars')
  })

  test('custom simpleMaxWords is honored', () => {
    const tightWords: SmartRoutingConfig = { ...ENABLED, simpleMaxWords: 3 }
    const { complexity, reason } = routeModel(
      { userText: 'one two three four five', turnNumber: 5 },
      tightWords,
    )
    expect(complexity).toBe('strong')
    expect(reason).toContain('3 words')
  })
})
|
||||
|
||||
// The reason text of a simple decision carries the measured sizes.
describe('routeModel — reason strings', () => {
  test('simple decisions include char + word counts', () => {
    const { reason } = routeModel(
      { userText: 'sounds good', turnNumber: 5 },
      ENABLED,
    )
    expect(reason).toMatch(/\d+ chars, \d+ words/)
  })
})
|
||||
215
src/services/api/smartModelRouting.ts
Normal file
215
src/services/api/smartModelRouting.ts
Normal file
@@ -0,0 +1,215 @@
|
||||
/**
 * Smart model routing — cheap-for-simple, strong-for-hard.
 *
 * For everyday short chatter ("ok", "thanks", "what does this do?") the
 * incremental quality of Opus/GPT-5 over Haiku/Mini is negligible while the
 * cost and latency are an order of magnitude worse. Smart routing opts a
 * user into routing such "obviously simple" turns to a cheaper model while
 * keeping the strong model for anything non-trivial.
 *
 * This module is a pure primitive: it takes a turn description (the user's
 * text + light context) and returns which model to use, based on config.
 * It never reads env vars or state directly — caller supplies everything.
 *
 * Off by default. Users opt in via settings.smartRouting.enabled. Intent:
 * make this a copy-paste-small config block rather than a hidden heuristic,
 * so the tradeoff is visible and the user controls it.
 */
|
||||
|
||||
/** Caller-supplied settings that drive a routing decision. */
export type SmartRoutingConfig = {
  /** Master switch; when false every turn goes to `strongModel`. */
  enabled: boolean
  /** Model id chosen for turns classified as "simple". */
  simpleModel: string
  /** Model id chosen for turns classified as "strong" (or when unsure). */
  strongModel: string
  /** Upper bound on input characters for a "simple" turn. Default 160. */
  simpleMaxChars?: number
  /**
   * Upper bound on whitespace-separated words for a "simple" turn.
   * Default 28.
   */
  simpleMaxWords?: number
}
|
||||
|
||||
/** Outcome of routing a single turn. */
export type RoutingDecision = {
  /** Model id the turn should be sent to. */
  model: string
  /** Tier the turn was classified into. */
  complexity: 'simple' | 'strong'
  /** Human-readable explanation, meant for a UI indicator and debug logs. */
  reason: string
}
|
||||
|
||||
/** Description of the turn being routed. */
export type RoutingInput = {
  /** Text of the user's message for this turn. */
  userText: string
  /**
   * Optional count of tool-use blocks the assistant emitted in the recent
   * conversation. High values correlate with "continue this work"
   * follow-ups that can still be cheap, UNLESS the user also typed code or
   * strong-keyword text.
   */
  recentToolUses?: number
  /**
   * Optional 1-indexed turn number within the current session. The first
   * turn is often task-setup and benefits from the strong model even when
   * short — a bare "build X" opens the whole task.
   */
  turnNumber?: number
}
|
||||
|
||||
/** Character cutoff applied when the config does not supply one. */
const DEFAULT_SIMPLE_MAX_CHARS = 160
/** Word cutoff applied when the config does not supply one. */
const DEFAULT_SIMPLE_MAX_WORDS = 28
|
||||
|
||||
// Keywords that strongly suggest reasoning/planning/design work.
// Matching is whole-word and case-insensitive. The list must include enough
// anchors that short prompts like "plan the refactor" route to strong even
// under the char/word cutoff.
const STRONG_KEYWORDS: readonly string[] = [
  'plan',
  'design',
  'architect',
  'architecture',
  'refactor',
  'debug',
  'investigate',
  'analyze',
  'analyse',
  'implement',
  'optimize',
  'optimise',
  'review',
  'audit',
  'diagnose',
  'root cause',
  'root-cause',
  'why does',
  'why is',
  'how should',
  'why did',
  'propose',
  'trace',
  'reproduce',
]

// One case-insensitive alternation over all keywords, anchored on word
// boundaries so "plan" does not fire inside "planet".
//
// Multi-word keywords are split on spaces/hyphens and re-joined with
// `[-\s]+`, so "root cause", "root-cause", "root  cause" and a line-wrapped
// "root\ncause" all match. Each part is regex-escaped so a future keyword
// containing a metacharacter cannot corrupt the pattern.
const STRONG_KEYWORD_RE = new RegExp(
  '\\b(?:' +
    STRONG_KEYWORDS.map(keyword =>
      keyword
        .split(/[-\s]+/)
        .map(part => part.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('[-\\s]+'),
    ).join('|') +
    ')\\b',
  'i',
)
|
||||
|
||||
// Detects code in user text: any triple-backtick fence marker, or a
// single-line inline span wrapped in single backticks. A fence does not
// need to be closed — pasted snippets and stack traces are often left
// unterminated, and they should still count as code.
const CODE_FENCE_RE = /```|`[^`\n]+`/
|
||||
|
||||
/**
 * Count whitespace-separated words in `text`.
 *
 * @returns The number of runs of non-whitespace characters; 0 for an empty
 * or whitespace-only string.
 */
function countWords(text: string): number {
  const tokens = text.match(/\S+/g)
  return tokens === null ? 0 : tokens.length
}
|
||||
|
||||
/**
 * Report whether `text` contains a paragraph break: two newlines separated
 * by nothing but optional whitespace.
 */
function hasMultiParagraph(text: string): boolean {
  const paragraphBreak = /\n\s*\n/
  return text.search(paragraphBreak) >= 0
}
|
||||
|
||||
/** Report whether `text` matches the module's code-detection pattern. */
function hasCode(text: string): boolean {
  return text.search(CODE_FENCE_RE) !== -1
}
|
||||
|
||||
/** Report whether `text` contains any reasoning/planning keyword. */
function hasStrongKeyword(text: string): boolean {
  return text.search(STRONG_KEYWORD_RE) !== -1
}
|
||||
|
||||
/**
 * Decide whether a turn goes to the simple or the strong model.
 *
 * Checks run in this order and the first match wins:
 *  1. Routing disabled, a model id missing, or both ids equal → strong.
 *  2. Empty / whitespace-only text → simple.
 *  3. `turnNumber === 1` → strong.
 *  4. Code fence or inline code → strong.
 *  5. Reasoning/planning keyword → strong.
 *  6. Multi-paragraph text → strong.
 *  7. Trimmed length over the char cutoff, then word count over the word
 *     cutoff → strong.
 *  8. Otherwise → simple.
 *
 * A cutoff that is undefined, null or NaN falls back to the built-in
 * default.
 *
 * @param input  The turn to classify. Only `userText` and `turnNumber` are
 *               consulted.
 * @param config Routing settings supplied by the caller.
 * @returns The chosen model id, its tier, and a human-readable reason.
 */
export function routeModel(
  input: RoutingInput,
  config: SmartRoutingConfig,
): RoutingDecision {
  const strong = (reason: string): RoutingDecision => ({
    model: config.strongModel,
    complexity: 'strong',
    reason,
  })
  const simple = (reason: string): RoutingDecision => ({
    model: config.simpleModel,
    complexity: 'simple',
    reason,
  })
  // `x > NaN` is always false, so a NaN cutoff would silently disable the
  // length check and push long input to the cheap model. Treat it like an
  // unset value instead.
  const limitOr = (value: number | undefined, fallback: number): number =>
    value == null || Number.isNaN(value) ? fallback : value

  if (!config.enabled) {
    return strong('smart-routing disabled')
  }
  if (!config.simpleModel || !config.strongModel) {
    return strong('simpleModel or strongModel missing from config')
  }
  if (config.simpleModel === config.strongModel) {
    return strong('simpleModel equals strongModel')
  }

  const trimmed = (input.userText ?? '').trim()

  // Empty input (e.g. resuming a tool-use chain) — cheap by default. This
  // is checked before the first-turn rule, so it applies on turn 1 as well.
  if (!trimmed) {
    return simple('empty user text')
  }

  // First turn of a session is task-setup — always use strong.
  if (input.turnNumber === 1) {
    return strong('first turn of session')
  }

  if (hasCode(trimmed)) {
    return strong('contains code block or inline code')
  }
  if (hasStrongKeyword(trimmed)) {
    return strong('contains reasoning/planning keyword')
  }
  if (hasMultiParagraph(trimmed)) {
    return strong('multi-paragraph input')
  }

  const maxChars = limitOr(config.simpleMaxChars, DEFAULT_SIMPLE_MAX_CHARS)
  if (trimmed.length > maxChars) {
    return strong(`input > ${maxChars} chars`)
  }

  const maxWords = limitOr(config.simpleMaxWords, DEFAULT_SIMPLE_MAX_WORDS)
  const wordCount = countWords(trimmed)
  if (wordCount > maxWords) {
    return strong(`input > ${maxWords} words`)
  }

  return simple(`short (${trimmed.length} chars, ${wordCount} words)`)
}
|
||||
Reference in New Issue
Block a user