feat: per-agent model routing — route different agents to different providers (#238)

* feat: add agentModels and agentRouting to SettingsSchema

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: add agentRouting module for per-agent provider resolution

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: thread providerOverride through OpenAI shim for per-agent routing

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: getAnthropicClient accepts providerOverride for agent routing

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: thread providerOverride through Options and queryModel calls

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: thread providerOverride through query loop and ToolUseContext

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: resolve agent routing in runAgent and inject providerOverride

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs: add Agent Routing configuration guide to README

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* test: add unit tests for resolveAgentProvider + plaintext api_key note

- 15 tests covering priority chain (name > subagentType > default > null)
- normalize() case-insensitive and hyphen/underscore equivalence
- Edge cases: null settings, missing config sections, non-existent model
- README note about api_key stored in plaintext

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* security: address code review — SSRF, credential leak, key collision

- base_url schema now uses z.string().url() for SSRF mitigation
- Strip auth headers (Authorization, x-api-key, api-key) from
  defaultHeaders when providerOverride is active, preventing
  Anthropic credentials from leaking to third-party endpoints
- Warn on duplicate normalized routing keys to prevent silent shadowing
- providerOverride.apiKey is never logged (verified via grep)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: 冯俊辉 <fengjunhui@shiyanjia.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
JasonVon
2026-04-03 21:47:26 +08:00
committed by GitHub
parent 59ab2701f7
commit fb32e3f829
11 changed files with 319 additions and 9 deletions

View File

@@ -176,6 +176,8 @@ export type ToolUseContext = {
querySource?: QuerySource
/** Optional callback to get the latest tools (e.g., after MCP servers connect mid-query) */
refreshTools?: () => Tools
/** Per-agent provider override from agentRouting config */
providerOverride?: { model: string; baseURL: string; apiKey: string }
}
abortController: AbortController
readFileState: FileStateCache

View File

@@ -702,6 +702,7 @@ async function* queryLoop(
skipCacheWrite,
agentId: toolUseContext.agentId,
addNotification: toolUseContext.addNotification,
providerOverride: toolUseContext.options.providerOverride,
...(params.taskBudget && {
taskBudget: {
total: params.taskBudget.total,

View File

@@ -0,0 +1,125 @@
import { describe, expect, test } from 'bun:test'
import { resolveAgentProvider } from './agentRouting.js'
import type { SettingsJson } from '../../utils/settings/types.js'
// Shared fixture for the tests below.
// - agentModels declares two providers, keyed by model name.
// - agentRouting maps agent identifiers to those model names:
//     Explore, frontend-dev    -> deepseek-chat
//     general-purpose, default -> gpt-4o
// Only the two routing-related settings fields are populated, so the literal is
// double-cast (`as unknown as SettingsJson`), which bypasses structural type
// checking against SettingsJson.
const baseSettings = {
agentModels: {
'deepseek-chat': { base_url: 'https://api.deepseek.com/v1', api_key: 'sk-ds' },
'gpt-4o': { base_url: 'https://api.openai.com/v1', api_key: 'sk-oai' },
},
agentRouting: {
Explore: 'deepseek-chat',
'general-purpose': 'gpt-4o',
'frontend-dev': 'deepseek-chat',
// Fallback entry consulted when neither name nor subagentType matches.
default: 'gpt-4o',
},
} as unknown as SettingsJson
/**
 * Unit tests for resolveAgentProvider.
 *
 * Covers the lookup priority (name > subagentType > "default" > null), the
 * case-insensitive / hyphen-underscore-agnostic key matching, and the cases
 * where settings or one of the two config sections is missing.
 */
describe('resolveAgentProvider', () => {
  // ── Priority chain ──────────────────────────────────────────
  test('name takes priority over subagentType', () => {
    // name and subagentType must route to DIFFERENT models, otherwise this
    // test cannot tell which one won: 'frontend-dev' -> deepseek-chat,
    // 'general-purpose' -> gpt-4o (which is also the "default" target).
    const result = resolveAgentProvider('frontend-dev', 'general-purpose', baseSettings)
    expect(result).toEqual({
      model: 'deepseek-chat',
      baseURL: 'https://api.deepseek.com/v1',
      apiKey: 'sk-ds',
    })
  })

  test('subagentType used when name has no match', () => {
    // 'Explore' -> deepseek-chat, while "default" -> gpt-4o, so a deepseek
    // result proves the subagentType entry was used rather than the fallback.
    const result = resolveAgentProvider('unknown-name', 'Explore', baseSettings)
    expect(result).toEqual({
      model: 'deepseek-chat',
      baseURL: 'https://api.deepseek.com/v1',
      apiKey: 'sk-ds',
    })
  })

  test('falls back to "default" when neither name nor subagentType match', () => {
    const result = resolveAgentProvider('nobody', 'unknown-type', baseSettings)
    expect(result).toEqual({
      model: 'gpt-4o',
      baseURL: 'https://api.openai.com/v1',
      apiKey: 'sk-oai',
    })
  })

  test('returns null when no routing match and no default', () => {
    const settings = {
      agentModels: baseSettings.agentModels,
      agentRouting: { Explore: 'deepseek-chat' },
    } as unknown as SettingsJson
    const result = resolveAgentProvider('nobody', 'unknown-type', settings)
    expect(result).toBeNull()
  })

  test('returns null when name and subagentType are both undefined', () => {
    const settings = {
      agentModels: baseSettings.agentModels,
      agentRouting: { Explore: 'deepseek-chat' },
    } as unknown as SettingsJson
    const result = resolveAgentProvider(undefined, undefined, settings)
    expect(result).toBeNull()
  })

  // ── normalize() matching ────────────────────────────────────
  test('matching is case-insensitive', () => {
    const result = resolveAgentProvider(undefined, 'explore', baseSettings)
    expect(result?.model).toBe('deepseek-chat')
  })

  test('matching is case-insensitive (UPPER)', () => {
    const result = resolveAgentProvider(undefined, 'EXPLORE', baseSettings)
    expect(result?.model).toBe('deepseek-chat')
  })

  test('hyphen and underscore are equivalent', () => {
    const result = resolveAgentProvider(undefined, 'general_purpose', baseSettings)
    expect(result?.model).toBe('gpt-4o')
  })

  test('underscore in config matches hyphen in input', () => {
    const settings = {
      agentModels: baseSettings.agentModels,
      agentRouting: { general_purpose: 'deepseek-chat' },
    } as unknown as SettingsJson
    const result = resolveAgentProvider(undefined, 'general-purpose', settings)
    expect(result?.model).toBe('deepseek-chat')
  })

  // ── Edge cases ──────────────────────────────────────────────
  test('returns null when settings is null', () => {
    expect(resolveAgentProvider('Explore', 'Explore', null)).toBeNull()
  })

  test('returns null when agentRouting is missing', () => {
    const settings = { agentModels: baseSettings.agentModels } as unknown as SettingsJson
    expect(resolveAgentProvider(undefined, 'Explore', settings)).toBeNull()
  })

  test('returns null when agentModels is missing', () => {
    const settings = { agentRouting: baseSettings.agentRouting } as unknown as SettingsJson
    expect(resolveAgentProvider(undefined, 'Explore', settings)).toBeNull()
  })

  test('returns null when routing references non-existent model', () => {
    const settings = {
      agentModels: {},
      agentRouting: { Explore: 'non-existent-model' },
    } as unknown as SettingsJson
    expect(resolveAgentProvider(undefined, 'Explore', settings)).toBeNull()
  })

  test('subagentType only (no name)', () => {
    const result = resolveAgentProvider(undefined, 'Explore', baseSettings)
    expect(result?.model).toBe('deepseek-chat')
  })

  test('name only (no subagentType)', () => {
    const result = resolveAgentProvider('frontend-dev', undefined, baseSettings)
    expect(result?.model).toBe('deepseek-chat')
  })
})

View File

@@ -0,0 +1,75 @@
import type { SettingsJson } from '../../utils/settings/types.js'
/**
 * Provider override resolved from agent routing config.
 * When present, the API client should use these instead of global env vars.
 *
 * Produced by `resolveAgentProvider`: `model` is the routing target (which is
 * also the key of the matching `agentModels` entry), while `baseURL` and
 * `apiKey` are copied from that entry's `base_url` and `api_key`.
 */
export interface ProviderOverride {
/** Model name to send to the API (e.g. "deepseek-chat", "gpt-4o") */
model: string
/** OpenAI-compatible base URL */
baseURL: string
/** API key for this provider. NOTE(review): a credential — keep it out of logs and error messages. */
apiKey: string
}
/**
 * Normalize an agent identifier for matching: lower-case it and remove every
 * hyphen and underscore, so "General-Purpose", "general_purpose" and
 * "generalpurpose" all compare equal.
 */
function normalize(key: string): string {
  return key.toLowerCase().replace(/[-_]/g, '')
}

/**
 * Resolve the per-agent provider override from `settings.agentRouting` and
 * `settings.agentModels`.
 *
 * Lookup priority: `name` > `subagentType` > the "default" routing key.
 * Routing keys and candidates are compared after `normalize()`, so matching
 * ignores case, hyphens and underscores.
 *
 * @param name - Agent name; skipped when undefined or empty.
 * @param subagentType - Agent type; skipped when undefined or empty.
 * @param settings - Settings to read the two config sections from.
 * @returns The resolved override, or `null` (meaning "use the global
 *   provider") when settings or either config section is missing, when no
 *   candidate matches a routing entry, or when the matched model name is not
 *   an own key of `agentModels`.
 */
export function resolveAgentProvider(
  name: string | undefined,
  subagentType: string | undefined,
  settings: SettingsJson | null,
): ProviderOverride | null {
  if (!settings) return null

  const routing = settings.agentRouting
  const models = settings.agentModels
  if (!routing || !models) return null

  // Build normalized lookup from routing config.
  // Warn on duplicate normalized keys (e.g. "explore-agent" and "explore_agent"
  // both normalize to "exploreagent") to prevent silent shadowing.
  const normalizedRouting = new Map<string, string>()
  for (const [key, value] of Object.entries(routing)) {
    const nk = normalize(key)
    if (normalizedRouting.has(nk)) {
      console.error(`[agentRouting] Warning: routing key "${key}" collides with an existing key after normalization (both map to "${nk}"). First entry wins.`)
    } else {
      normalizedRouting.set(nk, value)
    }
  }

  // Try name first, then subagentType, then "default". Undefined and empty
  // candidates are dropped up front.
  const candidates = [name, subagentType, 'default'].filter(
    (candidate): candidate is string => Boolean(candidate),
  )

  let modelName: string | undefined
  for (const candidate of candidates) {
    const match = normalizedRouting.get(normalize(candidate))
    // An empty-string routing value counts as "no match" and falls through
    // to the next candidate.
    if (match) {
      modelName = match
      break
    }
  }
  if (!modelName) return null

  // `models` is a plain object, so an unchecked `models[modelName]` would also
  // find inherited members such as "constructor", "toString" or "__proto__"
  // and yield an override whose baseURL/apiKey are undefined. Only accept keys
  // the user actually configured.
  if (!Object.prototype.hasOwnProperty.call(models, modelName)) return null
  const modelConfig = models[modelName]
  if (!modelConfig) return null

  return {
    model: modelName,
    baseURL: modelConfig.base_url,
    apiKey: modelConfig.api_key,
  }
}

View File

@@ -704,6 +704,7 @@ export type Options = {
// so the model can pace itself. `remaining` is computed by the caller
// (query.ts decrements across the agentic loop).
taskBudget?: { total: number; remaining?: number }
providerOverride?: { model: string; baseURL: string; apiKey: string }
}
export async function queryModelWithoutStreaming({
@@ -820,6 +821,7 @@ export async function* executeNonStreamingRequest(
model: string
fetchOverride?: Options['fetchOverride']
source: string
providerOverride?: Options['providerOverride']
},
retryOptions: {
model: string
@@ -847,6 +849,7 @@ export async function* executeNonStreamingRequest(
model: clientOptions.model,
fetchOverride: clientOptions.fetchOverride,
source: clientOptions.source,
providerOverride: clientOptions.providerOverride,
}),
async (anthropic, attempt, context) => {
const start = Date.now()
@@ -1782,6 +1785,7 @@ async function* queryModel(
model: options.model,
fetchOverride: options.fetchOverride,
source: options.querySource,
providerOverride: options.providerOverride,
}),
async (anthropic, attempt, context) => {
attemptNumber = attempt
@@ -2549,7 +2553,7 @@ async function* queryModel(
: 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
const result = yield* executeNonStreamingRequest(
{ model: options.model, source: options.querySource },
{ model: options.model, source: options.querySource, providerOverride: options.providerOverride },
{
model: options.model,
fallbackModel: options.fallbackModel,

View File

@@ -95,12 +95,14 @@ export async function getAnthropicClient({
model,
fetchOverride,
source,
providerOverride,
}: {
apiKey?: string
maxRetries: number
model?: string
fetchOverride?: ClientOptions['fetch']
source?: string
providerOverride?: { model: string; baseURL: string; apiKey: string }
}): Promise<Anthropic> {
const containerId = process.env.CLAUDE_CODE_CONTAINER_ID
const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
@@ -154,6 +156,24 @@ export async function getAnthropicClient({
fetch: resolvedFetch,
}),
}
// Agent routing override: use per-agent provider when configured.
// Strip auth-related headers to prevent leaking Anthropic credentials
// to third-party endpoints (SSRF / credential forwarding mitigation).
if (providerOverride) {
const { createOpenAIShimClient } = await import('./openaiShim.js')
const safeHeaders: Record<string, string> = {}
for (const [k, v] of Object.entries(defaultHeaders)) {
const lower = k.toLowerCase()
if (lower === 'authorization' || lower === 'x-api-key' || lower === 'api-key') continue
safeHeaders[k] = v
}
return createOpenAIShimClient({
defaultHeaders: safeHeaders,
maxRetries,
timeout: parseInt(process.env.API_TIMEOUT_MS || String(600 * 1000), 10),
providerOverride,
}) as unknown as Anthropic
}
if (
isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) ||

View File

@@ -683,10 +683,12 @@ class OpenAIShimStream {
class OpenAIShimMessages {
private defaultHeaders: Record<string, string>
private reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
private providerOverride?: { model: string; baseURL: string; apiKey: string }
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh') {
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh', providerOverride?: { model: string; baseURL: string; apiKey: string }) {
this.defaultHeaders = defaultHeaders
this.reasoningEffort = reasoningEffort
this.providerOverride = providerOverride
}
create(
@@ -698,7 +700,7 @@ class OpenAIShimMessages {
let httpResponse: Response | undefined
const promise = (async () => {
const request = resolveProviderRequest({ model: params.model, reasoningEffortOverride: self.reasoningEffort })
const request = resolveProviderRequest({ model: self.providerOverride?.model ?? params.model, baseUrl: self.providerOverride?.baseURL, reasoningEffortOverride: self.reasoningEffort })
const response = await self._doRequest(request, params, options)
httpResponse = response
@@ -857,7 +859,7 @@ class OpenAIShimMessages {
...(options?.headers ?? {}),
}
const apiKey = process.env.OPENAI_API_KEY ?? ''
const apiKey = this.providerOverride?.apiKey ?? process.env.OPENAI_API_KEY ?? ''
// Detect Azure endpoints by hostname (not raw URL) to prevent bypass via
// path segments like https://evil.com/cognitiveservices.azure.com/
let isAzure = false
@@ -1056,8 +1058,8 @@ class OpenAIShimBeta {
messages: OpenAIShimMessages
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh') {
this.messages = new OpenAIShimMessages(defaultHeaders, reasoningEffort)
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh', providerOverride?: { model: string; baseURL: string; apiKey: string }) {
this.messages = new OpenAIShimMessages(defaultHeaders, reasoningEffort, providerOverride)
this.reasoningEffort = reasoningEffort
}
}
@@ -1067,6 +1069,7 @@ export function createOpenAIShimClient(options: {
maxRetries?: number
timeout?: number
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
providerOverride?: { model: string; baseURL: string; apiKey: string }
}): unknown {
hydrateGithubModelsTokenFromSecureStorage()
@@ -1089,7 +1092,7 @@ export function createOpenAIShimClient(options: {
const beta = new OpenAIShimBeta({
...(options.defaultHeaders ?? {}),
}, options.reasoningEffort)
}, options.reasoningEffort, options.providerOverride)
return {
beta,

View File

@@ -644,7 +644,8 @@ export const AgentTool = buildTool({
useExactTools: true
}),
worktreePath: worktreeInfo?.worktreePath,
description
description,
agentName: name,
};
// Helper to wrap execution with a cwd override: explicit cwd arg (KAIROS)

View File

@@ -57,6 +57,8 @@ import { clearSessionHooks } from '../../utils/hooks/sessionHooks.js'
import { executeSubagentStartHooks } from '../../utils/hooks.js'
import { createUserMessage } from '../../utils/messages.js'
import { getAgentModel } from '../../utils/model/agent.js'
import { resolveAgentProvider } from '../../services/api/agentRouting.js'
import { getInitialSettings } from '../../utils/settings/settings.js'
import type { ModelAlias } from '../../utils/model/aliases.js'
import {
clearAgentTranscriptSubdir,
@@ -267,6 +269,7 @@ export async function* runAgent({
description,
transcriptSubdir,
onQueryProgress,
agentName,
}: {
agentDefinition: AgentDefinition
promptMessages: Message[]
@@ -326,6 +329,8 @@ export async function* runAgent({
* during long single-block streams (e.g. thinking) where no assistant
* message is yielded for >60s. */
onQueryProgress?: () => void
/** Agent name (team member name) for routing resolution */
agentName?: string
}): AsyncGenerator<Message, void> {
// Track subagent usage for feature discovery
@@ -344,6 +349,14 @@ export async function* runAgent({
permissionMode,
)
// Resolve per-agent provider routing from settings
const providerOverride = resolveAgentProvider(
agentName,
agentDefinition.agentType,
getInitialSettings(),
)
const effectiveModel = providerOverride ? providerOverride.model : resolvedAgentModel
const agentId = override?.agentId ? override.agentId : createAgentId()
// Route this agent's transcript into a grouping subdirectory if requested
@@ -675,7 +688,8 @@ export async function* runAgent({
commands: [],
debug: toolUseContext.options.debug,
verbose: toolUseContext.options.verbose,
mainLoopModel: resolvedAgentModel,
mainLoopModel: effectiveModel,
providerOverride: providerOverride ?? undefined,
// For fork children (useExactTools), inherit thinking config to match the
// parent's API request prefix for prompt cache hits. For regular
// sub-agents, disable thinking to control output token costs.

View File

@@ -713,6 +713,27 @@ export const SettingsSchema = lazySchema(() =>
.string()
.optional()
.describe('Advisor model for the server-side advisor tool.'),
// Per-agent provider table keyed by model name; `agentRouting` values refer to these keys.
agentModels: z
  .record(
    z.string(),
    z.object({
      base_url: z
        .string()
        .url()
        // `.url()` alone accepts any parseable scheme (ftp:, file:, javascript:, ...).
        // Enforce the http(s)-only contract that the description promises.
        .refine(value => /^https?:\/\//i.test(value), {
          message: 'base_url must start with https:// or http://',
        })
        .describe('OpenAI-compatible API endpoint (must be https:// or http://)'),
      api_key: z.string().describe('API key for this provider'),
    }),
  )
  .optional()
  .describe(
    'Map of model name to provider connection info. ' +
      'Example: { "deepseek-chat": { "base_url": "https://api.deepseek.com/v1", "api_key": "sk-xxx" } }',
  ),
agentRouting: z
.record(z.string(), z.string())
.optional()
.describe(
'Map of agent identifier (subagent_type or team member name) to model name. ' +
'Use "default" key as fallback. Model name must exist in agentModels. ' +
'Example: { "Explore": "deepseek-chat", "general-purpose": "gpt-4o", "default": "gpt-4o" }',
),
fastMode: z
.boolean()
.optional()