feat: per-agent model routing — route different agents to different providers (#238)
* feat: add agentModels and agentRouting to SettingsSchema Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add agentRouting module for per-agent provider resolution Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: thread providerOverride through OpenAI shim for per-agent routing Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: getAnthropicClient accepts providerOverride for agent routing Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: thread providerOverride through Options and queryModel calls Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: thread providerOverride through query loop and ToolUseContext Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: resolve agent routing in runAgent and inject providerOverride Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * docs: add Agent Routing configuration guide to README Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: add unit tests for resolveAgentProvider + plaintext api_key note - 15 tests covering priority chain (name > subagentType > default > null) - normalize() case-insensitive and hyphen/underscore equivalence - Edge cases: null settings, missing config sections, non-existent model - README note about api_key stored in plaintext Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * security: address code review — SSRF, credential leak, key collision - base_url schema now uses z.string().url() for SSRF mitigation - Strip auth headers (Authorization, x-api-key, api-key) from defaultHeaders when providerOverride is active, preventing Anthropic credentials from leaking to third-party endpoints - Warn on duplicate normalized routing keys to prevent silent shadowing - providerOverride.apiKey is never logged (verified via grep) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: 冯俊辉 <fengjunhui@shiyanjia.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
44
README.md
44
README.md
@@ -139,6 +139,50 @@ For best results, use models with strong tool/function calling support.
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Agent Routing
|
||||||
|
|
||||||
|
Route different agents to different AI providers within the same session. Useful for cost optimization (cheap model for code review, powerful model for complex coding) or leveraging model strengths.
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
Add to `~/.claude/settings.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"agentModels": {
|
||||||
|
"deepseek-chat": {
|
||||||
|
"base_url": "https://api.deepseek.com/v1",
|
||||||
|
"api_key": "sk-your-key"
|
||||||
|
},
|
||||||
|
"gpt-4o": {
|
||||||
|
"base_url": "https://api.openai.com/v1",
|
||||||
|
"api_key": "sk-your-key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"agentRouting": {
|
||||||
|
"Explore": "deepseek-chat",
|
||||||
|
"Plan": "gpt-4o",
|
||||||
|
"general-purpose": "gpt-4o",
|
||||||
|
"frontend-dev": "deepseek-chat",
|
||||||
|
"default": "gpt-4o"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### How It Works
|
||||||
|
|
||||||
|
- **agentModels**: Maps model names to OpenAI-compatible API endpoints
|
||||||
|
- **agentRouting**: Maps agent types or team member names to model names
|
||||||
|
- **Priority**: `name` > `subagent_type` > `"default"` > global provider
|
||||||
|
- **Matching**: Case-insensitive, hyphen/underscore equivalent (`general-purpose` = `general_purpose`)
|
||||||
|
- **Teams**: Team members are routed by their `name` — no extra config needed
|
||||||
|
|
||||||
|
When no routing match is found, the global provider (env vars) is used as fallback.
|
||||||
|
|
||||||
|
> **Note:** `api_key` values in `settings.json` are stored in plaintext. Keep this file private and do not commit it to version control.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Web Search and Fetch
|
## Web Search and Fetch
|
||||||
|
|
||||||
`WebFetch` works out of the box.
|
`WebFetch` works out of the box.
|
||||||
|
|||||||
@@ -176,6 +176,8 @@ export type ToolUseContext = {
|
|||||||
querySource?: QuerySource
|
querySource?: QuerySource
|
||||||
/** Optional callback to get the latest tools (e.g., after MCP servers connect mid-query) */
|
/** Optional callback to get the latest tools (e.g., after MCP servers connect mid-query) */
|
||||||
refreshTools?: () => Tools
|
refreshTools?: () => Tools
|
||||||
|
/** Per-agent provider override from agentRouting config */
|
||||||
|
providerOverride?: { model: string; baseURL: string; apiKey: string }
|
||||||
}
|
}
|
||||||
abortController: AbortController
|
abortController: AbortController
|
||||||
readFileState: FileStateCache
|
readFileState: FileStateCache
|
||||||
|
|||||||
@@ -702,6 +702,7 @@ async function* queryLoop(
|
|||||||
skipCacheWrite,
|
skipCacheWrite,
|
||||||
agentId: toolUseContext.agentId,
|
agentId: toolUseContext.agentId,
|
||||||
addNotification: toolUseContext.addNotification,
|
addNotification: toolUseContext.addNotification,
|
||||||
|
providerOverride: toolUseContext.options.providerOverride,
|
||||||
...(params.taskBudget && {
|
...(params.taskBudget && {
|
||||||
taskBudget: {
|
taskBudget: {
|
||||||
total: params.taskBudget.total,
|
total: params.taskBudget.total,
|
||||||
|
|||||||
125
src/services/api/agentRouting.test.ts
Normal file
125
src/services/api/agentRouting.test.ts
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
import { describe, expect, test } from 'bun:test'
|
||||||
|
import { resolveAgentProvider } from './agentRouting.js'
|
||||||
|
import type { SettingsJson } from '../../utils/settings/types.js'
|
||||||
|
|
||||||
|
const baseSettings = {
|
||||||
|
agentModels: {
|
||||||
|
'deepseek-chat': { base_url: 'https://api.deepseek.com/v1', api_key: 'sk-ds' },
|
||||||
|
'gpt-4o': { base_url: 'https://api.openai.com/v1', api_key: 'sk-oai' },
|
||||||
|
},
|
||||||
|
agentRouting: {
|
||||||
|
Explore: 'deepseek-chat',
|
||||||
|
'general-purpose': 'gpt-4o',
|
||||||
|
'frontend-dev': 'deepseek-chat',
|
||||||
|
default: 'gpt-4o',
|
||||||
|
},
|
||||||
|
} as unknown as SettingsJson
|
||||||
|
|
||||||
|
describe('resolveAgentProvider', () => {
|
||||||
|
// ── Priority chain ──────────────────────────────────────────
|
||||||
|
|
||||||
|
test('name takes priority over subagentType', () => {
|
||||||
|
const result = resolveAgentProvider('frontend-dev', 'Explore', baseSettings)
|
||||||
|
expect(result).toEqual({
|
||||||
|
model: 'deepseek-chat',
|
||||||
|
baseURL: 'https://api.deepseek.com/v1',
|
||||||
|
apiKey: 'sk-ds',
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
test('subagentType used when name has no match', () => {
|
||||||
|
const result = resolveAgentProvider('unknown-name', 'Explore', baseSettings)
|
||||||
|
expect(result).toEqual({
|
||||||
|
model: 'deepseek-chat',
|
||||||
|
baseURL: 'https://api.deepseek.com/v1',
|
||||||
|
apiKey: 'sk-ds',
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
test('falls back to "default" when neither name nor subagentType match', () => {
|
||||||
|
const result = resolveAgentProvider('nobody', 'unknown-type', baseSettings)
|
||||||
|
expect(result).toEqual({
|
||||||
|
model: 'gpt-4o',
|
||||||
|
baseURL: 'https://api.openai.com/v1',
|
||||||
|
apiKey: 'sk-oai',
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null when no routing match and no default', () => {
|
||||||
|
const settings = {
|
||||||
|
agentModels: baseSettings.agentModels,
|
||||||
|
agentRouting: { Explore: 'deepseek-chat' },
|
||||||
|
} as unknown as SettingsJson
|
||||||
|
const result = resolveAgentProvider('nobody', 'unknown-type', settings)
|
||||||
|
expect(result).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null when name and subagentType are both undefined', () => {
|
||||||
|
const settings = {
|
||||||
|
agentModels: baseSettings.agentModels,
|
||||||
|
agentRouting: { Explore: 'deepseek-chat' },
|
||||||
|
} as unknown as SettingsJson
|
||||||
|
const result = resolveAgentProvider(undefined, undefined, settings)
|
||||||
|
expect(result).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
// ── normalize() matching ────────────────────────────────────
|
||||||
|
|
||||||
|
test('matching is case-insensitive', () => {
|
||||||
|
const result = resolveAgentProvider(undefined, 'explore', baseSettings)
|
||||||
|
expect(result?.model).toBe('deepseek-chat')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('matching is case-insensitive (UPPER)', () => {
|
||||||
|
const result = resolveAgentProvider(undefined, 'EXPLORE', baseSettings)
|
||||||
|
expect(result?.model).toBe('deepseek-chat')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('hyphen and underscore are equivalent', () => {
|
||||||
|
const result = resolveAgentProvider(undefined, 'general_purpose', baseSettings)
|
||||||
|
expect(result?.model).toBe('gpt-4o')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('underscore in config matches hyphen in input', () => {
|
||||||
|
const settings = {
|
||||||
|
agentModels: baseSettings.agentModels,
|
||||||
|
agentRouting: { general_purpose: 'deepseek-chat' },
|
||||||
|
} as unknown as SettingsJson
|
||||||
|
const result = resolveAgentProvider(undefined, 'general-purpose', settings)
|
||||||
|
expect(result?.model).toBe('deepseek-chat')
|
||||||
|
})
|
||||||
|
|
||||||
|
// ── Edge cases ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
test('returns null when settings is null', () => {
|
||||||
|
expect(resolveAgentProvider('Explore', 'Explore', null)).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null when agentRouting is missing', () => {
|
||||||
|
const settings = { agentModels: baseSettings.agentModels } as unknown as SettingsJson
|
||||||
|
expect(resolveAgentProvider(undefined, 'Explore', settings)).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null when agentModels is missing', () => {
|
||||||
|
const settings = { agentRouting: baseSettings.agentRouting } as unknown as SettingsJson
|
||||||
|
expect(resolveAgentProvider(undefined, 'Explore', settings)).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns null when routing references non-existent model', () => {
|
||||||
|
const settings = {
|
||||||
|
agentModels: {},
|
||||||
|
agentRouting: { Explore: 'non-existent-model' },
|
||||||
|
} as unknown as SettingsJson
|
||||||
|
expect(resolveAgentProvider(undefined, 'Explore', settings)).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('subagentType only (no name)', () => {
|
||||||
|
const result = resolveAgentProvider(undefined, 'Explore', baseSettings)
|
||||||
|
expect(result?.model).toBe('deepseek-chat')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('name only (no subagentType)', () => {
|
||||||
|
const result = resolveAgentProvider('frontend-dev', undefined, baseSettings)
|
||||||
|
expect(result?.model).toBe('deepseek-chat')
|
||||||
|
})
|
||||||
|
})
|
||||||
75
src/services/api/agentRouting.ts
Normal file
75
src/services/api/agentRouting.ts
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
import type { SettingsJson } from '../../utils/settings/types.js'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provider override resolved from agent routing config.
|
||||||
|
* When present, the API client should use these instead of global env vars.
|
||||||
|
*/
|
||||||
|
export interface ProviderOverride {
|
||||||
|
/** Model name to send to the API (e.g. "deepseek-chat", "gpt-4o") */
|
||||||
|
model: string
|
||||||
|
/** OpenAI-compatible base URL */
|
||||||
|
baseURL: string
|
||||||
|
/** API key for this provider */
|
||||||
|
apiKey: string
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize an agent identifier for case-insensitive, hyphen/underscore-agnostic matching.
|
||||||
|
*/
|
||||||
|
function normalize(key: string): string {
|
||||||
|
return key.toLowerCase().replace(/[-_]/g, '')
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Look up agent.routing by name or subagent_type, then resolve via agent.models.
|
||||||
|
*
|
||||||
|
* Priority: name > subagentType > "default" > null (use global provider)
|
||||||
|
*/
|
||||||
|
export function resolveAgentProvider(
|
||||||
|
name: string | undefined,
|
||||||
|
subagentType: string | undefined,
|
||||||
|
settings: SettingsJson | null,
|
||||||
|
): ProviderOverride | null {
|
||||||
|
if (!settings) return null
|
||||||
|
|
||||||
|
const routing = settings.agentRouting
|
||||||
|
const models = settings.agentModels
|
||||||
|
if (!routing || !models) return null
|
||||||
|
|
||||||
|
// Build normalized lookup from routing config.
|
||||||
|
// Warn on duplicate normalized keys (e.g. "explore-agent" and "explore_agent"
|
||||||
|
// both normalize to "exploreagent") to prevent silent shadowing.
|
||||||
|
const normalizedRouting = new Map<string, string>()
|
||||||
|
for (const [key, value] of Object.entries(routing)) {
|
||||||
|
const nk = normalize(key)
|
||||||
|
if (normalizedRouting.has(nk)) {
|
||||||
|
console.error(`[agentRouting] Warning: routing key "${key}" collides with an existing key after normalization (both map to "${nk}"). First entry wins.`)
|
||||||
|
}
|
||||||
|
if (!normalizedRouting.has(nk)) {
|
||||||
|
normalizedRouting.set(nk, value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try name first, then subagentType, then "default"
|
||||||
|
const candidates = [name, subagentType, 'default'].filter(Boolean) as string[]
|
||||||
|
let modelName: string | undefined
|
||||||
|
|
||||||
|
for (const candidate of candidates) {
|
||||||
|
const match = normalizedRouting.get(normalize(candidate))
|
||||||
|
if (match) {
|
||||||
|
modelName = match
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!modelName) return null
|
||||||
|
|
||||||
|
const modelConfig = models[modelName]
|
||||||
|
if (!modelConfig) return null
|
||||||
|
|
||||||
|
return {
|
||||||
|
model: modelName,
|
||||||
|
baseURL: modelConfig.base_url,
|
||||||
|
apiKey: modelConfig.api_key,
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -704,6 +704,7 @@ export type Options = {
|
|||||||
// so the model can pace itself. `remaining` is computed by the caller
|
// so the model can pace itself. `remaining` is computed by the caller
|
||||||
// (query.ts decrements across the agentic loop).
|
// (query.ts decrements across the agentic loop).
|
||||||
taskBudget?: { total: number; remaining?: number }
|
taskBudget?: { total: number; remaining?: number }
|
||||||
|
providerOverride?: { model: string; baseURL: string; apiKey: string }
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function queryModelWithoutStreaming({
|
export async function queryModelWithoutStreaming({
|
||||||
@@ -820,6 +821,7 @@ export async function* executeNonStreamingRequest(
|
|||||||
model: string
|
model: string
|
||||||
fetchOverride?: Options['fetchOverride']
|
fetchOverride?: Options['fetchOverride']
|
||||||
source: string
|
source: string
|
||||||
|
providerOverride?: Options['providerOverride']
|
||||||
},
|
},
|
||||||
retryOptions: {
|
retryOptions: {
|
||||||
model: string
|
model: string
|
||||||
@@ -847,6 +849,7 @@ export async function* executeNonStreamingRequest(
|
|||||||
model: clientOptions.model,
|
model: clientOptions.model,
|
||||||
fetchOverride: clientOptions.fetchOverride,
|
fetchOverride: clientOptions.fetchOverride,
|
||||||
source: clientOptions.source,
|
source: clientOptions.source,
|
||||||
|
providerOverride: clientOptions.providerOverride,
|
||||||
}),
|
}),
|
||||||
async (anthropic, attempt, context) => {
|
async (anthropic, attempt, context) => {
|
||||||
const start = Date.now()
|
const start = Date.now()
|
||||||
@@ -1782,6 +1785,7 @@ async function* queryModel(
|
|||||||
model: options.model,
|
model: options.model,
|
||||||
fetchOverride: options.fetchOverride,
|
fetchOverride: options.fetchOverride,
|
||||||
source: options.querySource,
|
source: options.querySource,
|
||||||
|
providerOverride: options.providerOverride,
|
||||||
}),
|
}),
|
||||||
async (anthropic, attempt, context) => {
|
async (anthropic, attempt, context) => {
|
||||||
attemptNumber = attempt
|
attemptNumber = attempt
|
||||||
@@ -2549,7 +2553,7 @@ async function* queryModel(
|
|||||||
: 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
: 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||||
})
|
})
|
||||||
const result = yield* executeNonStreamingRequest(
|
const result = yield* executeNonStreamingRequest(
|
||||||
{ model: options.model, source: options.querySource },
|
{ model: options.model, source: options.querySource, providerOverride: options.providerOverride },
|
||||||
{
|
{
|
||||||
model: options.model,
|
model: options.model,
|
||||||
fallbackModel: options.fallbackModel,
|
fallbackModel: options.fallbackModel,
|
||||||
|
|||||||
@@ -95,12 +95,14 @@ export async function getAnthropicClient({
|
|||||||
model,
|
model,
|
||||||
fetchOverride,
|
fetchOverride,
|
||||||
source,
|
source,
|
||||||
|
providerOverride,
|
||||||
}: {
|
}: {
|
||||||
apiKey?: string
|
apiKey?: string
|
||||||
maxRetries: number
|
maxRetries: number
|
||||||
model?: string
|
model?: string
|
||||||
fetchOverride?: ClientOptions['fetch']
|
fetchOverride?: ClientOptions['fetch']
|
||||||
source?: string
|
source?: string
|
||||||
|
providerOverride?: { model: string; baseURL: string; apiKey: string }
|
||||||
}): Promise<Anthropic> {
|
}): Promise<Anthropic> {
|
||||||
const containerId = process.env.CLAUDE_CODE_CONTAINER_ID
|
const containerId = process.env.CLAUDE_CODE_CONTAINER_ID
|
||||||
const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
|
const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
|
||||||
@@ -154,6 +156,24 @@ export async function getAnthropicClient({
|
|||||||
fetch: resolvedFetch,
|
fetch: resolvedFetch,
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
|
// Agent routing override: use per-agent provider when configured.
|
||||||
|
// Strip auth-related headers to prevent leaking Anthropic credentials
|
||||||
|
// to third-party endpoints (SSRF / credential forwarding mitigation).
|
||||||
|
if (providerOverride) {
|
||||||
|
const { createOpenAIShimClient } = await import('./openaiShim.js')
|
||||||
|
const safeHeaders: Record<string, string> = {}
|
||||||
|
for (const [k, v] of Object.entries(defaultHeaders)) {
|
||||||
|
const lower = k.toLowerCase()
|
||||||
|
if (lower === 'authorization' || lower === 'x-api-key' || lower === 'api-key') continue
|
||||||
|
safeHeaders[k] = v
|
||||||
|
}
|
||||||
|
return createOpenAIShimClient({
|
||||||
|
defaultHeaders: safeHeaders,
|
||||||
|
maxRetries,
|
||||||
|
timeout: parseInt(process.env.API_TIMEOUT_MS || String(600 * 1000), 10),
|
||||||
|
providerOverride,
|
||||||
|
}) as unknown as Anthropic
|
||||||
|
}
|
||||||
if (
|
if (
|
||||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
|
isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
|
||||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) ||
|
isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) ||
|
||||||
|
|||||||
@@ -683,10 +683,12 @@ class OpenAIShimStream {
|
|||||||
class OpenAIShimMessages {
|
class OpenAIShimMessages {
|
||||||
private defaultHeaders: Record<string, string>
|
private defaultHeaders: Record<string, string>
|
||||||
private reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
|
private reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
|
||||||
|
private providerOverride?: { model: string; baseURL: string; apiKey: string }
|
||||||
|
|
||||||
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh') {
|
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh', providerOverride?: { model: string; baseURL: string; apiKey: string }) {
|
||||||
this.defaultHeaders = defaultHeaders
|
this.defaultHeaders = defaultHeaders
|
||||||
this.reasoningEffort = reasoningEffort
|
this.reasoningEffort = reasoningEffort
|
||||||
|
this.providerOverride = providerOverride
|
||||||
}
|
}
|
||||||
|
|
||||||
create(
|
create(
|
||||||
@@ -698,7 +700,7 @@ class OpenAIShimMessages {
|
|||||||
let httpResponse: Response | undefined
|
let httpResponse: Response | undefined
|
||||||
|
|
||||||
const promise = (async () => {
|
const promise = (async () => {
|
||||||
const request = resolveProviderRequest({ model: params.model, reasoningEffortOverride: self.reasoningEffort })
|
const request = resolveProviderRequest({ model: self.providerOverride?.model ?? params.model, baseUrl: self.providerOverride?.baseURL, reasoningEffortOverride: self.reasoningEffort })
|
||||||
const response = await self._doRequest(request, params, options)
|
const response = await self._doRequest(request, params, options)
|
||||||
httpResponse = response
|
httpResponse = response
|
||||||
|
|
||||||
@@ -857,7 +859,7 @@ class OpenAIShimMessages {
|
|||||||
...(options?.headers ?? {}),
|
...(options?.headers ?? {}),
|
||||||
}
|
}
|
||||||
|
|
||||||
const apiKey = process.env.OPENAI_API_KEY ?? ''
|
const apiKey = this.providerOverride?.apiKey ?? process.env.OPENAI_API_KEY ?? ''
|
||||||
// Detect Azure endpoints by hostname (not raw URL) to prevent bypass via
|
// Detect Azure endpoints by hostname (not raw URL) to prevent bypass via
|
||||||
// path segments like https://evil.com/cognitiveservices.azure.com/
|
// path segments like https://evil.com/cognitiveservices.azure.com/
|
||||||
let isAzure = false
|
let isAzure = false
|
||||||
@@ -1056,8 +1058,8 @@ class OpenAIShimBeta {
|
|||||||
messages: OpenAIShimMessages
|
messages: OpenAIShimMessages
|
||||||
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
|
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
|
||||||
|
|
||||||
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh') {
|
constructor(defaultHeaders: Record<string, string>, reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh', providerOverride?: { model: string; baseURL: string; apiKey: string }) {
|
||||||
this.messages = new OpenAIShimMessages(defaultHeaders, reasoningEffort)
|
this.messages = new OpenAIShimMessages(defaultHeaders, reasoningEffort, providerOverride)
|
||||||
this.reasoningEffort = reasoningEffort
|
this.reasoningEffort = reasoningEffort
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1067,6 +1069,7 @@ export function createOpenAIShimClient(options: {
|
|||||||
maxRetries?: number
|
maxRetries?: number
|
||||||
timeout?: number
|
timeout?: number
|
||||||
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
|
reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'
|
||||||
|
providerOverride?: { model: string; baseURL: string; apiKey: string }
|
||||||
}): unknown {
|
}): unknown {
|
||||||
hydrateGithubModelsTokenFromSecureStorage()
|
hydrateGithubModelsTokenFromSecureStorage()
|
||||||
|
|
||||||
@@ -1089,7 +1092,7 @@ export function createOpenAIShimClient(options: {
|
|||||||
|
|
||||||
const beta = new OpenAIShimBeta({
|
const beta = new OpenAIShimBeta({
|
||||||
...(options.defaultHeaders ?? {}),
|
...(options.defaultHeaders ?? {}),
|
||||||
}, options.reasoningEffort)
|
}, options.reasoningEffort, options.providerOverride)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
beta,
|
beta,
|
||||||
|
|||||||
@@ -644,7 +644,8 @@ export const AgentTool = buildTool({
|
|||||||
useExactTools: true
|
useExactTools: true
|
||||||
}),
|
}),
|
||||||
worktreePath: worktreeInfo?.worktreePath,
|
worktreePath: worktreeInfo?.worktreePath,
|
||||||
description
|
description,
|
||||||
|
agentName: name,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Helper to wrap execution with a cwd override: explicit cwd arg (KAIROS)
|
// Helper to wrap execution with a cwd override: explicit cwd arg (KAIROS)
|
||||||
|
|||||||
@@ -57,6 +57,8 @@ import { clearSessionHooks } from '../../utils/hooks/sessionHooks.js'
|
|||||||
import { executeSubagentStartHooks } from '../../utils/hooks.js'
|
import { executeSubagentStartHooks } from '../../utils/hooks.js'
|
||||||
import { createUserMessage } from '../../utils/messages.js'
|
import { createUserMessage } from '../../utils/messages.js'
|
||||||
import { getAgentModel } from '../../utils/model/agent.js'
|
import { getAgentModel } from '../../utils/model/agent.js'
|
||||||
|
import { resolveAgentProvider } from '../../services/api/agentRouting.js'
|
||||||
|
import { getInitialSettings } from '../../utils/settings/settings.js'
|
||||||
import type { ModelAlias } from '../../utils/model/aliases.js'
|
import type { ModelAlias } from '../../utils/model/aliases.js'
|
||||||
import {
|
import {
|
||||||
clearAgentTranscriptSubdir,
|
clearAgentTranscriptSubdir,
|
||||||
@@ -267,6 +269,7 @@ export async function* runAgent({
|
|||||||
description,
|
description,
|
||||||
transcriptSubdir,
|
transcriptSubdir,
|
||||||
onQueryProgress,
|
onQueryProgress,
|
||||||
|
agentName,
|
||||||
}: {
|
}: {
|
||||||
agentDefinition: AgentDefinition
|
agentDefinition: AgentDefinition
|
||||||
promptMessages: Message[]
|
promptMessages: Message[]
|
||||||
@@ -326,6 +329,8 @@ export async function* runAgent({
|
|||||||
* during long single-block streams (e.g. thinking) where no assistant
|
* during long single-block streams (e.g. thinking) where no assistant
|
||||||
* message is yielded for >60s. */
|
* message is yielded for >60s. */
|
||||||
onQueryProgress?: () => void
|
onQueryProgress?: () => void
|
||||||
|
/** Agent name (team member name) for routing resolution */
|
||||||
|
agentName?: string
|
||||||
}): AsyncGenerator<Message, void> {
|
}): AsyncGenerator<Message, void> {
|
||||||
// Track subagent usage for feature discovery
|
// Track subagent usage for feature discovery
|
||||||
|
|
||||||
@@ -344,6 +349,14 @@ export async function* runAgent({
|
|||||||
permissionMode,
|
permissionMode,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Resolve per-agent provider routing from settings
|
||||||
|
const providerOverride = resolveAgentProvider(
|
||||||
|
agentName,
|
||||||
|
agentDefinition.agentType,
|
||||||
|
getInitialSettings(),
|
||||||
|
)
|
||||||
|
const effectiveModel = providerOverride ? providerOverride.model : resolvedAgentModel
|
||||||
|
|
||||||
const agentId = override?.agentId ? override.agentId : createAgentId()
|
const agentId = override?.agentId ? override.agentId : createAgentId()
|
||||||
|
|
||||||
// Route this agent's transcript into a grouping subdirectory if requested
|
// Route this agent's transcript into a grouping subdirectory if requested
|
||||||
@@ -675,7 +688,8 @@ export async function* runAgent({
|
|||||||
commands: [],
|
commands: [],
|
||||||
debug: toolUseContext.options.debug,
|
debug: toolUseContext.options.debug,
|
||||||
verbose: toolUseContext.options.verbose,
|
verbose: toolUseContext.options.verbose,
|
||||||
mainLoopModel: resolvedAgentModel,
|
mainLoopModel: effectiveModel,
|
||||||
|
providerOverride: providerOverride ?? undefined,
|
||||||
// For fork children (useExactTools), inherit thinking config to match the
|
// For fork children (useExactTools), inherit thinking config to match the
|
||||||
// parent's API request prefix for prompt cache hits. For regular
|
// parent's API request prefix for prompt cache hits. For regular
|
||||||
// sub-agents, disable thinking to control output token costs.
|
// sub-agents, disable thinking to control output token costs.
|
||||||
|
|||||||
@@ -713,6 +713,27 @@ export const SettingsSchema = lazySchema(() =>
|
|||||||
.string()
|
.string()
|
||||||
.optional()
|
.optional()
|
||||||
.describe('Advisor model for the server-side advisor tool.'),
|
.describe('Advisor model for the server-side advisor tool.'),
|
||||||
|
agentModels: z
|
||||||
|
.record(
|
||||||
|
z.string(),
|
||||||
|
z.object({
|
||||||
|
base_url: z.string().url().describe('OpenAI-compatible API endpoint (must be https:// or http://)'),
|
||||||
|
api_key: z.string().describe('API key for this provider'),
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.optional()
|
||||||
|
.describe(
|
||||||
|
'Map of model name to provider connection info. ' +
|
||||||
|
'Example: { "deepseek-chat": { "base_url": "https://api.deepseek.com/v1", "api_key": "sk-xxx" } }',
|
||||||
|
),
|
||||||
|
agentRouting: z
|
||||||
|
.record(z.string(), z.string())
|
||||||
|
.optional()
|
||||||
|
.describe(
|
||||||
|
'Map of agent identifier (subagent_type or team member name) to model name. ' +
|
||||||
|
'Use "default" key as fallback. Model name must exist in agentModels. ' +
|
||||||
|
'Example: { "Explore": "deepseek-chat", "general-purpose": "gpt-4o", "default": "gpt-4o" }',
|
||||||
|
),
|
||||||
fastMode: z
|
fastMode: z
|
||||||
.boolean()
|
.boolean()
|
||||||
.optional()
|
.optional()
|
||||||
|
|||||||
Reference in New Issue
Block a user