Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
86bce4ae74 | ||
|
|
c13842e91c | ||
|
|
458120889f | ||
|
|
ee19159c17 | ||
|
|
13de4e85df | ||
|
|
a5bfcbbadf | ||
|
|
268c0398e4 | ||
|
|
761924daa7 | ||
|
|
e908864da7 | ||
|
|
b95d2221df | ||
|
|
2b15e16421 | ||
|
|
6a62e3ff76 | ||
|
|
06e7684eb5 | ||
|
|
ae3b723f3b | ||
|
|
a6a3de5ac1 | ||
|
|
64582c119d | ||
|
|
85eab2751e | ||
|
|
4d4fb2880e | ||
|
|
fdef4a1b4c | ||
|
|
4cb963e660 | ||
|
|
b09972f223 | ||
|
|
336ddcc50d |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,6 +7,7 @@ dist/
|
||||
.openclaude-profile.json
|
||||
reports/
|
||||
GEMINI.md
|
||||
CLAUDE.md
|
||||
package-lock.json
|
||||
/.claude
|
||||
coverage/
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
".": "0.5.1"
|
||||
".": "0.6.0"
|
||||
}
|
||||
|
||||
35
CHANGELOG.md
35
CHANGELOG.md
@@ -1,5 +1,40 @@
|
||||
# Changelog
|
||||
|
||||
## [0.6.0](https://github.com/Gitlawb/openclaude/compare/v0.5.2...v0.6.0) (2026-04-22)
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
* add model caching and benchmarking utilities ([#671](https://github.com/Gitlawb/openclaude/issues/671)) ([2b15e16](https://github.com/Gitlawb/openclaude/commit/2b15e16421f793f954a92c53933a07094544b29d))
|
||||
* add thinking token extraction ([#798](https://github.com/Gitlawb/openclaude/issues/798)) ([268c039](https://github.com/Gitlawb/openclaude/commit/268c0398e4bf1ab898069c61500a2b3c226a0322))
|
||||
* **api:** compress old tool_result content for small-context providers ([#801](https://github.com/Gitlawb/openclaude/issues/801)) ([a6a3de5](https://github.com/Gitlawb/openclaude/commit/a6a3de5ac155fe9d00befbfcab98d439314effd8))
|
||||
* **api:** improve local provider reliability with readiness and self-healing ([#738](https://github.com/Gitlawb/openclaude/issues/738)) ([4cb963e](https://github.com/Gitlawb/openclaude/commit/4cb963e660dbd6ee438c04042700db05a9d32c59))
|
||||
* **api:** smart model routing primitive (cheap-for-simple, strong-for-hard) ([#785](https://github.com/Gitlawb/openclaude/issues/785)) ([e908864](https://github.com/Gitlawb/openclaude/commit/e908864da7e7c987a98053ac5d18d702e192db2b))
|
||||
* enable 15 additional feature flags in open build ([#667](https://github.com/Gitlawb/openclaude/issues/667)) ([6a62e3f](https://github.com/Gitlawb/openclaude/commit/6a62e3ff76ba9ba446b8e20cf2bb139ee76a9387))
|
||||
* native Anthropic API mode for Claude models on GitHub Copilot ([#579](https://github.com/Gitlawb/openclaude/issues/579)) ([fdef4a1](https://github.com/Gitlawb/openclaude/commit/fdef4a1b4ce218ded4937ca83b30acce7c726472))
|
||||
* **provider:** expose Atomic Chat in /provider picker with autodetect ([#810](https://github.com/Gitlawb/openclaude/issues/810)) ([ee19159](https://github.com/Gitlawb/openclaude/commit/ee19159c17b3de3b4a8b4a4541a6569f4261d54e))
|
||||
* **provider:** zero-config autodetection primitive ([#784](https://github.com/Gitlawb/openclaude/issues/784)) ([a5bfcbb](https://github.com/Gitlawb/openclaude/commit/a5bfcbbadf8e9a1fd42f3e103d295524b8da64b0))
|
||||
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* **api:** ensure strict role sequence and filter empty assistant messages after interruption ([#745](https://github.com/Gitlawb/openclaude/issues/745) regression) ([#794](https://github.com/Gitlawb/openclaude/issues/794)) ([06e7684](https://github.com/Gitlawb/openclaude/commit/06e7684eb56df8e694ac784575e163641931c44c))
|
||||
* Collapse all-text arrays to string for DeepSeek compatibility ([#806](https://github.com/Gitlawb/openclaude/issues/806)) ([761924d](https://github.com/Gitlawb/openclaude/commit/761924daa7e225fe8acf41651408c7cae639a511))
|
||||
* **model:** codex/nvidia-nim/minimax now read OPENAI_MODEL env ([#815](https://github.com/Gitlawb/openclaude/issues/815)) ([4581208](https://github.com/Gitlawb/openclaude/commit/458120889f6ce54cc9f0b287461d5e38eae48a20))
|
||||
* **provider:** saved profile ignored when stale CLAUDE_CODE_USE_* in shell ([#807](https://github.com/Gitlawb/openclaude/issues/807)) ([13de4e8](https://github.com/Gitlawb/openclaude/commit/13de4e85df7f5fadc8cd15a76076374dc112360b))
|
||||
* rename .claude.json to .openclaude.json with legacy fallback ([#582](https://github.com/Gitlawb/openclaude/issues/582)) ([4d4fb28](https://github.com/Gitlawb/openclaude/commit/4d4fb2880e4d0e3a62d8715e1ec13d932e736279))
|
||||
* replace discontinued gemini-2.5-pro-preview-03-25 with stable gemini-2.5-pro ([#802](https://github.com/Gitlawb/openclaude/issues/802)) ([64582c1](https://github.com/Gitlawb/openclaude/commit/64582c119d5d0278195271379da4a68d59a89c1f)), closes [#398](https://github.com/Gitlawb/openclaude/issues/398)
|
||||
* **security:** harden project settings trust boundary + MCP sanitization ([#789](https://github.com/Gitlawb/openclaude/issues/789)) ([ae3b723](https://github.com/Gitlawb/openclaude/commit/ae3b723f3b297b49925cada4728f3174aee8bf12))
|
||||
* **test:** autoCompact floor assertion is flag-sensitive ([#816](https://github.com/Gitlawb/openclaude/issues/816)) ([c13842e](https://github.com/Gitlawb/openclaude/commit/c13842e91c7227246520955de6ae0636b30def9a))
|
||||
* **ui:** prevent provider manager lag by deferring sync I/O ([#803](https://github.com/Gitlawb/openclaude/issues/803)) ([85eab27](https://github.com/Gitlawb/openclaude/commit/85eab2751e7d351bb0ed6a3fe0e15461d241c9cb))
|
||||
|
||||
## [0.5.2](https://github.com/Gitlawb/openclaude/compare/v0.5.1...v0.5.2) (2026-04-20)
|
||||
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* **api:** replace phrase-based reasoning sanitizer with tag-based filter ([#779](https://github.com/Gitlawb/openclaude/issues/779)) ([336ddcc](https://github.com/Gitlawb/openclaude/commit/336ddcc50d59d79ebff50993f2673652aecb0d7d))
|
||||
|
||||
## [0.5.1](https://github.com/Gitlawb/openclaude/compare/v0.5.0...v0.5.1) (2026-04-20)
|
||||
|
||||
|
||||
|
||||
@@ -125,7 +125,7 @@ Advanced and source-build guides:
|
||||
| Codex OAuth | `/provider` | Opens ChatGPT sign-in in your browser and stores Codex credentials securely |
|
||||
| Codex | `/provider` | Uses existing Codex CLI auth, OpenClaude secure storage, or env credentials |
|
||||
| Ollama | `/provider`, env vars, or `ollama launch` | Local inference with no API key |
|
||||
| Atomic Chat | advanced setup | Local Apple Silicon backend |
|
||||
| Atomic Chat | `/provider`, env vars, or `bun run dev:atomic-chat` | Local model provider; auto-detects loaded models |
|
||||
| Bedrock / Vertex / Foundry | env vars | Additional provider integrations for supported environments |
|
||||
|
||||
## What Works
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@gitlawb/openclaude",
|
||||
"version": "0.5.1",
|
||||
"version": "0.6.0",
|
||||
"description": "Claude Code opened to any LLM — OpenAI, Gemini, DeepSeek, Ollama, and 200+ models",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
|
||||
@@ -19,30 +19,46 @@ const version = pkg.version
|
||||
// Most Anthropic-internal features stay off; open-build features can be
|
||||
// selectively enabled here when their full source exists in the mirror.
|
||||
const featureFlags: Record<string, boolean> = {
|
||||
VOICE_MODE: false,
|
||||
PROACTIVE: false,
|
||||
KAIROS: false,
|
||||
BRIDGE_MODE: false,
|
||||
DAEMON: false,
|
||||
AGENT_TRIGGERS: false,
|
||||
MONITOR_TOOL: true,
|
||||
ABLATION_BASELINE: false,
|
||||
DUMP_SYSTEM_PROMPT: false,
|
||||
CACHED_MICROCOMPACT: false,
|
||||
COORDINATOR_MODE: true,
|
||||
BUILTIN_EXPLORE_PLAN_AGENTS: true,
|
||||
CONTEXT_COLLAPSE: false,
|
||||
COMMIT_ATTRIBUTION: false,
|
||||
TEAMMEM: true,
|
||||
UDS_INBOX: false,
|
||||
BG_SESSIONS: false,
|
||||
AWAY_SUMMARY: false,
|
||||
TRANSCRIPT_CLASSIFIER: false,
|
||||
WEB_BROWSER_TOOL: false,
|
||||
MESSAGE_ACTIONS: true,
|
||||
BUDDY: true,
|
||||
CHICAGO_MCP: false,
|
||||
COWORKER_TYPE_TELEMETRY: false,
|
||||
// ── Disabled: require Anthropic infrastructure or missing source ─────
|
||||
VOICE_MODE: false, // Push-to-talk STT via claude.ai OAuth endpoint
|
||||
PROACTIVE: false, // Autonomous agent mode (missing proactive/ module)
|
||||
KAIROS: false, // Persistent assistant/session mode (cloud backend)
|
||||
BRIDGE_MODE: false, // Remote desktop bridge via CCR infrastructure
|
||||
DAEMON: false, // Background daemon process (stubbed in open build)
|
||||
AGENT_TRIGGERS: false, // Scheduled remote agent triggers
|
||||
ABLATION_BASELINE: false, // A/B testing harness for eval experiments
|
||||
CONTEXT_COLLAPSE: false, // Context collapsing optimization (stubbed)
|
||||
COMMIT_ATTRIBUTION: false, // Co-Authored-By metadata in git commits
|
||||
UDS_INBOX: false, // Unix Domain Socket inter-session messaging
|
||||
BG_SESSIONS: false, // Background sessions via tmux (stubbed)
|
||||
WEB_BROWSER_TOOL: false, // Built-in browser automation (source not mirrored)
|
||||
CHICAGO_MCP: false, // Computer-use MCP (native Swift modules stubbed)
|
||||
COWORKER_TYPE_TELEMETRY: false, // Telemetry for agent/coworker type classification
|
||||
|
||||
// ── Enabled: upstream defaults ──────────────────────────────────────
|
||||
COORDINATOR_MODE: true, // Multi-agent coordinator with worker delegation
|
||||
BUILTIN_EXPLORE_PLAN_AGENTS: true, // Built-in Explore/Plan specialized subagents
|
||||
BUDDY: true, // Buddy mode for paired programming
|
||||
MONITOR_TOOL: true, // MCP server monitoring/streaming tool
|
||||
TEAMMEM: true, // Team memory management
|
||||
MESSAGE_ACTIONS: true, // Message action buttons in the UI
|
||||
|
||||
// ── Enabled: new activations ────────────────────────────────────────
|
||||
DUMP_SYSTEM_PROMPT: true, // --dump-system-prompt CLI flag for debugging
|
||||
CACHED_MICROCOMPACT: true, // Cache-aware tool result truncation optimization
|
||||
AWAY_SUMMARY: true, // "While you were away" recap after 5min blur
|
||||
TRANSCRIPT_CLASSIFIER: true, // Auto-approval classifier for safe tool uses
|
||||
ULTRATHINK: true, // Deep thinking mode — type "ultrathink" to boost reasoning
|
||||
TOKEN_BUDGET: true, // Token budget tracking with usage warnings
|
||||
HISTORY_PICKER: true, // Enhanced interactive prompt history picker
|
||||
QUICK_SEARCH: true, // Ctrl+G quick search across prompts
|
||||
SHOT_STATS: true, // Shot distribution stats in session summary
|
||||
EXTRACT_MEMORIES: true, // Auto-extract durable memories from conversations
|
||||
FORK_SUBAGENT: true, // Implicit context-forking when omitting subagent_type
|
||||
VERIFICATION_AGENT: true, // Built-in read-only agent for test/verification
|
||||
MCP_SKILLS: true, // Discover skills dynamically from MCP server resources
|
||||
PROMPT_CACHE_BREAK_DETECTION: true, // Detect & log unexpected prompt cache invalidations
|
||||
HOOK_PROMPTS: true, // Allow tools to request interactive user prompts
|
||||
}
|
||||
|
||||
// ── Pre-process: replace feature() calls with boolean literals ──────
|
||||
|
||||
@@ -50,6 +50,23 @@ describe('growthbook stub — local feature flag overrides', () => {
|
||||
expect(stub.getAllGrowthBookFeatures()).toEqual({})
|
||||
})
|
||||
|
||||
// ── Open-build defaults (_openBuildDefaults) ────────────────────
|
||||
|
||||
test('returns open-build default when flags file is absent', () => {
|
||||
// tengu_passport_quail is in _openBuildDefaults as true; without a
|
||||
// flags file the stub should return the open-build override, not
|
||||
// the call-site defaultValue.
|
||||
expect(stub.getFeatureValue_CACHED_MAY_BE_STALE('tengu_passport_quail', false)).toBe(true)
|
||||
expect(stub.getFeatureValue_CACHED_MAY_BE_STALE('tengu_coral_fern', false)).toBe(true)
|
||||
})
|
||||
|
||||
test('flags file overrides open-build defaults', () => {
|
||||
// User-provided feature-flags.json takes priority over _openBuildDefaults.
|
||||
writeFileSync(flagsFile, JSON.stringify({ tengu_passport_quail: false }))
|
||||
|
||||
expect(stub.getFeatureValue_CACHED_MAY_BE_STALE('tengu_passport_quail', true)).toBe(false)
|
||||
})
|
||||
|
||||
// ── Valid JSON object ────────────────────────────────────────────
|
||||
|
||||
test('loads and returns values from a valid JSON file', () => {
|
||||
|
||||
@@ -40,6 +40,151 @@ import _os from 'node:os';
|
||||
|
||||
let _flags = undefined;
|
||||
|
||||
// ── Open-build GrowthBook overrides ───────────────────────────────────
|
||||
// Override upstream defaultValue for runtime gates tied to build-time
|
||||
// features. Only keys that DIFFER from upstream belong here — the
|
||||
// catalog below is pure documentation and does NOT affect resolution.
|
||||
//
|
||||
// Priority: ~/.claude/feature-flags.json > _openBuildDefaults > defaultValue
|
||||
//
|
||||
// To override at runtime, create ~/.claude/feature-flags.json:
|
||||
// { "tengu_some_flag": true }
|
||||
const _openBuildDefaults = {
|
||||
'tengu_sedge_lantern': true, // AWAY_SUMMARY — "while you were away" recap (upstream: false)
|
||||
'tengu_hive_evidence': true, // VERIFICATION_AGENT — read-only test/verification agent (upstream: false)
|
||||
'tengu_passport_quail': true, // EXTRACT_MEMORIES — enable memory extraction (upstream: false)
|
||||
'tengu_coral_fern': true, // EXTRACT_MEMORIES — enable memory search in past context (upstream: false)
|
||||
};
|
||||
|
||||
/* ── Known runtime feature keys (reference only) ───────────────────────
|
||||
* This catalog does NOT participate in flag resolution. It documents
|
||||
* the known GrowthBook keys and their upstream default values, scraped
|
||||
* from src/ call sites. It is NOT exhaustive — new keys may be added
|
||||
* upstream between catalog updates.
|
||||
*
|
||||
* Some keys have different defaults at different call sites — this is
|
||||
* intentional upstream (the server unifies the value at runtime).
|
||||
*
|
||||
* To activate any of these, add them to ~/.claude/feature-flags.json
|
||||
* or to _openBuildDefaults above.
|
||||
*
|
||||
* ── Reasoning & thinking ──────────────────────────────────────────────
|
||||
* tengu_turtle_carbon = true ULTRATHINK deep thinking runtime gate
|
||||
* tengu_thinkback = gate /thinkback replay command
|
||||
*
|
||||
* ── Agents & orchestration ────────────────────────────────────────────
|
||||
* tengu_amber_flint = true Agent swarms coordination
|
||||
* tengu_amber_stoat = true Built-in agent availability (Explore, Plan, etc.)
|
||||
* tengu_agent_list_attach = true Attach file context to agent list
|
||||
* tengu_auto_background_agents = false Auto-spawn background agents
|
||||
* tengu_slim_subagent_claudemd = true Lighter ClaudeMD for subagents
|
||||
* tengu_hive_evidence = false Verification agent / evidence tracking (4 call sites)
|
||||
* tengu_ultraplan_model = model cfg ULTRAPLAN model selection (dynamic config)
|
||||
*
|
||||
* ── Memory & context ──────────────────────────────────────────────────
|
||||
* tengu_passport_quail = false EXTRACT_MEMORIES main gate (isExtractModeActive)
|
||||
* tengu_coral_fern = false EXTRACT_MEMORIES search in past context
|
||||
* tengu_slate_thimble = false Memory dir paths (non-interactive sessions)
|
||||
* tengu_herring_clock = true/false Team memory paths (varies by call site)
|
||||
* tengu_bramble_lintel = null Extract memories throttle (null → every turn)
|
||||
* tengu_sedge_lantern = false AWAY_SUMMARY "while you were away" recap
|
||||
* tengu_session_memory = false Session memory service
|
||||
* tengu_sm_config = {} Session memory config (dynamic)
|
||||
* tengu_sm_compact_config = {} Session memory compaction config (dynamic)
|
||||
* tengu_cobalt_raccoon = false Reactive compaction (suppress auto-compact)
|
||||
* tengu_pebble_leaf_prune = false Session storage pruning
|
||||
*
|
||||
* ── Kairos & cron ─────────────────────────────────────────────────────
|
||||
* tengu_kairos_brief = false Brief layout mode (KAIROS)
|
||||
* tengu_kairos_brief_config = {} Brief config (dynamic)
|
||||
* tengu_kairos_cron = true Cron scheduler enable
|
||||
* tengu_kairos_cron_durable = true Durable (disk-persistent) cron tasks
|
||||
* tengu_kairos_cron_config = {} Cron jitter config (dynamic)
|
||||
*
|
||||
* ── Bridge & remote (require Anthropic infra) ─────────────────────────
|
||||
* tengu_ccr_bridge = false CCR bridge connection
|
||||
* tengu_ccr_bridge_multi_session = gate Multi-session spawn mode
|
||||
* tengu_ccr_mirror = false CCR session mirroring
|
||||
* tengu_ccr_bundle_seed_enabled = gate Git bundle seeding for CCR
|
||||
* tengu_ccr_bundle_max_bytes = null Bundle size limit (null → default)
|
||||
* tengu_bridge_repl_v2 = false Environment-less REPL bridge v2
|
||||
* tengu_bridge_repl_v2_cse_shim_enabled = true CSE→Session tag retag shim
|
||||
* tengu_bridge_min_version = {min:'0'} Min CLI version for bridge (dynamic)
|
||||
* tengu_bridge_initial_history_cap = 200 Initial history cap for bridge
|
||||
* tengu_bridge_system_init = false Bridge system initialization
|
||||
* tengu_cobalt_harbor = false Auto-connect CCR at startup
|
||||
* tengu_cobalt_lantern = false Remote setup preconditions
|
||||
* tengu_remote_backend = false Remote TUI backend
|
||||
* tengu_surreal_dali = false Remote agent tasks / triggers
|
||||
*
|
||||
* ── Prompt & API ──────────────────────────────────────────────────────
|
||||
* tengu_attribution_header = true Attribution header in API requests
|
||||
* tengu_basalt_3kr = true MCP instructions delta
|
||||
* tengu_slate_prism = true/false Message formatting (varies by call site)
|
||||
* tengu_amber_prism = false Message content formatting
|
||||
* tengu_amber_json_tools = false JSON format for tool schemas
|
||||
* tengu_fgts = false API feature gates
|
||||
* tengu_otk_slot_v1 = false One-time key slots for API auth
|
||||
* tengu_cicada_nap_ms = 0 Background GrowthBook refresh throttle (ms)
|
||||
* tengu_miraculo_the_bard = false Service initialization gate
|
||||
* tengu_immediate_model_command = false Immediate /model command execution
|
||||
* tengu_chomp_inflection = false Prompt suggestions after responses
|
||||
* tengu_tool_pear = gate API betas for tool use
|
||||
* tengu-off-switch = {act:false} Service kill switch (dynamic; uses dash)
|
||||
*
|
||||
* ── Permissions & security ────────────────────────────────────────────
|
||||
* tengu_birch_trellis = true Bash auto-mode permissions config
|
||||
* tengu_auto_mode_config = {} Auto-mode configuration (dynamic, many call sites)
|
||||
* tengu_iron_gate_closed = true Permission iron gate (with refresh)
|
||||
* tengu_destructive_command_warning = false Warning for destructive bash commands
|
||||
* tengu_disable_bypass_permissions_mode = security Security killswitch (always false in open build)
|
||||
*
|
||||
* ── UI & UX ───────────────────────────────────────────────────────────
|
||||
* tengu_willow_mode = 'off' REPL rendering mode
|
||||
* tengu_terminal_panel = false Terminal panel keybinding
|
||||
* tengu_terminal_sidebar = false Terminal sidebar in REPL/config
|
||||
* tengu_marble_sandcastle = false Fast mode gate
|
||||
* tengu_jade_anvil_4 = false Rate limit options UI ordering
|
||||
* tengu_collage_kaleidoscope = true Native clipboard image paste (macOS)
|
||||
* tengu_lapis_finch = false Plugin/hint recommendation
|
||||
* tengu_lodestone_enabled = false Deep links claude-cli:// protocol
|
||||
* tengu_copper_panda = false Skill improvement suggestions
|
||||
* tengu_desktop_upsell = {} Desktop app upsell config (dynamic)
|
||||
* tengu-top-of-feed-tip = {} Emergency tip of feed (dynamic; uses dash)
|
||||
*
|
||||
* ── File operations ───────────────────────────────────────────────────
|
||||
* tengu_quartz_lantern = false File read/write dedup optimization
|
||||
* tengu_moth_copse = false Attachments handling (variant A)
|
||||
* tengu_marble_fox = false Attachments handling (variant B)
|
||||
* tengu_scratch = gate Scratchpad filesystem access / coordinator
|
||||
*
|
||||
* ── MCP & plugins ─────────────────────────────────────────────────────
|
||||
* tengu_harbor = false MCP channel allowlist verification
|
||||
* tengu_harbor_permissions = false MCP channel permissions enforcement
|
||||
* tengu_copper_bridge = false Chrome MCP bridge
|
||||
* tengu_chrome_auto_enable = false Auto-enable Chrome MCP on startup
|
||||
* tengu_glacier_2xr = false Enhanced tool search / ToolSearchTool
|
||||
* tengu_malort_pedway = {} Computer-use (Chicago) config (dynamic)
|
||||
*
|
||||
* ── VSCode / IDE ──────────────────────────────────────────────────────
|
||||
* tengu_quiet_fern = false VSCode browser support
|
||||
* tengu_vscode_cc_auth = false VSCode in-band OAuth via claude_authenticate
|
||||
* tengu_vscode_review_upsell = gate VSCode review upsell
|
||||
* tengu_vscode_onboarding = gate VSCode onboarding experience
|
||||
*
|
||||
* ── Voice ─────────────────────────────────────────────────────────────
|
||||
* tengu_amber_quartz_disabled = false VOICE_MODE kill-switch (false = voice allowed)
|
||||
*
|
||||
* ── Auto-updater (stubbed in open build) ──────────────────────────────
|
||||
* tengu_version_config = {min:'0'} Min version enforcement (dynamic)
|
||||
* tengu_max_version_config = {} Max version / deprecation config (dynamic)
|
||||
*
|
||||
* ── Telemetry & tracing ───────────────────────────────────────────────
|
||||
* tengu_trace_lantern = false Beta session tracing
|
||||
* tengu_chair_sermon = gate Analytics / message formatting gate
|
||||
* tengu_strap_foyer = false Settings sync to cloud
|
||||
*/
|
||||
|
||||
function _loadFlags() {
|
||||
if (_flags !== undefined) return;
|
||||
try {
|
||||
@@ -55,6 +200,7 @@ function _loadFlags() {
|
||||
/**
 * Resolve the value of a runtime feature key.
 *
 * Lookup order:
 *   1. the loaded flags object (`_flags`), when it is non-null and has `key`
 *      as an own property;
 *   2. `_openBuildDefaults`, when it has `key` as an own property;
 *   3. the caller-supplied `defaultValue`.
 *
 * Own-property checks are used so that inherited properties never count as
 * a configured flag.
 */
function _getFlagValue(key, defaultValue) {
  // Must run first: _flags is only meaningful after the loader has executed.
  _loadFlags();

  const loadedFlags = _flags;
  if (loadedFlags != null && Object.hasOwn(loadedFlags, key)) {
    return loadedFlags[key];
  }

  return Object.hasOwn(_openBuildDefaults, key)
    ? _openBuildDefaults[key]
    : defaultValue;
}
|
||||
|
||||
|
||||
@@ -20,6 +20,23 @@ describe('formatReachabilityFailureDetail', () => {
|
||||
)
|
||||
})
|
||||
|
||||
test('redacts credentials and sensitive query parameters in endpoint details', () => {
|
||||
const detail = formatReachabilityFailureDetail(
|
||||
'http://user:pass@localhost:11434/v1/models?token=abc123&mode=test',
|
||||
502,
|
||||
'bad gateway',
|
||||
{
|
||||
transport: 'chat_completions',
|
||||
requestedModel: 'llama3.1:8b',
|
||||
resolvedModel: 'llama3.1:8b',
|
||||
},
|
||||
)
|
||||
|
||||
expect(detail).toBe(
|
||||
'Unexpected status 502 from http://redacted:redacted@localhost:11434/v1/models?token=redacted&mode=test. Body: bad gateway',
|
||||
)
|
||||
})
|
||||
|
||||
test('adds alias/entitlement hint for codex model support 400s', () => {
|
||||
const detail = formatReachabilityFailureDetail(
|
||||
'https://chatgpt.com/backend-api/codex/responses',
|
||||
|
||||
@@ -7,6 +7,11 @@ import {
|
||||
resolveProviderRequest,
|
||||
isLocalProviderUrl as isProviderLocalUrl,
|
||||
} from '../src/services/api/providerConfig.js'
|
||||
import {
|
||||
getLocalOpenAICompatibleProviderLabel,
|
||||
probeOllamaGenerationReadiness,
|
||||
} from '../src/utils/providerDiscovery.js'
|
||||
import { redactUrlForDisplay } from '../src/utils/urlRedaction.js'
|
||||
|
||||
type CheckResult = {
|
||||
ok: boolean
|
||||
@@ -69,7 +74,7 @@ export function formatReachabilityFailureDetail(
|
||||
},
|
||||
): string {
|
||||
const compactBody = responseBody.trim().replace(/\s+/g, ' ').slice(0, 240)
|
||||
const base = `Unexpected status ${status} from ${endpoint}.`
|
||||
const base = `Unexpected status ${status} from ${redactUrlForDisplay(endpoint)}.`
|
||||
const bodySuffix = compactBody ? ` Body: ${compactBody}` : ''
|
||||
|
||||
if (request.transport !== 'codex_responses' || status !== 400) {
|
||||
@@ -255,7 +260,7 @@ function checkOpenAIEnv(): CheckResult[] {
|
||||
results.push(pass('OPENAI_MODEL', process.env.OPENAI_MODEL))
|
||||
}
|
||||
|
||||
results.push(pass('OPENAI_BASE_URL', request.baseUrl))
|
||||
results.push(pass('OPENAI_BASE_URL', redactUrlForDisplay(request.baseUrl)))
|
||||
|
||||
if (request.transport === 'codex_responses') {
|
||||
const credentials = resolveCodexApiCredentials(process.env)
|
||||
@@ -308,7 +313,7 @@ async function checkBaseUrlReachability(): Promise<CheckResult> {
|
||||
return pass('Provider reachability', 'Skipped (OpenAI-compatible mode disabled).')
|
||||
}
|
||||
|
||||
if (useGithub) {
|
||||
if (useGithub && !useOpenAI) {
|
||||
return pass(
|
||||
'Provider reachability',
|
||||
'Skipped for GitHub Models (inference endpoint differs from OpenAI /models probe).',
|
||||
@@ -326,6 +331,7 @@ async function checkBaseUrlReachability(): Promise<CheckResult> {
|
||||
const endpoint = request.transport === 'codex_responses'
|
||||
? `${request.baseUrl}/responses`
|
||||
: `${request.baseUrl}/models`
|
||||
const redactedEndpoint = redactUrlForDisplay(endpoint)
|
||||
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort(), 4000)
|
||||
@@ -375,7 +381,10 @@ async function checkBaseUrlReachability(): Promise<CheckResult> {
|
||||
})
|
||||
|
||||
if (response.status === 200 || response.status === 401 || response.status === 403) {
|
||||
return pass('Provider reachability', `Reached ${endpoint} (status ${response.status}).`)
|
||||
return pass(
|
||||
'Provider reachability',
|
||||
`Reached ${redactedEndpoint} (status ${response.status}).`,
|
||||
)
|
||||
}
|
||||
|
||||
const responseBody = await response.text().catch(() => '')
|
||||
@@ -391,12 +400,100 @@ async function checkBaseUrlReachability(): Promise<CheckResult> {
|
||||
)
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error)
|
||||
return fail('Provider reachability', `Failed to reach ${endpoint}: ${message}`)
|
||||
return fail(
|
||||
'Provider reachability',
|
||||
`Failed to reach ${redactedEndpoint}: ${message}`,
|
||||
)
|
||||
} finally {
|
||||
clearTimeout(timeout)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Diagnostic check that asks a local Ollama endpoint to actually generate a
 * response, complementing the plain reachability probe.
 *
 * The check reports a pass with a "Skipped ..." message when:
 *   - none of the Gemini / OpenAI / GitHub / Mistral provider flags is set;
 *   - GitHub mode is set without OpenAI mode;
 *   - Gemini or Mistral mode is set;
 *   - the resolved transport is `codex_responses`;
 *   - the resolved base URL is not local;
 *   - the local provider label is anything other than 'Ollama'.
 *
 * Otherwise it runs `probeOllamaGenerationReadiness` against the resolved
 * base URL and requested model and maps the returned state to a result.
 *
 * @returns A passing result when the probe state is 'ready' or the check was
 *   skipped; a failing result for 'unreachable', 'no_models', or any other
 *   probe state.
 */
async function checkProviderGenerationReadiness(): Promise<CheckResult> {
  // Single label shared by every result this check produces.
  const checkName = 'Provider generation readiness'

  const useGemini = isTruthy(process.env.CLAUDE_CODE_USE_GEMINI)
  const useOpenAI = isTruthy(process.env.CLAUDE_CODE_USE_OPENAI)
  const useGithub = isTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
  const useMistral = isTruthy(process.env.CLAUDE_CODE_USE_MISTRAL)

  if (!useGemini && !useOpenAI && !useGithub && !useMistral) {
    return pass(checkName, 'Skipped (OpenAI-compatible mode disabled).')
  }

  if (useGithub && !useOpenAI) {
    return pass(
      checkName,
      'Skipped for GitHub Models (runtime generation uses a different endpoint flow).',
    )
  }

  if (useGemini || useMistral) {
    return pass(checkName, 'Skipped for managed provider mode.')
  }

  // From here on useOpenAI is necessarily true: Gemini and Mistral are off,
  // at least one flag is on, and GitHub-without-OpenAI has already returned.
  // No separate `!useOpenAI` guard is needed.

  const request = resolveProviderRequest({
    model: process.env.OPENAI_MODEL,
    baseUrl: process.env.OPENAI_BASE_URL,
  })

  if (request.transport === 'codex_responses') {
    return pass(
      checkName,
      'Skipped for Codex responses (reachability probe already performs a lightweight generation request).',
    )
  }

  if (!isLocalBaseUrl(request.baseUrl)) {
    return pass(checkName, 'Skipped for non-local provider URL.')
  }

  const localProviderLabel = getLocalOpenAICompatibleProviderLabel(request.baseUrl)
  if (localProviderLabel !== 'Ollama') {
    return pass(
      checkName,
      `Skipped for ${localProviderLabel} (no provider-specific generation probe).`,
    )
  }

  const readiness = await probeOllamaGenerationReadiness({
    baseUrl: request.baseUrl,
    model: request.requestedModel,
  })

  if (readiness.state === 'ready') {
    return pass(
      checkName,
      `Generated a test response with ${readiness.probeModel ?? request.requestedModel}.`,
    )
  }

  if (readiness.state === 'unreachable') {
    // The base URL is redacted before display so embedded credentials are not printed.
    return fail(
      checkName,
      `Could not reach Ollama at ${redactUrlForDisplay(request.baseUrl)}.`,
    )
  }

  if (readiness.state === 'no_models') {
    return fail(
      checkName,
      'Ollama is reachable, but no installed models were found. Pull a model first (for example: ollama pull qwen2.5-coder:7b).',
    )
  }

  // Any remaining state is reported as a generation failure, with the probe's
  // detail appended when one was provided.
  const detailSuffix = readiness.detail ? ` Detail: ${readiness.detail}.` : ''
  return fail(
    checkName,
    `Ollama is reachable, but generation failed for ${readiness.probeModel ?? request.requestedModel}.${detailSuffix}`,
  )
}
|
||||
|
||||
function isAtomicChatUrl(baseUrl: string): boolean {
|
||||
try {
|
||||
const parsed = new URL(baseUrl)
|
||||
@@ -567,6 +664,7 @@ async function main(): Promise<void> {
|
||||
results.push(checkBuildArtifacts())
|
||||
results.push(...checkOpenAIEnv())
|
||||
results.push(await checkBaseUrlReachability())
|
||||
results.push(await checkProviderGenerationReadiness())
|
||||
results.push(checkOllamaProcessorMode())
|
||||
|
||||
if (!options.json) {
|
||||
|
||||
191
src/__tests__/security-hardening.test.ts
Normal file
191
src/__tests__/security-hardening.test.ts
Normal file
@@ -0,0 +1,191 @@
|
||||
/**
|
||||
* Security hardening regression tests.
|
||||
*
|
||||
* Covers:
|
||||
* 1. MCP tool result Unicode sanitization
|
||||
* 2. Sandbox settings source filtering (exclude projectSettings)
|
||||
* 3. Plugin git clone/pull hooks disabled
|
||||
* 4. ANTHROPIC_FOUNDRY_API_KEY removed from SAFE_ENV_VARS
|
||||
* 5. WebFetch SSRF protection via ssrfGuardedLookup
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test'
|
||||
import { resolve } from 'path'
|
||||
|
||||
const SRC = resolve(import.meta.dir, '..')
|
||||
const file = (relative: string) => Bun.file(resolve(SRC, relative))
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fix 1: MCP tool result Unicode sanitization
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('MCP tool result sanitization', () => {
  // These tests inspect the MCP client's source text; they do not execute it.
  const readClientSource = () => file('services/mcp/client.ts').text()

  test('transformResultContent sanitizes text content', async () => {
    const source = await readClientSource()

    // The sanitizer call on the tool list (result.tools) must be present.
    expect(source).toContain('recursivelySanitizeUnicode(result.tools)')

    // The 'text' case must also route resultContent.text through the sanitizer.
    const textCasePattern =
      /case 'text':[\s\S]*?recursivelySanitizeUnicode\(resultContent\.text\)/
    expect(source).toMatch(textCasePattern)
  })

  test('resource text content is also sanitized', async () => {
    const source = await readClientSource()

    // Resource text is sanitized together with its prefix, as one template string.
    const resourcePattern =
      /recursivelySanitizeUnicode\(\s*`\$\{prefix\}\$\{resource\.text\}`/
    expect(source).toMatch(resourcePattern)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fix 2: Sandbox settings source filtering
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('Sandbox settings trust boundary', () => {
  test('getSandboxEnabledSetting does not use getSettings_DEPRECATED', async () => {
    const source = await file('utils/sandbox/sandbox-adapter.ts').text()

    // Capture the function body: everything from the opening brace up to the
    // first closing brace that immediately follows a newline.
    const bodyPattern =
      /function getSandboxEnabledSetting\(\)[^{]*\{([\s\S]*?)\n\}/
    const captured = source.match(bodyPattern)
    expect(captured).not.toBeNull()
    const body = captured![1]

    // The merged-settings accessor must not appear in the body.
    expect(body).not.toContain('getSettings_DEPRECATED')

    // Settings must be read per source, for the user and policy sources.
    expect(body).toContain("getSettingsForSource('userSettings')")
    expect(body).toContain("getSettingsForSource('policySettings')")

    // Project-level settings must never be consulted here.
    expect(body).not.toContain("'projectSettings'")
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fix 3: Plugin git hooks disabled
|
||||
// ---------------------------------------------------------------------------
|
||||
// Static source checks: every git invocation used for plugin marketplaces
// must pass core.hooksPath=/dev/null so that hooks shipped inside a cloned
// repository are not executed.
describe('Plugin git operations disable hooks', () => {
  const MARKETPLACE_MANAGER = 'utils/plugins/marketplaceManager.ts'
  const HOOKS_DISABLED_ARG = "'core.hooksPath=/dev/null'"

  /**
   * Returns the `length` characters of the marketplace manager source that
   * start at `signature`.
   *
   * Fails the test explicitly when the signature is missing. Without this
   * check `indexOf` returns -1 and the slice would silently cover an
   * unrelated region of the file, producing a misleading failure.
   */
  async function sectionStartingAt(
    signature: string,
    length: number,
  ): Promise<string> {
    const source = await file(MARKETPLACE_MANAGER).text()
    const start = source.indexOf(signature)
    expect(start).not.toBe(-1)
    return source.slice(start, start + length)
  }

  test('gitClone includes core.hooksPath=/dev/null', async () => {
    // The clone args must disable hooks
    const cloneSection = await sectionStartingAt(
      'export async function gitClone(',
      2000,
    )
    expect(cloneSection).toContain(HOOKS_DISABLED_ARG)
  })

  test('gitPull includes core.hooksPath=/dev/null', async () => {
    const pullSection = await sectionStartingAt(
      'export async function gitPull(',
      2000,
    )
    expect(pullSection).toContain(HOOKS_DISABLED_ARG)
  })

  test('gitSubmoduleUpdate includes core.hooksPath=/dev/null', async () => {
    const subSection = await sectionStartingAt(
      'async function gitSubmoduleUpdate(',
      1000,
    )
    expect(subSection).toContain(HOOKS_DISABLED_ARG)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fix 4: ANTHROPIC_FOUNDRY_API_KEY not in SAFE_ENV_VARS
|
||||
// ---------------------------------------------------------------------------
|
||||
// Static source check: the allow-list of "safe" environment variables must
// not contain the Foundry API key, which is a credential.
describe('SAFE_ENV_VARS excludes credentials', () => {
  test('ANTHROPIC_FOUNDRY_API_KEY is not in SAFE_ENV_VARS', async () => {
    const content = await file('utils/managedEnvConstants.ts').text()

    // Locate the SAFE_ENV_VARS set definition: from its declaration up to the
    // first `])` that follows it.
    const safeStart = content.indexOf('export const SAFE_ENV_VARS')
    // If the declaration is renamed or removed, the slice below would be
    // (nearly) empty and the negative assertion would pass without checking
    // anything, so require the marker to be present.
    expect(safeStart).not.toBe(-1)

    const safeEnd = content.indexOf('])', safeStart)
    expect(safeEnd).toBeGreaterThan(safeStart)

    const safeSection = content.slice(safeStart, safeEnd)
    expect(safeSection).not.toContain('ANTHROPIC_FOUNDRY_API_KEY')
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fix 5: WebFetch SSRF protection
|
||||
// ---------------------------------------------------------------------------
|
||||
// Static source check: WebFetch must resolve hosts through the SSRF-guarded
// DNS lookup.
describe('WebFetch SSRF guard', () => {
  test('getWithPermittedRedirects uses ssrfGuardedLookup', async () => {
    const source = await file('tools/WebFetchTool/utils.ts').text()

    const guardImport =
      "import { ssrfGuardedLookup } from '../../utils/hooks/ssrfGuard.js'"
    expect(source).toContain(guardImport)

    // The axios.get call in getWithPermittedRedirects must include lookup.
    // Inspect the first 1000 characters starting at the function signature.
    const signatureAt = source.indexOf(
      'export async function getWithPermittedRedirects(',
    )
    const functionWindow = source.slice(signatureAt, signatureAt + 1000)
    expect(functionWindow).toContain('lookup: ssrfGuardedLookup')
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fix 6: Swarm permission file polling removed (security hardening)
|
||||
// ---------------------------------------------------------------------------
|
||||
// Static source checks for the swarm permission hardening: the file-based
// polling path must be gone from the poller module, the remaining file-based
// helpers must be flagged @deprecated, and the mailbox-based helpers must not.
describe('Swarm permission file polling removed', () => {
  const POLLER_MODULE = 'hooks/useSwarmPermissionPoller.ts'
  const PERMISSION_SYNC_MODULE = 'utils/swarm/permissionSync.ts'

  test('useSwarmPermissionPoller hook no longer exists', async () => {
    const content = await file(POLLER_MODULE).text()

    // The file-based polling hook must not exist — it read from an
    // unauthenticated resolved/ directory where any local process could
    // forge approval files.
    expect(content).not.toContain('function useSwarmPermissionPoller(')

    // The file-based processResponse must not exist
    expect(content).not.toContain('function processResponse(')
  })

  test('poller does not import from permissionSync', async () => {
    const content = await file(POLLER_MODULE).text()

    // Must not import anything from permissionSync — all file-based
    // functions have been removed from this module's dependencies
    expect(content).not.toContain('permissionSync')
  })

  test('file-based permission functions are marked deprecated', async () => {
    const content = await file(PERMISSION_SYNC_MODULE).text()

    // All file-based functions must have @deprecated JSDoc
    const deprecatedFns = [
      'writePermissionRequest',
      'readPendingPermissions',
      'readResolvedPermission',
      'resolvePermission',
      'pollForResponse',
      'removeWorkerResponse',
    ]

    let checked = 0
    for (const fn of deprecatedFns) {
      // Find the function and check that @deprecated appears in the 500
      // characters before it.
      const fnIndex = content.indexOf(`export async function ${fn}(`)
      // Names that are not declared as `export async function` in this
      // module are skipped rather than failed.
      if (fnIndex === -1) continue

      checked += 1
      const preceding = content.slice(Math.max(0, fnIndex - 500), fnIndex)
      expect(preceding).toContain('@deprecated')
    }

    // Because missing names are skipped above, the loop alone would pass
    // even if none of the functions were found. Require at least one hit so
    // the test cannot succeed without checking anything.
    expect(checked).toBeGreaterThan(0)
  })

  test('mailbox-based functions are NOT deprecated', async () => {
    const content = await file(PERMISSION_SYNC_MODULE).text()

    // These are the active path — must not be deprecated
    const activeFns = [
      'sendPermissionRequestViaMailbox',
      'sendPermissionResponseViaMailbox',
    ]

    for (const fn of activeFns) {
      const fnIndex = content.indexOf(`export async function ${fn}(`)
      expect(fnIndex).not.toBe(-1)

      // Only the 300 characters directly above the declaration are inspected.
      const preceding = content.slice(Math.max(0, fnIndex - 300), fnIndex)
      expect(preceding).not.toContain('@deprecated')
    }
  })
})
|
||||
56
src/commands/benchmark.ts
Normal file
56
src/commands/benchmark.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import type { ToolUseContext } from '../Tool.js'
|
||||
import type { Command } from '../types/command.js'
|
||||
import {
|
||||
benchmarkModel,
|
||||
benchmarkMultipleModels,
|
||||
formatBenchmarkResults,
|
||||
isBenchmarkSupported,
|
||||
} from '../utils/model/benchmark.js'
|
||||
import { getOllamaModelOptions } from '../utils/model/ollamaModels.js'
|
||||
|
||||
/**
 * Benchmarks one or more models and writes progress plus a formatted summary
 * to `context.stdout` (when a context with stdout is supplied).
 *
 * @param model - Model to benchmark. When omitted (or empty), the first three
 *   entries returned by `getOllamaModelOptions()` are benchmarked instead.
 * @param context - Tool context whose `stdout` receives all output. Every
 *   write is skipped when it is absent.
 */
async function runBenchmark(
  model?: string,
  context?: ToolUseContext,
): Promise<void> {
  if (!isBenchmarkSupported()) {
    context?.stdout?.write(
      'Benchmark not supported for this provider.\n' +
        'Supported: OpenAI-compatible endpoints (Ollama, NVIDIA NIM, MiniMax)\n',
    )
    return
  }

  const modelsToBenchmark: string[] = model
    ? [model]
    : getOllamaModelOptions()
        .slice(0, 3)
        .map(option => option.value)

  // With no explicit model and no discovered models there is nothing to run;
  // say so instead of reporting "Benchmarking 0 model(s)...".
  if (modelsToBenchmark.length === 0) {
    context?.stdout?.write(
      'No models found to benchmark. Pass a model name explicitly.\n',
    )
    return
  }

  context?.stdout?.write(
    `Benchmarking ${modelsToBenchmark.length} model(s)...\n`,
  )

  const results = await benchmarkMultipleModels(
    modelsToBenchmark,
    // Progress callback: one line per finished model.
    (completed, total, result) => {
      const outcome = result.success
        ? result.tokensPerSecond.toFixed(1) + ' tps'
        : 'FAILED'
      context?.stdout?.write(
        `[${completed}/${total}] ${result.model}: ` + `${outcome}\n`,
      )
    },
  )

  context?.stdout?.write('\n' + formatBenchmarkResults(results) + '\n')
}
|
||||
|
||||
/**
 * `benchmark` command: reads an optional `model` argument from the execution
 * context and delegates to `runBenchmark`.
 */
export const benchmark: Command = {
  name: 'benchmark',

  async onExecute(context: ToolUseContext): Promise<void> {
    // Missing args are treated as an empty argument bag.
    const requestedModel = (context.args ?? {}).model as string | undefined
    await runBenchmark(requestedModel, context)
  },
}
|
||||
@@ -66,10 +66,44 @@ import {
|
||||
import {
|
||||
getOllamaChatBaseUrl,
|
||||
getLocalOpenAICompatibleProviderLabel,
|
||||
hasLocalOllama,
|
||||
listOllamaModels,
|
||||
probeOllamaGenerationReadiness,
|
||||
type OllamaGenerationReadiness,
|
||||
} from '../../utils/providerDiscovery.js'
|
||||
|
||||
/**
 * Builds the user-facing explanation for a non-ready Ollama readiness probe.
 *
 * @param readiness - Probe result. Only the `unreachable`, `no_models` and
 *   `generation_failed` states produce a message.
 * @param options.baseUrl - Endpoint shown in the "unreachable" message.
 *   Defaults to `http://localhost:11434`.
 * @param options.allowManualFallback - When true, the `no_models` and
 *   `generation_failed` messages also mention entering details manually.
 * @returns The message, or an empty string for any other state.
 */
function describeOllamaReadinessIssue(
  readiness: OllamaGenerationReadiness,
  options?: {
    baseUrl?: string
    allowManualFallback?: boolean
  },
): string {
  const endpoint = options?.baseUrl ?? 'http://localhost:11434'

  switch (readiness.state) {
    case 'unreachable':
      return `Could not reach Ollama at ${endpoint}. Start Ollama first, then run /provider again.`

    case 'no_models': {
      const manualSuffix = options?.allowManualFallback
        ? ', or enter details manually'
        : ''
      return `Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first${manualSuffix}.`
    }

    case 'generation_failed': {
      const modelHint = readiness.probeModel ?? 'the selected model'
      // The quoted command must stay copy-pasteable: when the probed model is
      // unknown, show a placeholder rather than splicing the prose fallback
      // into it ("ollama run the selected model").
      const commandModel = readiness.probeModel ?? '<model>'
      const detailSuffix = readiness.detail
        ? ` Details: ${readiness.detail}.`
        : ''
      const manualSuffix = options?.allowManualFallback
        ? ' You can also enter details manually.'
        : ''
      return `Ollama is reachable and models are installed, but a generation probe failed for ${modelHint}.${detailSuffix} Run "ollama run ${commandModel}" once and retry.${manualSuffix}`
    }

    default:
      return ''
  }
}
|
||||
|
||||
type ProviderChoice = 'auto' | ProviderProfile | 'codex-oauth' | 'clear'
|
||||
|
||||
type Step =
|
||||
@@ -715,6 +749,7 @@ function AutoRecommendationStep({
|
||||
| {
|
||||
state: 'openai'
|
||||
defaultModel: string
|
||||
reason: string
|
||||
}
|
||||
| {
|
||||
state: 'error'
|
||||
@@ -728,19 +763,27 @@ function AutoRecommendationStep({
|
||||
void (async () => {
|
||||
const defaultModel = getGoalDefaultOpenAIModel(goal)
|
||||
try {
|
||||
const ollamaAvailable = await hasLocalOllama()
|
||||
if (!ollamaAvailable) {
|
||||
const readiness = await probeOllamaGenerationReadiness()
|
||||
if (readiness.state !== 'ready') {
|
||||
if (!cancelled) {
|
||||
setStatus({ state: 'openai', defaultModel })
|
||||
setStatus({
|
||||
state: 'openai',
|
||||
defaultModel,
|
||||
reason: describeOllamaReadinessIssue(readiness),
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
const models = await listOllamaModels()
|
||||
const recommended = recommendOllamaModel(models, goal)
|
||||
const recommended = recommendOllamaModel(readiness.models, goal)
|
||||
if (!recommended) {
|
||||
if (!cancelled) {
|
||||
setStatus({ state: 'openai', defaultModel })
|
||||
setStatus({
|
||||
state: 'openai',
|
||||
defaultModel,
|
||||
reason:
|
||||
'Ollama responded to a generation probe, but no recommended chat model matched this goal.',
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -796,10 +839,10 @@ function AutoRecommendationStep({
|
||||
<Dialog title="Auto setup fallback" onCancel={onCancel}>
|
||||
<Box flexDirection="column" gap={1}>
|
||||
<Text>
|
||||
No viable local Ollama chat model was detected. Auto setup can
|
||||
continue into OpenAI-compatible setup with a default model of{' '}
|
||||
Auto setup can continue into OpenAI-compatible setup with a default model of{' '}
|
||||
{status.defaultModel}.
|
||||
</Text>
|
||||
<Text dimColor>{status.reason}</Text>
|
||||
<Select
|
||||
options={[
|
||||
{ label: 'Continue to OpenAI-compatible setup', value: 'continue' },
|
||||
@@ -883,32 +926,19 @@ function OllamaModelStep({
|
||||
let cancelled = false
|
||||
|
||||
void (async () => {
|
||||
const available = await hasLocalOllama()
|
||||
if (!available) {
|
||||
const readiness = await probeOllamaGenerationReadiness()
|
||||
if (readiness.state !== 'ready') {
|
||||
if (!cancelled) {
|
||||
setStatus({
|
||||
state: 'unavailable',
|
||||
message:
|
||||
'Could not reach Ollama at http://localhost:11434. Start Ollama first, then run /provider again.',
|
||||
message: describeOllamaReadinessIssue(readiness),
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
const models = await listOllamaModels()
|
||||
if (models.length === 0) {
|
||||
if (!cancelled) {
|
||||
setStatus({
|
||||
state: 'unavailable',
|
||||
message:
|
||||
'Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first.',
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
const ranked = rankOllamaModels(models, 'balanced')
|
||||
const recommended = recommendOllamaModel(models, 'balanced')
|
||||
const ranked = rankOllamaModels(readiness.models, 'balanced')
|
||||
const recommended = recommendOllamaModel(readiness.models, 'balanced')
|
||||
if (!cancelled) {
|
||||
setStatus({
|
||||
state: 'ready',
|
||||
|
||||
@@ -112,8 +112,10 @@ test('third-party provider branch opens the first-run provider manager', async (
|
||||
)
|
||||
|
||||
expect(output).toContain('Set up provider')
|
||||
// Use alphabetically-early sentinels so they remain visible in the
|
||||
// 13-row test frame after the provider list was sorted A→Z.
|
||||
expect(output).toContain('Anthropic')
|
||||
expect(output).toContain('OpenAI')
|
||||
expect(output).toContain('Ollama')
|
||||
expect(output).toContain('LM Studio')
|
||||
expect(output).toContain('Azure OpenAI')
|
||||
expect(output).toContain('DeepSeek')
|
||||
expect(output).toContain('Google Gemini')
|
||||
})
|
||||
|
||||
@@ -97,6 +97,47 @@ async function waitForCondition(
|
||||
throw new Error('Timed out waiting for ProviderManager test condition')
|
||||
}
|
||||
|
||||
// Provider list is sorted alphabetically by label in the preset picker, so
|
||||
// reaching a given provider takes more keypresses than it used to. Keep the
|
||||
// target-by-label indirection here so these tests survive future list edits
|
||||
// without further churn.
|
||||
//
|
||||
// Order matches ProviderManager.renderPresetSelection() when
|
||||
// canUseCodexOAuth === true (default in mocked tests).
|
||||
// Labels of the preset picker entries, in the order the picker is expected to
// list them; the position of a label is the number of "down" keypresses
// needed to reach it from the top.
const PRESET_ORDER = [
  'Alibaba Coding Plan',
  'Alibaba Coding Plan (China)',
  'Anthropic',
  'Atomic Chat',
  'Azure OpenAI',
  'Codex OAuth',
  'DeepSeek',
  'Google Gemini',
  'Groq',
  'LM Studio',
  'MiniMax',
  'Mistral',
  'Moonshot AI',
  'NVIDIA NIM',
  'Ollama',
  'OpenAI',
  'OpenRouter',
  'Together AI',
  'Custom',
] as const

/**
 * Moves the picker selection down to `label` by sending one `j` keypress per
 * preceding entry, pausing 25 ms after each keypress.
 *
 * @param stdin - Sink that receives the simulated keypresses.
 * @param label - Preset to land on; must be one of `PRESET_ORDER`.
 * @throws Error if `label` is not present in `PRESET_ORDER`.
 */
async function navigateToPreset(
  stdin: { write: (data: string) => void },
  label: (typeof PRESET_ORDER)[number],
): Promise<void> {
  const position = PRESET_ORDER.indexOf(label)
  if (position < 0) {
    throw new Error(`Unknown preset label: ${label}`)
  }

  let remaining = position
  while (remaining > 0) {
    stdin.write('j')
    await Bun.sleep(25)
    remaining -= 1
  }
}
|
||||
|
||||
function createDeferred<T>(): {
|
||||
promise: Promise<T>
|
||||
resolve: (value: T) => void
|
||||
@@ -149,17 +190,21 @@ function mockProviderManagerDependencies(
|
||||
applySavedProfileToCurrentSession?: (...args: unknown[]) => Promise<string | null>
|
||||
clearCodexCredentials?: () => { success: boolean; warning?: string }
|
||||
getProviderProfiles?: () => unknown[]
|
||||
hasLocalOllama?: () => Promise<boolean>
|
||||
listOllamaModels?: () => Promise<
|
||||
Array<{
|
||||
probeOllamaGenerationReadiness?: () => Promise<{
|
||||
state: 'ready' | 'unreachable' | 'no_models' | 'generation_failed'
|
||||
models: Array<
|
||||
{
|
||||
name: string
|
||||
sizeBytes?: number | null
|
||||
family?: string | null
|
||||
families?: string[]
|
||||
parameterSize?: string | null
|
||||
quantizationLevel?: string | null
|
||||
}>
|
||||
}
|
||||
>
|
||||
probeModel?: string
|
||||
detail?: string
|
||||
}>
|
||||
codexSyncRead?: () => unknown
|
||||
codexAsyncRead?: () => Promise<unknown>
|
||||
updateProviderProfile?: (...args: unknown[]) => unknown
|
||||
@@ -189,8 +234,12 @@ function mockProviderManagerDependencies(
|
||||
})
|
||||
|
||||
mock.module('../utils/providerDiscovery.js', () => ({
|
||||
hasLocalOllama: options?.hasLocalOllama ?? (async () => false),
|
||||
listOllamaModels: options?.listOllamaModels ?? (async () => []),
|
||||
probeOllamaGenerationReadiness:
|
||||
options?.probeOllamaGenerationReadiness ??
|
||||
(async () => ({
|
||||
state: 'unreachable' as const,
|
||||
models: [],
|
||||
})),
|
||||
}))
|
||||
|
||||
mock.module('../utils/githubModelsCredentials.js', () => ({
|
||||
@@ -455,8 +504,9 @@ test('ProviderManager first-run Ollama preset auto-detects installed models', as
|
||||
async () => undefined,
|
||||
{
|
||||
addProviderProfile,
|
||||
hasLocalOllama: async () => true,
|
||||
listOllamaModels: async () => [
|
||||
probeOllamaGenerationReadiness: async () => ({
|
||||
state: 'ready',
|
||||
models: [
|
||||
{
|
||||
name: 'gemma4:31b-cloud',
|
||||
family: 'gemma',
|
||||
@@ -468,6 +518,8 @@ test('ProviderManager first-run Ollama preset auto-detects installed models', as
|
||||
parameterSize: '2.5b',
|
||||
},
|
||||
],
|
||||
probeModel: 'gemma4:31b-cloud',
|
||||
}),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -480,11 +532,10 @@ test('ProviderManager first-run Ollama preset auto-detects installed models', as
|
||||
|
||||
await waitForFrameOutput(
|
||||
mounted.getOutput,
|
||||
frame => frame.includes('Set up provider') && frame.includes('Ollama'),
|
||||
frame => frame.includes('Set up provider'),
|
||||
)
|
||||
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(50)
|
||||
await navigateToPreset(mounted.stdin, 'Ollama')
|
||||
mounted.stdin.write('\r')
|
||||
|
||||
const modelFrame = await waitForFrameOutput(
|
||||
@@ -579,12 +630,7 @@ test('ProviderManager first-run Codex OAuth switches the current session after l
|
||||
frame => frame.includes('Set up provider') && frame.includes('Codex OAuth'),
|
||||
)
|
||||
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(25)
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(25)
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(25)
|
||||
await navigateToPreset(mounted.stdin, 'Codex OAuth')
|
||||
mounted.stdin.write('\r')
|
||||
|
||||
await waitForCondition(() => onDone.mock.calls.length > 0)
|
||||
@@ -676,12 +722,7 @@ test('ProviderManager first-run Codex OAuth reports next-startup fallback when s
|
||||
frame => frame.includes('Set up provider') && frame.includes('Codex OAuth'),
|
||||
)
|
||||
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(25)
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(25)
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(25)
|
||||
await navigateToPreset(mounted.stdin, 'Codex OAuth')
|
||||
mounted.stdin.write('\r')
|
||||
|
||||
await waitForCondition(() => onDone.mock.calls.length > 0)
|
||||
@@ -775,12 +816,7 @@ test('ProviderManager does not hijack a manual Codex profile when OAuth credenti
|
||||
frame => frame.includes('Set up provider') && frame.includes('Codex OAuth'),
|
||||
)
|
||||
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(25)
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(25)
|
||||
mounted.stdin.write('j')
|
||||
await Bun.sleep(25)
|
||||
await navigateToPreset(mounted.stdin, 'Codex OAuth')
|
||||
mounted.stdin.write('\r')
|
||||
|
||||
await waitForCondition(() => onDone.mock.calls.length > 0)
|
||||
|
||||
@@ -37,13 +37,16 @@ import {
|
||||
readGithubModelsTokenAsync,
|
||||
} from '../utils/githubModelsCredentials.js'
|
||||
import {
|
||||
hasLocalOllama,
|
||||
listOllamaModels,
|
||||
probeAtomicChatReadiness,
|
||||
probeOllamaGenerationReadiness,
|
||||
type AtomicChatReadiness,
|
||||
type OllamaGenerationReadiness,
|
||||
} from '../utils/providerDiscovery.js'
|
||||
import {
|
||||
rankOllamaModels,
|
||||
recommendOllamaModel,
|
||||
} from '../utils/providerRecommendation.js'
|
||||
import { redactUrlForDisplay } from '../utils/urlRedaction.js'
|
||||
import { updateSettingsForSource } from '../utils/settings/settings.js'
|
||||
import {
|
||||
type OptionWithDescription,
|
||||
@@ -52,7 +55,6 @@ import {
|
||||
import { Pane } from './design-system/Pane.js'
|
||||
import TextInput from './TextInput.js'
|
||||
import { useCodexOAuthFlow } from './useCodexOAuthFlow.js'
|
||||
import { useSetAppState } from '../state/AppState.js'
|
||||
|
||||
export type ProviderManagerResult = {
|
||||
action: 'saved' | 'cancelled'
|
||||
@@ -69,6 +71,7 @@ type Screen =
|
||||
| 'menu'
|
||||
| 'select-preset'
|
||||
| 'select-ollama-model'
|
||||
| 'select-atomic-chat-model'
|
||||
| 'codex-oauth'
|
||||
| 'form'
|
||||
| 'select-active'
|
||||
@@ -89,6 +92,16 @@ type OllamaSelectionState =
|
||||
}
|
||||
| { state: 'unavailable'; message: string }
|
||||
|
||||
/**
 * State of the Atomic Chat model picker, discriminated by `state`:
 * - `idle`: initial value, before the picker screen runs a probe.
 * - `loading`: a probe is in flight.
 * - `ready`: selectable model `options`, with an optional `defaultValue`.
 * - `unavailable`: nothing to select; `message` holds the explanation.
 */
type AtomicChatSelectionState =
  | { state: 'idle' }
  | { state: 'loading' }
  | {
      state: 'ready'
      options: OptionWithDescription<string>[]
      defaultValue?: string
    }
  | {
      state: 'unavailable'
      message: string
    }
|
||||
|
||||
const FORM_STEPS: Array<{
|
||||
key: DraftField
|
||||
label: string
|
||||
@@ -222,6 +235,44 @@ function getGithubProviderSummary(
|
||||
return `github-models · ${GITHUB_PROVIDER_DEFAULT_BASE_URL} · ${getGithubProviderModel(processEnv)} · ${credentialSummary}${activeSuffix}`
|
||||
}
|
||||
|
||||
/**
 * Builds the user-facing explanation for a non-ready Atomic Chat probe.
 *
 * @param readiness - Probe result; only `unreachable` and `no_models` yield
 *   a message.
 * @param baseUrl - Endpoint that was probed. It is passed through
 *   `redactUrlForDisplay` before being shown.
 * @returns The message, or an empty string for any other state.
 */
function describeAtomicChatSelectionIssue(
  readiness: AtomicChatReadiness,
  baseUrl: string,
): string {
  switch (readiness.state) {
    case 'unreachable':
      return `Could not reach Atomic Chat at ${redactUrlForDisplay(baseUrl)}. Start the Atomic Chat app first, or enter the endpoint manually.`
    case 'no_models':
      return 'Atomic Chat is running, but no models are loaded. Download and load a model inside the Atomic Chat app first, or enter details manually.'
    default:
      return ''
  }
}
|
||||
|
||||
/**
 * Builds the user-facing explanation for a non-ready Ollama readiness probe
 * on the provider manager's model-selection screen.
 *
 * @param readiness - Probe result. Only the `unreachable`, `no_models` and
 *   `generation_failed` states produce a message.
 * @param baseUrl - Endpoint that was probed. It is passed through
 *   `redactUrlForDisplay` before being shown.
 * @returns The message, or an empty string for any other state.
 */
function describeOllamaSelectionIssue(
  readiness: OllamaGenerationReadiness,
  baseUrl: string,
): string {
  switch (readiness.state) {
    case 'unreachable':
      return `Could not reach Ollama at ${redactUrlForDisplay(baseUrl)}. Start Ollama first, or enter the endpoint manually.`

    case 'no_models':
      return 'Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first, or enter details manually.'

    case 'generation_failed': {
      const modelHint = readiness.probeModel ?? 'the selected model'
      // The quoted command must stay copy-pasteable: when the probed model is
      // unknown, show a placeholder rather than splicing the prose fallback
      // into it ("ollama run the selected model").
      const commandModel = readiness.probeModel ?? '<model>'
      const detailSuffix = readiness.detail
        ? ` Details: ${readiness.detail}.`
        : ''
      return `Ollama is reachable and models are installed, but a generation probe failed for ${modelHint}.${detailSuffix} Run "ollama run ${commandModel}" once and retry, or enter details manually.`
    }

    default:
      return ''
  }
}
|
||||
|
||||
function findCodexOAuthProfile(
|
||||
profiles: ProviderProfile[],
|
||||
profileId?: string,
|
||||
@@ -333,10 +384,12 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
const initialIsGithubActive = isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
|
||||
const initialHasGithubCredential = initialGithubCredentialSource !== 'none'
|
||||
|
||||
const [profiles, setProfiles] = React.useState(() => getProviderProfiles())
|
||||
const [activeProfileId, setActiveProfileId] = React.useState(
|
||||
() => getActiveProviderProfile()?.id,
|
||||
)
|
||||
// Deferred initialization: useState initializers run synchronously during
|
||||
// render, so getProviderProfiles() and getActiveProviderProfile() would block
|
||||
// the UI on first mount (sync file I/O). Use empty initial values and load
|
||||
// asynchronously in useEffect with queueMicrotask to keep UI responsive.
|
||||
const [profiles, setProfiles] = React.useState<ProviderProfile[]>([])
|
||||
const [activeProfileId, setActiveProfileId] = React.useState<string | undefined>()
|
||||
const [githubProviderAvailable, setGithubProviderAvailable] = React.useState(
|
||||
() => isGithubProviderAvailable(initialGithubCredentialSource),
|
||||
)
|
||||
@@ -370,11 +423,88 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
const [ollamaSelection, setOllamaSelection] = React.useState<OllamaSelectionState>({
|
||||
state: 'idle',
|
||||
})
|
||||
const [atomicChatSelection, setAtomicChatSelection] =
|
||||
React.useState<AtomicChatSelectionState>({ state: 'idle' })
|
||||
// Deferred initialization: useState initializers run synchronously during
|
||||
// render, so getProviderProfiles() and getActiveProviderProfile() would block
|
||||
// the UI (sync file I/O). Defer to queueMicrotask after first render.
|
||||
// In test environment, skip defer to avoid timing issues with mocks.
|
||||
const [isInitializing, setIsInitializing] = React.useState(
|
||||
process.env.NODE_ENV !== 'test',
|
||||
)
|
||||
const [isActivating, setIsActivating] = React.useState(false)
|
||||
const isRefreshingRef = React.useRef(false)
|
||||
|
||||
React.useEffect(() => {
|
||||
// Skip deferred initialization in test environment (mocks are synchronous)
|
||||
if (process.env.NODE_ENV === 'test') {
|
||||
setProfiles(getProviderProfiles())
|
||||
setActiveProfileId(getActiveProviderProfile()?.id)
|
||||
setIsInitializing(false)
|
||||
return
|
||||
}
|
||||
|
||||
queueMicrotask(() => {
|
||||
const profilesData = getProviderProfiles()
|
||||
const activeId = getActiveProviderProfile()?.id
|
||||
setProfiles(profilesData)
|
||||
setActiveProfileId(activeId)
|
||||
setIsInitializing(false)
|
||||
})
|
||||
}, [])
|
||||
|
||||
const currentStep = FORM_STEPS[formStepIndex] ?? FORM_STEPS[0]
|
||||
const currentStepKey = currentStep.key
|
||||
const currentValue = draft[currentStepKey]
|
||||
|
||||
// Memoize menu options to prevent unnecessary re-renders when navigating
|
||||
// the select menu. Without this, each arrow key press creates a new options
|
||||
// array reference, causing Select to re-render and feel sluggish.
|
||||
const hasProfiles = profiles.length > 0
|
||||
const hasSelectableProviders = hasProfiles || githubProviderAvailable
|
||||
const menuOptions = React.useMemo(
|
||||
() => [
|
||||
{
|
||||
value: 'add',
|
||||
label: 'Add provider',
|
||||
description: 'Create a new provider profile',
|
||||
},
|
||||
{
|
||||
value: 'activate',
|
||||
label: 'Set active provider',
|
||||
description: 'Switch the active provider profile',
|
||||
disabled: !hasSelectableProviders,
|
||||
},
|
||||
{
|
||||
value: 'edit',
|
||||
label: 'Edit provider',
|
||||
description: 'Update URL, model, or key',
|
||||
disabled: !hasProfiles,
|
||||
},
|
||||
{
|
||||
value: 'delete',
|
||||
label: 'Delete provider',
|
||||
description: 'Remove a provider profile',
|
||||
disabled: !hasSelectableProviders,
|
||||
},
|
||||
...(hasStoredCodexOAuthCredentials
|
||||
? [
|
||||
{
|
||||
value: 'logout-codex-oauth',
|
||||
label: 'Log out Codex OAuth',
|
||||
description: 'Clear securely stored Codex OAuth credentials',
|
||||
},
|
||||
]
|
||||
: []),
|
||||
{
|
||||
value: 'done',
|
||||
label: 'Done',
|
||||
description: 'Return to chat',
|
||||
},
|
||||
],
|
||||
[hasSelectableProviders, hasProfiles, hasStoredCodexOAuthCredentials],
|
||||
)
|
||||
|
||||
const refreshGithubProviderState = React.useCallback((): void => {
|
||||
const envCredentialSource = getGithubCredentialSourceFromEnv()
|
||||
const githubActive = isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
|
||||
@@ -450,32 +580,21 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
setOllamaSelection({ state: 'loading' })
|
||||
|
||||
void (async () => {
|
||||
const available = await hasLocalOllama(draft.baseUrl)
|
||||
if (!available) {
|
||||
const readiness = await probeOllamaGenerationReadiness({
|
||||
baseUrl: draft.baseUrl,
|
||||
})
|
||||
if (readiness.state !== 'ready') {
|
||||
if (!cancelled) {
|
||||
setOllamaSelection({
|
||||
state: 'unavailable',
|
||||
message:
|
||||
'Could not reach Ollama. Start Ollama first, or enter the endpoint manually.',
|
||||
message: describeOllamaSelectionIssue(readiness, draft.baseUrl),
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
const models = await listOllamaModels(draft.baseUrl)
|
||||
if (models.length === 0) {
|
||||
if (!cancelled) {
|
||||
setOllamaSelection({
|
||||
state: 'unavailable',
|
||||
message:
|
||||
'Ollama is running, but no installed models were found. Pull a chat model such as qwen2.5-coder:7b or llama3.1:8b first, or enter details manually.',
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
const ranked = rankOllamaModels(models, 'balanced')
|
||||
const recommended = recommendOllamaModel(models, 'balanced')
|
||||
const ranked = rankOllamaModels(readiness.models, 'balanced')
|
||||
const recommended = recommendOllamaModel(readiness.models, 'balanced')
|
||||
if (!cancelled) {
|
||||
setOllamaSelection({
|
||||
state: 'ready',
|
||||
@@ -494,12 +613,61 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
}
|
||||
}, [draft.baseUrl, screen])
|
||||
|
||||
React.useEffect(() => {
|
||||
if (screen !== 'select-atomic-chat-model') {
|
||||
return
|
||||
}
|
||||
|
||||
let cancelled = false
|
||||
setAtomicChatSelection({ state: 'loading' })
|
||||
|
||||
void (async () => {
|
||||
const readiness = await probeAtomicChatReadiness({
|
||||
baseUrl: draft.baseUrl,
|
||||
})
|
||||
if (readiness.state !== 'ready') {
|
||||
if (!cancelled) {
|
||||
setAtomicChatSelection({
|
||||
state: 'unavailable',
|
||||
message: describeAtomicChatSelectionIssue(readiness, draft.baseUrl),
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (!cancelled) {
|
||||
setAtomicChatSelection({
|
||||
state: 'ready',
|
||||
defaultValue: readiness.models[0],
|
||||
options: readiness.models.map(model => ({
|
||||
label: model,
|
||||
value: model,
|
||||
})),
|
||||
})
|
||||
}
|
||||
})()
|
||||
|
||||
return () => {
|
||||
cancelled = true
|
||||
}
|
||||
}, [draft.baseUrl, screen])
|
||||
|
||||
/**
 * Reloads the provider profiles and the active profile id into component
 * state, then refreshes the GitHub provider and Codex OAuth credential state.
 *
 * The reads are deferred with `queueMicrotask`; per the original author's
 * note, `getProviderProfiles()` and `getActiveProviderProfile()` read config
 * files synchronously and can block the main thread (notably on Windows).
 * Calls made while a refresh is already queued are dropped.
 */
function refreshProfiles(): void {
  // A refresh is already queued; it will pick up the latest config when it
  // runs, so a second one is not scheduled.
  if (isRefreshingRef.current) return
  isRefreshingRef.current = true

  queueMicrotask(() => {
    try {
      const nextProfiles = getProviderProfiles()
      setProfiles(nextProfiles)
      setActiveProfileId(getActiveProviderProfile()?.id)
      refreshGithubProviderState()
      refreshCodexOAuthCredentialState()
    } finally {
      // Always release the guard. If any call above threw while the flag was
      // still set, every later refreshProfiles() call would return early and
      // the UI would never refresh again.
      isRefreshingRef.current = false
    }
  })
}
|
||||
|
||||
function clearStartupProviderOverrideFromUserSettings(): string | null {
|
||||
@@ -572,12 +740,24 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
async function activateSelectedProvider(profileId: string): Promise<void> {
|
||||
let providerLabel = 'provider'
|
||||
|
||||
// Set loading state before sync I/O to keep UI responsive
|
||||
setIsActivating(true)
|
||||
setStatusMessage('Activating provider...')
|
||||
|
||||
try {
|
||||
// Defer sync I/O to next microtask - UI renders loading state first.
|
||||
// setActiveProviderProfile(), activateGithubProvider(), and
|
||||
// clearStartupProviderOverrideFromUserSettings() all perform sync file writes
|
||||
// (saveGlobalConfig, saveProfileFile, updateSettingsForSource) which can
|
||||
// block the main thread on Windows (antivirus, disk cache, NTFS metadata).
|
||||
await new Promise<void>(resolve => queueMicrotask(resolve))
|
||||
|
||||
if (profileId === GITHUB_PROVIDER_ID) {
|
||||
providerLabel = GITHUB_PROVIDER_LABEL
|
||||
const githubError = activateGithubProvider()
|
||||
if (githubError) {
|
||||
setErrorMessage(`Could not activate GitHub provider: ${githubError}`)
|
||||
setIsActivating(false)
|
||||
returnToMenu()
|
||||
return
|
||||
}
|
||||
@@ -593,6 +773,7 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
mainLoopModel: GITHUB_PROVIDER_DEFAULT_MODEL,
|
||||
}))
|
||||
setStatusMessage(`Active provider: ${GITHUB_PROVIDER_LABEL}`)
|
||||
setIsActivating(false)
|
||||
returnToMenu()
|
||||
return
|
||||
}
|
||||
@@ -600,6 +781,7 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
const active = setActiveProviderProfile(profileId)
|
||||
if (!active) {
|
||||
setErrorMessage('Could not change active provider.')
|
||||
setIsActivating(false)
|
||||
returnToMenu()
|
||||
return
|
||||
}
|
||||
@@ -647,10 +829,12 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
? `Active provider: ${active.name}. Warning: could not clear startup provider override (${settingsOverrideError}).`
|
||||
: `Active provider: ${active.name}`,
|
||||
)
|
||||
setIsActivating(false)
|
||||
returnToMenu()
|
||||
} catch (error) {
|
||||
refreshProfiles()
|
||||
setStatusMessage(undefined)
|
||||
setIsActivating(false)
|
||||
const detail = error instanceof Error ? error.message : String(error)
|
||||
setErrorMessage(`Could not finish activating ${providerLabel}: ${detail}`)
|
||||
returnToMenu()
|
||||
@@ -774,6 +958,12 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
return
|
||||
}
|
||||
|
||||
if (preset === 'atomic-chat') {
|
||||
setAtomicChatSelection({ state: 'loading' })
|
||||
setScreen('select-atomic-chat-model')
|
||||
return
|
||||
}
|
||||
|
||||
setScreen('form')
|
||||
}
|
||||
|
||||
@@ -849,6 +1039,86 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
returnToMenu()
|
||||
}
|
||||
|
||||
/**
 * Render the Atomic Chat model-selection screen for the current
 * atomicChatSelection state:
 *   - 'idle' / 'loading': a static "checking" notice;
 *   - 'unavailable': the stored message plus a manual-entry / back menu;
 *   - otherwise: a picker over the models held in the selection state.
 */
function renderAtomicChatSelection(): React.ReactNode {
  const selection = atomicChatSelection

  // Cancelling from either interactive variant goes back to the preset list.
  const backToPresets = (): void => {
    setScreen('select-preset')
  }

  switch (selection.state) {
    case 'idle':
    case 'loading':
      return (
        <Box flexDirection="column" gap={1}>
          <Text color="remember" bold>
            Checking Atomic Chat
          </Text>
          <Text dimColor>Looking for loaded Atomic Chat models...</Text>
        </Box>
      )

    case 'unavailable': {
      const fallbackOptions = [
        {
          value: 'manual',
          label: 'Enter manually',
          description: 'Fill in the base URL and model yourself',
        },
        {
          value: 'back',
          label: 'Back',
          description: 'Choose another provider preset',
        },
      ]

      // 'manual' opens the form at its first step with the cursor at the
      // end of the draft name; any other choice returns to the presets.
      const handleFallbackChoice = (value: string): void => {
        if (value !== 'manual') {
          backToPresets()
          return
        }
        setFormStepIndex(0)
        setCursorOffset(draft.name.length)
        setScreen('form')
      }

      return (
        <Box flexDirection="column" gap={1}>
          <Text color="remember" bold>
            Atomic Chat setup
          </Text>
          <Text dimColor>{selection.message}</Text>
          <Select
            options={fallbackOptions}
            onChange={handleFallbackChoice}
            onCancel={backToPresets}
            visibleOptionCount={2}
          />
        </Box>
      )
    }

    default: {
      // Picking a model stores it on the draft and persists the draft.
      const handleModelPicked = (value: string): void => {
        const nextDraft = {
          ...draft,
          model: value,
        }
        setDraft(nextDraft)
        persistDraft(nextDraft)
      }

      return (
        <Box flexDirection="column" gap={1}>
          <Text color="remember" bold>
            Choose an Atomic Chat model
          </Text>
          <Text dimColor>
            Pick one of the models loaded in Atomic Chat to save into a local
            provider profile.
          </Text>
          <Select
            options={selection.options}
            defaultValue={selection.defaultValue}
            defaultFocusValue={selection.defaultValue}
            inlineDescriptions
            visibleOptionCount={Math.min(8, selection.options.length)}
            onChange={handleModelPicked}
            onCancel={backToPresets}
          />
        </Box>
      )
    }
  }
}
|
||||
|
||||
function renderOllamaSelection(): React.ReactNode {
|
||||
if (ollamaSelection.state === 'loading' || ollamaSelection.state === 'idle') {
|
||||
return (
|
||||
@@ -979,21 +1249,35 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
|
||||
function renderPresetSelection(): React.ReactNode {
|
||||
const canUseCodexOAuth = !isBareMode()
|
||||
// Providers sorted alphabetically by label. `Custom` is pinned to the end
|
||||
// because it's the catch-all / escape hatch — users scanning the list
|
||||
// should always find known providers first. `Skip for now` (first-run
|
||||
// only) comes last, after Custom.
|
||||
const options = [
|
||||
{
|
||||
value: 'dashscope-intl',
|
||||
label: 'Alibaba Coding Plan',
|
||||
description: 'Alibaba DashScope International endpoint',
|
||||
},
|
||||
{
|
||||
value: 'dashscope-cn',
|
||||
label: 'Alibaba Coding Plan (China)',
|
||||
description: 'Alibaba DashScope China endpoint',
|
||||
},
|
||||
{
|
||||
value: 'anthropic',
|
||||
label: 'Anthropic',
|
||||
description: 'Native Claude API (x-api-key auth)',
|
||||
},
|
||||
{
|
||||
value: 'ollama',
|
||||
label: 'Ollama',
|
||||
description: 'Local or remote Ollama endpoint',
|
||||
value: 'atomic-chat',
|
||||
label: 'Atomic Chat',
|
||||
description: 'Local Model Provider',
|
||||
},
|
||||
{
|
||||
value: 'openai',
|
||||
label: 'OpenAI',
|
||||
description: 'OpenAI API with API key',
|
||||
value: 'azure-openai',
|
||||
label: 'Azure OpenAI',
|
||||
description: 'Azure OpenAI endpoint (model=deployment name)',
|
||||
},
|
||||
...(canUseCodexOAuth
|
||||
? [
|
||||
@@ -1005,11 +1289,6 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
},
|
||||
]
|
||||
: []),
|
||||
{
|
||||
value: 'moonshotai',
|
||||
label: 'Moonshot AI',
|
||||
description: 'Kimi OpenAI-compatible endpoint',
|
||||
},
|
||||
{
|
||||
value: 'deepseek',
|
||||
label: 'DeepSeek',
|
||||
@@ -1020,50 +1299,30 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
label: 'Google Gemini',
|
||||
description: 'Gemini OpenAI-compatible endpoint',
|
||||
},
|
||||
{
|
||||
value: 'together',
|
||||
label: 'Together AI',
|
||||
description: 'Together chat/completions endpoint',
|
||||
},
|
||||
{
|
||||
value: 'groq',
|
||||
label: 'Groq',
|
||||
description: 'Groq OpenAI-compatible endpoint',
|
||||
},
|
||||
{
|
||||
value: 'mistral',
|
||||
label: 'Mistral',
|
||||
description: 'Mistral OpenAI-compatible endpoint',
|
||||
},
|
||||
{
|
||||
value: 'azure-openai',
|
||||
label: 'Azure OpenAI',
|
||||
description: 'Azure OpenAI endpoint (model=deployment name)',
|
||||
},
|
||||
{
|
||||
value: 'openrouter',
|
||||
label: 'OpenRouter',
|
||||
description: 'OpenRouter OpenAI-compatible endpoint',
|
||||
},
|
||||
{
|
||||
value: 'lmstudio',
|
||||
label: 'LM Studio',
|
||||
description: 'Local LM Studio endpoint',
|
||||
},
|
||||
{
|
||||
value: 'dashscope-cn',
|
||||
label: 'Alibaba Coding Plan (China)',
|
||||
description: 'Alibaba DashScope China endpoint',
|
||||
value: 'minimax',
|
||||
label: 'MiniMax',
|
||||
description: 'MiniMax API endpoint',
|
||||
},
|
||||
{
|
||||
value: 'dashscope-intl',
|
||||
label: 'Alibaba Coding Plan',
|
||||
description: 'Alibaba DashScope International endpoint',
|
||||
value: 'mistral',
|
||||
label: 'Mistral',
|
||||
description: 'Mistral OpenAI-compatible endpoint',
|
||||
},
|
||||
{
|
||||
value: 'custom',
|
||||
label: 'Custom',
|
||||
description: 'Any OpenAI-compatible provider',
|
||||
value: 'moonshotai',
|
||||
label: 'Moonshot AI',
|
||||
description: 'Kimi OpenAI-compatible endpoint',
|
||||
},
|
||||
{
|
||||
value: 'nvidia-nim',
|
||||
@@ -1071,9 +1330,29 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
description: 'NVIDIA NIM endpoint',
|
||||
},
|
||||
{
|
||||
value: 'minimax',
|
||||
label: 'MiniMax',
|
||||
description: 'MiniMax API endpoint',
|
||||
value: 'ollama',
|
||||
label: 'Ollama',
|
||||
description: 'Local or remote Ollama endpoint',
|
||||
},
|
||||
{
|
||||
value: 'openai',
|
||||
label: 'OpenAI',
|
||||
description: 'OpenAI API with API key',
|
||||
},
|
||||
{
|
||||
value: 'openrouter',
|
||||
label: 'OpenRouter',
|
||||
description: 'OpenRouter OpenAI-compatible endpoint',
|
||||
},
|
||||
{
|
||||
value: 'together',
|
||||
label: 'Together AI',
|
||||
description: 'Together chat/completions endpoint',
|
||||
},
|
||||
{
|
||||
value: 'custom',
|
||||
label: 'Custom',
|
||||
description: 'Any OpenAI-compatible provider',
|
||||
},
|
||||
...(mode === 'first-run'
|
||||
? [
|
||||
@@ -1165,49 +1444,10 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
}
|
||||
|
||||
function renderMenu(): React.ReactNode {
|
||||
// Use memoized menuOptions from component scope
|
||||
const hasProfiles = profiles.length > 0
|
||||
const hasSelectableProviders = hasProfiles || githubProviderAvailable
|
||||
|
||||
const options = [
|
||||
{
|
||||
value: 'add',
|
||||
label: 'Add provider',
|
||||
description: 'Create a new provider profile',
|
||||
},
|
||||
{
|
||||
value: 'activate',
|
||||
label: 'Set active provider',
|
||||
description: 'Switch the active provider profile',
|
||||
disabled: !hasSelectableProviders,
|
||||
},
|
||||
{
|
||||
value: 'edit',
|
||||
label: 'Edit provider',
|
||||
description: 'Update URL, model, or key',
|
||||
disabled: !hasProfiles,
|
||||
},
|
||||
{
|
||||
value: 'delete',
|
||||
label: 'Delete provider',
|
||||
description: 'Remove a provider profile',
|
||||
disabled: !hasSelectableProviders,
|
||||
},
|
||||
...(hasStoredCodexOAuthCredentials
|
||||
? [
|
||||
{
|
||||
value: 'logout-codex-oauth',
|
||||
label: 'Log out Codex OAuth',
|
||||
description: 'Clear securely stored Codex OAuth credentials',
|
||||
},
|
||||
]
|
||||
: []),
|
||||
{
|
||||
value: 'done',
|
||||
label: 'Done',
|
||||
description: 'Return to chat',
|
||||
},
|
||||
]
|
||||
|
||||
return (
|
||||
<Box flexDirection="column" gap={1}>
|
||||
<Text color="remember" bold>
|
||||
@@ -1244,7 +1484,7 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
)}
|
||||
</Box>
|
||||
<Select
|
||||
options={options}
|
||||
options={menuOptions}
|
||||
onChange={(value: string) => {
|
||||
setErrorMessage(undefined)
|
||||
switch (value) {
|
||||
@@ -1257,7 +1497,7 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
}
|
||||
break
|
||||
case 'edit':
|
||||
if (profiles.length > 0) {
|
||||
if (hasProfiles) {
|
||||
setScreen('select-edit')
|
||||
}
|
||||
break
|
||||
@@ -1314,7 +1554,7 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
}}
|
||||
onCancel={() => closeWithCancelled('Provider manager closed')}
|
||||
defaultFocusValue={menuFocusValue}
|
||||
visibleOptionCount={options.length}
|
||||
visibleOptionCount={menuOptions.length}
|
||||
/>
|
||||
</Box>
|
||||
)
|
||||
@@ -1393,6 +1633,9 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
case 'select-ollama-model':
|
||||
content = renderOllamaSelection()
|
||||
break
|
||||
case 'select-atomic-chat-model':
|
||||
content = renderAtomicChatSelection()
|
||||
break
|
||||
case 'codex-oauth':
|
||||
content = (
|
||||
<CodexOAuthSetup
|
||||
@@ -1550,5 +1793,21 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
|
||||
break
|
||||
}
|
||||
|
||||
return <Pane color="permission">{content}</Pane>
|
||||
return (
|
||||
<Pane color="permission">
|
||||
{isInitializing ? (
|
||||
<Box flexDirection="column" gap={1}>
|
||||
<Text color="remember" bold>Loading providers...</Text>
|
||||
<Text dimColor>Reading provider profiles from disk.</Text>
|
||||
</Box>
|
||||
) : isActivating ? (
|
||||
<Box flexDirection="column" gap={1}>
|
||||
<Text color="remember" bold>Activating provider...</Text>
|
||||
<Text dimColor>Please wait while the provider is being configured.</Text>
|
||||
</Box>
|
||||
) : (
|
||||
content
|
||||
)}
|
||||
</Pane>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -281,6 +281,24 @@ export function Config({
|
||||
enabled: autoCompactEnabled
|
||||
});
|
||||
}
|
||||
}, {
|
||||
id: 'toolHistoryCompressionEnabled',
|
||||
label: 'Tool history compression',
|
||||
value: globalConfig.toolHistoryCompressionEnabled,
|
||||
type: 'boolean' as const,
|
||||
onChange(toolHistoryCompressionEnabled: boolean) {
|
||||
saveGlobalConfig(current => ({
|
||||
...current,
|
||||
toolHistoryCompressionEnabled
|
||||
}));
|
||||
setGlobalConfig({
|
||||
...getGlobalConfig(),
|
||||
toolHistoryCompressionEnabled
|
||||
});
|
||||
logEvent('tengu_tool_history_compression_setting_changed', {
|
||||
enabled: toolHistoryCompressionEnabled
|
||||
});
|
||||
}
|
||||
}, {
|
||||
id: 'spinnerTipsEnabled',
|
||||
label: 'Show tips',
|
||||
@@ -1158,6 +1176,9 @@ export function Config({
|
||||
if (globalConfig.autoCompactEnabled !== initialConfig.current.autoCompactEnabled) {
|
||||
formattedChanges.push(`${globalConfig.autoCompactEnabled ? 'Enabled' : 'Disabled'} auto-compact`);
|
||||
}
|
||||
if (globalConfig.toolHistoryCompressionEnabled !== initialConfig.current.toolHistoryCompressionEnabled) {
|
||||
formattedChanges.push(`${globalConfig.toolHistoryCompressionEnabled ? 'Enabled' : 'Disabled'} tool history compression`);
|
||||
}
|
||||
if (globalConfig.respectGitignore !== initialConfig.current.respectGitignore) {
|
||||
formattedChanges.push(`${globalConfig.respectGitignore ? 'Enabled' : 'Disabled'} respect .gitignore in file picker`);
|
||||
}
|
||||
|
||||
@@ -123,6 +123,8 @@ function detectProvider(): { name: string; model: string; baseUrl: string; isLoc
|
||||
name = 'MiniMax'
|
||||
else if (resolvedRequest.transport === 'codex_responses' || baseUrl.includes('chatgpt.com/backend-api/codex'))
|
||||
name = 'Codex'
|
||||
else if (/moonshot/i.test(baseUrl) || /kimi/i.test(rawModel))
|
||||
name = 'Moonshot (Kimi)'
|
||||
else if (/deepseek/i.test(baseUrl) || /deepseek/i.test(rawModel))
|
||||
name = 'DeepSeek'
|
||||
else if (/openrouter/i.test(baseUrl))
|
||||
|
||||
@@ -53,17 +53,20 @@ describe('getProjectMemoryPathForSelector', () => {
|
||||
})
|
||||
|
||||
test('defaults to a new AGENTS.md in the current cwd when no project file is loaded', () => {
|
||||
expect(getProjectMemoryPathForSelector([], '/repo/packages/app')).toBe(
|
||||
'/repo/packages/app/AGENTS.md',
|
||||
const cwd = join('/repo', 'packages', 'app')
|
||||
expect(getProjectMemoryPathForSelector([], cwd)).toBe(
|
||||
join(cwd, 'AGENTS.md'),
|
||||
)
|
||||
})
|
||||
|
||||
test('ignores loaded project instruction files outside the current cwd ancestry', () => {
|
||||
const outsideRepoPath = join('/other-worktree', 'AGENTS.md')
|
||||
const cwd = join('/repo', 'packages', 'app')
|
||||
expect(
|
||||
getProjectMemoryPathForSelector(
|
||||
[projectFile('/other-worktree/AGENTS.md')],
|
||||
'/repo/packages/app',
|
||||
[projectFile(outsideRepoPath)],
|
||||
cwd,
|
||||
),
|
||||
).toBe('/repo/packages/app/AGENTS.md')
|
||||
).toBe(join(cwd, 'AGENTS.md'))
|
||||
})
|
||||
})
|
||||
|
||||
@@ -823,6 +823,11 @@ function getFunctionResultClearingSection(model: string): string | null {
|
||||
return null
|
||||
}
|
||||
const config = getCachedMCConfigForFRC()
|
||||
if (!config) {
|
||||
// External/stub builds return null from getCachedMCConfig — abort the
|
||||
// section rather than trying to read .supportedModels off null.
|
||||
return null
|
||||
}
|
||||
const isModelSupported = config.supportedModels?.some(pattern =>
|
||||
model.includes(pattern),
|
||||
)
|
||||
|
||||
@@ -19,7 +19,7 @@ async function _temp() {
|
||||
logForDebugging("Showing marketplace config save failure notification");
|
||||
notifs.push({
|
||||
key: "marketplace-config-save-failed",
|
||||
jsx: <Text color="error">Failed to save marketplace retry info · Check ~/.claude.json permissions</Text>,
|
||||
jsx: <Text color="error">Failed to save marketplace retry info · Check ~/.openclaude.json permissions</Text>,
|
||||
priority: "immediate",
|
||||
timeoutMs: 10000
|
||||
});
|
||||
|
||||
@@ -1,34 +1,23 @@
|
||||
/**
|
||||
* Swarm Permission Poller Hook
|
||||
* Swarm Permission Callback Registry
|
||||
*
|
||||
* This hook polls for permission responses from the team leader when running
|
||||
* as a worker agent in a swarm. When a response is received, it calls the
|
||||
* appropriate callback (onAllow/onReject) to continue execution.
|
||||
* Manages callback registrations for permission requests and responses
|
||||
* in agent swarms. Responses are delivered exclusively via the mailbox
|
||||
* system (useInboxPoller → processMailboxPermissionResponse).
|
||||
*
|
||||
* This hook should be used in conjunction with the worker-side integration
|
||||
* in useCanUseTool.ts, which creates pending requests that this hook monitors.
|
||||
* The legacy file-based polling (resolved/ directory) has been removed
|
||||
* because it created an unauthenticated attack surface — any local process
|
||||
* could forge approval files. The mailbox path is the sole active channel.
|
||||
*/
|
||||
|
||||
import { useCallback, useEffect, useRef } from 'react'
|
||||
import { useInterval } from 'usehooks-ts'
|
||||
import { logForDebugging } from '../utils/debug.js'
|
||||
import { errorMessage } from '../utils/errors.js'
|
||||
import {
|
||||
type PermissionUpdate,
|
||||
permissionUpdateSchema,
|
||||
} from '../utils/permissions/PermissionUpdateSchema.js'
|
||||
import {
|
||||
isSwarmWorker,
|
||||
type PermissionResponse,
|
||||
pollForResponse,
|
||||
removeWorkerResponse,
|
||||
} from '../utils/swarm/permissionSync.js'
|
||||
import { getAgentName, getTeamName } from '../utils/teammate.js'
|
||||
|
||||
const POLL_INTERVAL_MS = 500
|
||||
|
||||
/**
|
||||
* Validate permissionUpdates from external sources (mailbox IPC, disk polling).
|
||||
* Validate permissionUpdates from external sources (mailbox IPC).
|
||||
* Malformed entries from buggy/old teammate processes are filtered out rather
|
||||
* than propagated unchecked into callback.onAllow().
|
||||
*/
|
||||
@@ -225,106 +214,9 @@ export function processSandboxPermissionResponse(params: {
|
||||
return true
|
||||
}
|
||||
|
||||
/**
 * Dispatch a permission response to the callback registered for its request.
 *
 * Looks up response.requestId in pendingCallbacks. When nothing is
 * registered, logs that fact and returns false. Otherwise the entry is
 * removed from the registry, onAllow (for an 'approved' decision) or
 * onReject (for anything else) is invoked, and true is returned.
 */
function processResponse(response: PermissionResponse): boolean {
  const { requestId } = response
  const handler = pendingCallbacks.get(requestId)

  if (!handler) {
    logForDebugging(
      `[SwarmPermissionPoller] No callback registered for request ${requestId}`,
    )
    return false
  }

  logForDebugging(
    `[SwarmPermissionPoller] Processing response for request ${requestId}: ${response.decision}`,
  )

  // Unregister first, so the entry is already gone when the callback runs.
  pendingCallbacks.delete(requestId)

  if (response.decision !== 'approved') {
    handler.onReject(response.feedback)
    return true
  }

  // Permission updates pass through parsePermissionUpdates before being
  // handed to the callback.
  const validatedUpdates = parsePermissionUpdates(response.permissionUpdates)
  handler.onAllow(response.updatedInput, validatedUpdates)
  return true
}
|
||||
|
||||
/**
 * React hook that polls for permission responses on behalf of a swarm worker.
 *
 * Behaviour:
 *   - does nothing unless isSwarmWorker() is true;
 *   - runs one poll on mount, then one every POLL_INTERVAL_MS;
 *   - for each request id in pendingCallbacks, asks pollForResponse for a
 *     response and hands any result to processResponse;
 *   - once a response has been processed, calls removeWorkerResponse for it.
 *
 * A ref-based flag keeps polls from overlapping. Errors raised during a poll
 * are logged through logForDebugging and not rethrown.
 */
export function useSwarmPermissionPoller(): void {
  const pollInFlightRef = useRef(false)

  const poll = useCallback(async () => {
    // Skip when we are not a worker, a poll is already running, or no
    // request is waiting for an answer.
    if (
      !isSwarmWorker() ||
      pollInFlightRef.current ||
      pendingCallbacks.size === 0
    ) {
      return
    }

    pollInFlightRef.current = true

    try {
      const agentName = getAgentName()
      const teamName = getTeamName()

      // Both identifiers are needed to address the response.
      if (!(agentName && teamName)) {
        return
      }

      for (const requestId of pendingCallbacks.keys()) {
        const response = await pollForResponse(requestId, agentName, teamName)

        // Remove the stored response only after its callback was found and run.
        if (response && processResponse(response)) {
          await removeWorkerResponse(requestId, agentName, teamName)
        }
      }
    } catch (error) {
      logForDebugging(
        `[SwarmPermissionPoller] Error during poll: ${errorMessage(error)}`,
      )
    } finally {
      pollInFlightRef.current = false
    }
  }, [])

  // A null delay disables the interval for non-worker processes.
  useInterval(() => void poll(), isSwarmWorker() ? POLL_INTERVAL_MS : null)

  // Kick off one poll immediately on mount instead of waiting for the
  // first interval tick.
  useEffect(() => {
    if (isSwarmWorker()) {
      void poll()
    }
  }, [poll])
}
|
||||
// Legacy file-based polling (useSwarmPermissionPoller, processResponse)
|
||||
// has been removed. Permission responses are now delivered exclusively
|
||||
// via the mailbox system:
|
||||
// Leader: sendPermissionResponseViaMailbox() → writeToMailbox()
|
||||
// Worker: useInboxPoller → processMailboxPermissionResponse()
|
||||
// See: fix(security) — remove unauthenticated file-based permission channel
|
||||
|
||||
@@ -11,6 +11,7 @@ const execFileNoThrowMock = mock(
|
||||
async () => ({ code: 0, stdout: '', stderr: '' }),
|
||||
)
|
||||
|
||||
function installOscMocks(): void {
|
||||
mock.module('../../utils/execFileNoThrow.js', () => ({
|
||||
execFileNoThrow: execFileNoThrowMock,
|
||||
execFileNoThrowWithCwd: execFileNoThrowMock,
|
||||
@@ -19,6 +20,7 @@ mock.module('../../utils/execFileNoThrow.js', () => ({
|
||||
mock.module('../../utils/tempfile.js', () => ({
|
||||
generateTempFilePath: generateTempFilePathMock,
|
||||
}))
|
||||
}
|
||||
|
||||
async function importFreshOscModule() {
|
||||
return import(`./osc.ts?ts=${Date.now()}-${Math.random()}`)
|
||||
@@ -45,6 +47,7 @@ async function waitForExecCall(
|
||||
|
||||
describe('Windows clipboard fallback', () => {
|
||||
beforeEach(() => {
|
||||
installOscMocks()
|
||||
execFileNoThrowMock.mockClear()
|
||||
generateTempFilePathMock.mockClear()
|
||||
process.env = { ...originalEnv }
|
||||
@@ -62,14 +65,12 @@ describe('Windows clipboard fallback', () => {
|
||||
const { setClipboard } = await importFreshOscModule()
|
||||
|
||||
await setClipboard('Привет мир')
|
||||
await flushClipboardCopy()
|
||||
const windowsCall = await waitForExecCall('powershell')
|
||||
|
||||
expect(execFileNoThrowMock.mock.calls.some(([cmd]) => cmd === 'clip')).toBe(
|
||||
false,
|
||||
)
|
||||
expect(
|
||||
execFileNoThrowMock.mock.calls.some(([cmd]) => cmd === 'powershell'),
|
||||
).toBe(true)
|
||||
expect(windowsCall).toBeDefined()
|
||||
})
|
||||
|
||||
test('passes Windows clipboard text through a UTF-8 temp file instead of stdin', async () => {
|
||||
@@ -97,6 +98,7 @@ describe('Windows clipboard fallback', () => {
|
||||
|
||||
describe('clipboard path behavior remains stable', () => {
|
||||
beforeEach(() => {
|
||||
installOscMocks()
|
||||
execFileNoThrowMock.mockClear()
|
||||
process.env = { ...originalEnv }
|
||||
delete process.env['SSH_CONNECTION']
|
||||
|
||||
@@ -12,7 +12,7 @@ import {
|
||||
* One-shot migration: clear skipAutoPermissionPrompt for users who accepted
|
||||
* the old 2-option AutoModeOptInDialog but don't have auto as their default.
|
||||
* Re-surfaces the dialog so they see the new "make it my default mode" option.
|
||||
* Guard lives in GlobalConfig (~/.claude.json), not settings.json, so it
|
||||
* Guard lives in GlobalConfig (~/.openclaude.json), not settings.json, so it
|
||||
* survives settings resets and doesn't re-arm itself.
|
||||
*
|
||||
* Only runs when tengu_auto_mode_config.enabled === 'enabled'. For 'opt-in'
|
||||
|
||||
@@ -3873,7 +3873,7 @@ export function REPL({
|
||||
// empty to non-empty, not on every length change -- otherwise a render loop
|
||||
// (concurrent onQuery thrashing, etc.) spams saveGlobalConfig, which hits
|
||||
// ELOCKED under concurrent sessions and falls back to unlocked writes.
|
||||
// That write storm is the primary trigger for ~/.claude.json corruption
|
||||
// That write storm is the primary trigger for ~/.openclaude.json corruption
|
||||
// (GH #3117).
|
||||
const hasCountedQueueUseRef = useRef(false);
|
||||
useEffect(() => {
|
||||
|
||||
@@ -334,7 +334,7 @@ async function processRemoteEvalPayload(
|
||||
// Empty object is truthy — without the length check, `{features: {}}`
|
||||
// (transient server bug, truncated response) would pass, clear the maps
|
||||
// below, return true, and syncRemoteEvalToDisk would wholesale-write `{}`
|
||||
// to disk: total flag blackout for every process sharing ~/.claude.json.
|
||||
// to disk: total flag blackout for every process sharing ~/.openclaude.json.
|
||||
if (!payload?.features || Object.keys(payload.features).length === 0) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import { randomUUID } from 'crypto'
|
||||
import {
|
||||
getAPIProvider,
|
||||
isFirstPartyAnthropicBaseUrl,
|
||||
isGithubNativeAnthropicMode,
|
||||
} from 'src/utils/model/providers.js'
|
||||
import {
|
||||
getAttributionHeader,
|
||||
@@ -334,8 +335,13 @@ export function getPromptCachingEnabled(model: string): boolean {
|
||||
// Prompt caching is an Anthropic-specific feature. Third-party providers
|
||||
// do not understand cache_control blocks and strict backends (e.g. Azure
|
||||
// Foundry) reject or flag requests that contain them.
|
||||
//
|
||||
// Exception: when the GitHub provider is configured in native Anthropic API
|
||||
// mode (CLAUDE_CODE_GITHUB_ANTHROPIC_API=1), requests are sent in Anthropic
|
||||
// format, so cache_control blocks are supported.
|
||||
const provider = getAPIProvider()
|
||||
if (provider !== 'firstParty' && provider !== 'bedrock' && provider !== 'vertex') {
|
||||
const isNativeGithub = isGithubNativeAnthropicMode(model)
|
||||
if (provider !== 'firstParty' && provider !== 'bedrock' && provider !== 'vertex' && !isNativeGithub) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1211,7 +1217,7 @@ async function* queryModel(
|
||||
cachedMCEnabled = featureEnabled && modelSupported
|
||||
const config = getCachedMCConfig()
|
||||
logForDebugging(
|
||||
`Cached MC gate: enabled=${featureEnabled} modelSupported=${modelSupported} model=${options.model} supportedModels=${jsonStringify(config.supportedModels)}`,
|
||||
`Cached MC gate: enabled=${featureEnabled} modelSupported=${modelSupported} model=${options.model} supportedModels=${jsonStringify(config?.supportedModels)}`,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ import { getSmallFastModel } from 'src/utils/model/model.js'
|
||||
import {
|
||||
getAPIProvider,
|
||||
isFirstPartyAnthropicBaseUrl,
|
||||
isGithubNativeAnthropicMode,
|
||||
} from 'src/utils/model/providers.js'
|
||||
import { getProxyFetchOptions } from 'src/utils/proxy.js'
|
||||
import {
|
||||
@@ -174,6 +175,25 @@ export async function getAnthropicClient({
|
||||
providerOverride,
|
||||
}) as unknown as Anthropic
|
||||
}
|
||||
// GitHub provider in native Anthropic API mode: send requests in Anthropic
|
||||
// format so cache_control blocks are honoured and prompt caching works.
|
||||
// Requires the GitHub endpoint (OPENAI_BASE_URL) to support Anthropic's
|
||||
// messages API — set CLAUDE_CODE_GITHUB_ANTHROPIC_API=1 to opt in.
|
||||
if (isGithubNativeAnthropicMode(model)) {
|
||||
const githubBaseUrl =
|
||||
process.env.OPENAI_BASE_URL?.replace(/\/$/, '') ??
|
||||
'https://api.githubcopilot.com'
|
||||
const githubToken =
|
||||
process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? ''
|
||||
const nativeArgs: ConstructorParameters<typeof Anthropic>[0] = {
|
||||
...ARGS,
|
||||
baseURL: githubBaseUrl,
|
||||
authToken: githubToken,
|
||||
// No apiKey — we authenticate via Bearer token (authToken)
|
||||
apiKey: null,
|
||||
}
|
||||
return new Anthropic(nativeArgs)
|
||||
}
|
||||
if (
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
|
||||
isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB) ||
|
||||
|
||||
@@ -547,7 +547,7 @@ describe('Codex request translation', () => {
|
||||
])
|
||||
})
|
||||
|
||||
test('strips leaked reasoning preamble from completed Codex text responses', () => {
|
||||
test('strips <think> tag block from completed Codex text responses', () => {
|
||||
const message = convertCodexResponseToAnthropicMessage(
|
||||
{
|
||||
id: 'resp_1',
|
||||
@@ -560,7 +560,7 @@ describe('Codex request translation', () => {
|
||||
{
|
||||
type: 'output_text',
|
||||
text:
|
||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
|
||||
'<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?',
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -578,6 +578,37 @@ describe('Codex request translation', () => {
|
||||
])
|
||||
})
|
||||
|
||||
test('strips unterminated <think> tag at block boundary in Codex completed response', () => {
  // The assistant text opens a <think> block after the answer and never
  // closes it; only the text before the tag is expected in the result.
  const modelName = 'gpt-5.4'
  const rawOutputText =
    'Here is the answer.\n<think>wait, let me reconsider the user request'
  const expectedText = 'Here is the answer.'

  const message = convertCodexResponseToAnthropicMessage(
    {
      id: 'resp_1',
      model: modelName,
      output: [
        {
          type: 'message',
          role: 'assistant',
          content: [{ type: 'output_text', text: rawOutputText }],
        },
      ],
      usage: { input_tokens: 12, output_tokens: 4 },
    },
    modelName,
  )

  expect(message.content).toEqual([{ type: 'text', text: expectedText }])
})
|
||||
|
||||
test('translates Codex SSE text stream into Anthropic events', async () => {
|
||||
const responseText = [
|
||||
'event: response.output_item.added',
|
||||
@@ -609,7 +640,7 @@ describe('Codex request translation', () => {
|
||||
])
|
||||
})
|
||||
|
||||
test('strips leaked reasoning preamble from Codex SSE text stream', async () => {
|
||||
test('strips <think> tag block from Codex SSE text stream', async () => {
|
||||
const responseText = [
|
||||
'event: response.output_item.added',
|
||||
'data: {"type":"response.output_item.added","item":{"id":"msg_1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":0}',
|
||||
@@ -618,13 +649,13 @@ describe('Codex request translation', () => {
|
||||
'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_1","output_index":0,"part":{"type":"output_text","text":""},"sequence_number":1}',
|
||||
'',
|
||||
'event: response.output_text.delta',
|
||||
'data: {"type":"response.output_text.delta","content_index":0,"delta":"The user just said \\"hey\\" - a simple greeting. I should respond briefly and friendly.\\n\\nHey! How can I help you today?","item_id":"msg_1","output_index":0,"sequence_number":2}',
|
||||
'data: {"type":"response.output_text.delta","content_index":0,"delta":"<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?","item_id":"msg_1","output_index":0,"sequence_number":2}',
|
||||
'',
|
||||
'event: response.output_item.done',
|
||||
'data: {"type":"response.output_item.done","item":{"id":"msg_1","type":"message","status":"completed","content":[{"type":"output_text","text":"The user just said \\"hey\\" - a simple greeting. I should respond briefly and friendly.\\n\\nHey! How can I help you today?"}],"role":"assistant"},"output_index":0,"sequence_number":3}',
|
||||
'data: {"type":"response.output_item.done","item":{"id":"msg_1","type":"message","status":"completed","content":[{"type":"output_text","text":"<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?"}],"role":"assistant"},"output_index":0,"sequence_number":3}',
|
||||
'',
|
||||
'event: response.completed',
|
||||
'data: {"type":"response.completed","response":{"id":"resp_1","status":"completed","model":"gpt-5.4","output":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"The user just said \\"hey\\" - a simple greeting. I should respond briefly and friendly.\\n\\nHey! How can I help you today?"}]}],"usage":{"input_tokens":2,"output_tokens":1}},"sequence_number":4}',
|
||||
'data: {"type":"response.completed","response":{"id":"resp_1","status":"completed","model":"gpt-5.4","output":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?"}]}],"usage":{"input_tokens":2,"output_tokens":1}},"sequence_number":4}',
|
||||
'',
|
||||
].join('\n')
|
||||
|
||||
@@ -646,6 +677,50 @@ describe('Codex request translation', () => {
|
||||
}
|
||||
}
|
||||
|
||||
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
|
||||
expect(textDeltas.join('')).toBe('Hey! How can I help you today?')
|
||||
})
|
||||
|
||||
test('preserves prose without tags (no phrase-based false positive)', async () => {
|
||||
// Regression test: older phrase-based sanitizer would incorrectly strip text
|
||||
// starting with "I should" or "The user". The tag-based approach leaves it alone.
|
||||
const responseText = [
|
||||
'event: response.output_item.added',
|
||||
'data: {"type":"response.output_item.added","item":{"id":"msg_1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":0}',
|
||||
'',
|
||||
'event: response.content_part.added',
|
||||
'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_1","output_index":0,"part":{"type":"output_text","text":""},"sequence_number":1}',
|
||||
'',
|
||||
'event: response.output_text.delta',
|
||||
'data: {"type":"response.output_text.delta","content_index":0,"delta":"I should note that the user role requires a briefly concise friendly response format.","item_id":"msg_1","output_index":0,"sequence_number":2}',
|
||||
'',
|
||||
'event: response.output_item.done',
|
||||
'data: {"type":"response.output_item.done","item":{"id":"msg_1","type":"message","status":"completed","content":[{"type":"output_text","text":"I should note that the user role requires a briefly concise friendly response format."}],"role":"assistant"},"output_index":0,"sequence_number":3}',
|
||||
'',
|
||||
'event: response.completed',
|
||||
'data: {"type":"response.completed","response":{"id":"resp_1","status":"completed","model":"gpt-5.4","output":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I should note that the user role requires a briefly concise friendly response format."}]}],"usage":{"input_tokens":2,"output_tokens":1}},"sequence_number":4}',
|
||||
'',
|
||||
].join('\n')
|
||||
|
||||
const stream = new ReadableStream({
|
||||
start(controller) {
|
||||
controller.enqueue(new TextEncoder().encode(responseText))
|
||||
controller.close()
|
||||
},
|
||||
})
|
||||
|
||||
const textDeltas: string[] = []
|
||||
for await (const event of codexStreamToAnthropic(
|
||||
new Response(stream),
|
||||
'gpt-5.4',
|
||||
)) {
|
||||
const delta = (event as { delta?: { type?: string; text?: string } }).delta
|
||||
if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
|
||||
textDeltas.push(delta.text)
|
||||
}
|
||||
}
|
||||
|
||||
expect(textDeltas.join('')).toBe(
|
||||
'I should note that the user role requires a briefly concise friendly response format.',
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { APIError } from '@anthropic-ai/sdk'
|
||||
import { compressToolHistory } from './compressToolHistory.js'
|
||||
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
||||
import type {
|
||||
ResolvedCodexCredentials,
|
||||
@@ -6,10 +7,9 @@ import type {
|
||||
} from './providerConfig.js'
|
||||
import { sanitizeSchemaForOpenAICompat } from './openaiSchemaSanitizer.js'
|
||||
import {
|
||||
looksLikeLeakedReasoningPrefix,
|
||||
shouldBufferPotentialReasoningPrefix,
|
||||
stripLeakedReasoningPreamble,
|
||||
} from './reasoningLeakSanitizer.js'
|
||||
createThinkTagFilter,
|
||||
stripThinkTags,
|
||||
} from './thinkTagSanitizer.js'
|
||||
|
||||
export interface AnthropicUsage {
|
||||
input_tokens: number
|
||||
@@ -485,13 +485,15 @@ export async function performCodexRequest(options: {
|
||||
defaultHeaders: Record<string, string>
|
||||
signal?: AbortSignal
|
||||
}): Promise<Response> {
|
||||
const input = convertAnthropicMessagesToResponsesInput(
|
||||
const compressedMessages = compressToolHistory(
|
||||
options.params.messages as Array<{
|
||||
role?: string
|
||||
message?: { role?: string; content?: unknown }
|
||||
content?: unknown
|
||||
}>,
|
||||
options.request.resolvedModel,
|
||||
)
|
||||
const input = convertAnthropicMessagesToResponsesInput(compressedMessages)
|
||||
const body: Record<string, unknown> = {
|
||||
model: options.request.resolvedModel,
|
||||
input: input.length > 0
|
||||
@@ -734,34 +736,29 @@ export async function* codexStreamToAnthropic(
|
||||
{ index: number; toolUseId: string }
|
||||
>()
|
||||
let activeTextBlockIndex: number | null = null
|
||||
let activeTextBuffer = ''
|
||||
let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
|
||||
const thinkFilter = createThinkTagFilter()
|
||||
let nextContentBlockIndex = 0
|
||||
let sawToolUse = false
|
||||
let finalResponse: Record<string, any> | undefined
|
||||
|
||||
const closeActiveTextBlock = async function* () {
|
||||
if (activeTextBlockIndex === null) return
|
||||
if (textBufferMode !== 'none') {
|
||||
const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
|
||||
if (sanitized) {
|
||||
const tail = thinkFilter.flush()
|
||||
if (tail) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: activeTextBlockIndex,
|
||||
delta: {
|
||||
type: 'text_delta',
|
||||
text: sanitized,
|
||||
text: tail,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
yield {
|
||||
type: 'content_block_stop',
|
||||
index: activeTextBlockIndex,
|
||||
}
|
||||
activeTextBlockIndex = null
|
||||
activeTextBuffer = ''
|
||||
textBufferMode = 'none'
|
||||
}
|
||||
|
||||
const startTextBlockIfNeeded = async function* () {
|
||||
@@ -837,43 +834,17 @@ export async function* codexStreamToAnthropic(
|
||||
|
||||
if (event.event === 'response.output_text.delta') {
|
||||
yield* startTextBlockIfNeeded()
|
||||
activeTextBuffer += payload.delta ?? ''
|
||||
if (activeTextBlockIndex !== null) {
|
||||
if (
|
||||
textBufferMode === 'strip' ||
|
||||
looksLikeLeakedReasoningPrefix(activeTextBuffer)
|
||||
) {
|
||||
textBufferMode = 'strip'
|
||||
continue
|
||||
}
|
||||
|
||||
if (textBufferMode === 'pending') {
|
||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
||||
continue
|
||||
}
|
||||
const visible = thinkFilter.feed(payload.delta ?? '')
|
||||
if (visible) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: activeTextBlockIndex,
|
||||
delta: {
|
||||
type: 'text_delta',
|
||||
text: activeTextBuffer,
|
||||
text: visible,
|
||||
},
|
||||
}
|
||||
textBufferMode = 'none'
|
||||
continue
|
||||
}
|
||||
|
||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
||||
textBufferMode = 'pending'
|
||||
continue
|
||||
}
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: activeTextBlockIndex,
|
||||
delta: {
|
||||
type: 'text_delta',
|
||||
text: payload.delta ?? '',
|
||||
},
|
||||
}
|
||||
}
|
||||
continue
|
||||
@@ -969,7 +940,7 @@ export function convertCodexResponseToAnthropicMessage(
|
||||
if (part?.type === 'output_text') {
|
||||
content.push({
|
||||
type: 'text',
|
||||
text: stripLeakedReasoningPreamble(part.text ?? ''),
|
||||
text: stripThinkTags(part.text ?? ''),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
572
src/services/api/compressToolHistory.test.ts
Normal file
572
src/services/api/compressToolHistory.test.ts
Normal file
@@ -0,0 +1,572 @@
|
||||
import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
|
||||
import { compressToolHistory, getTiers } from './compressToolHistory.js'
|
||||
|
||||
// Mock the two dependencies so tests are deterministic and don't read disk config.
|
||||
// Mutable knobs read lazily by the mocked modules below. Tests overwrite
// them to simulate a disabled feature or a different context window size.
const mockState = {
  enabled: true,
  effectiveWindow: 100_000,
}

// Puts both knobs back to their defaults (compression on, 100k window).
const restoreMockDefaults = (): void => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000
}

// Config mock: returns an object carrying only toolHistoryCompressionEnabled,
// evaluated on every call so per-test changes to mockState are picked up.
mock.module('../../utils/config.js', () => {
  return {
    getGlobalConfig: () => {
      return { toolHistoryCompressionEnabled: mockState.enabled }
    },
  }
})

// Context-window mock: reports whatever window the current test configured.
mock.module('../compact/autoCompact.js', () => {
  return {
    getEffectiveContextWindowSize: () => mockState.effectiveWindow,
  }
})

// Reset on both sides of every test so no case can leak state into another.
beforeEach(() => {
  restoreMockDefaults()
})

afterEach(() => {
  restoreMockDefaults()
})
|
||||
|
||||
type Block = Record<string, unknown>
|
||||
type Msg = { role: string; content: Block[] | string }
|
||||
|
||||
/** Returns a filler string made of `n` copies of the letter "x". */
function bigText(n: number): string {
  const filler = 'x'
  return filler.repeat(n)
}
|
||||
|
||||
/**
 * Builds one tool round-trip: an assistant message holding a single `Read`
 * tool_use block, followed by the user message carrying the matching
 * tool_result. The result content is `resultLength` filler characters.
 */
function buildToolExchange(id: number, resultLength: number): Msg[] {
  const toolUseId = `toolu_${id}`

  const assistantCall: Msg = {
    role: 'assistant',
    content: [
      {
        type: 'tool_use',
        id: toolUseId,
        name: 'Read',
        input: { file_path: `/path/to/file${id}.ts` },
      },
    ],
  }

  const userResult: Msg = {
    role: 'user',
    content: [
      {
        type: 'tool_result',
        tool_use_id: toolUseId,
        content: bigText(resultLength),
      },
    ],
  }

  return [assistantCall, userResult]
}
|
||||
|
||||
/**
 * Produces a conversation that opens with a plain-string user request and is
 * followed by `numToolExchanges` tool round-trips (ids 0, 1, 2, …), each with
 * a tool_result of `resultLength` characters.
 */
function buildConversation(numToolExchanges: number, resultLength = 5_000): Msg[] {
  let conversation: Msg[] = [{ role: 'user', content: 'Initial request' }]
  let exchangeId = 0
  while (exchangeId < numToolExchanges) {
    conversation = conversation.concat(buildToolExchange(exchangeId, resultLength))
    exchangeId += 1
  }
  return conversation
}
|
||||
|
||||
/** Selects the messages whose array content holds at least one tool_result block. */
function getResultMessages(messages: Msg[]): Msg[] {
  const carriesToolResult = (msg: Msg): boolean => {
    // String content can never contain blocks.
    if (!Array.isArray(msg.content)) return false
    return msg.content.some(block => block.type === 'tool_result')
  }
  return messages.filter(msg => carriesToolResult(msg))
}
|
||||
|
||||
/**
 * Returns the first tool_result block found in `msg`.
 *
 * Fails loudly instead of handing back `undefined` disguised as a Block, so a
 * test that lost its tool_result reports the real cause rather than a later
 * "cannot read properties of undefined" error.
 *
 * @param msg - message expected to carry block-array content
 * @returns the first block whose `type` is 'tool_result'
 * @throws Error when the content is not an array or holds no tool_result block
 */
function getResultBlock(msg: Msg): Block {
  if (!Array.isArray(msg.content)) {
    throw new Error('getResultBlock: message content is not a block array')
  }
  const found = msg.content.find(block => block.type === 'tool_result')
  if (found === undefined) {
    throw new Error('getResultBlock: message has no tool_result block')
  }
  return found
}
|
||||
|
||||
/**
 * Extracts the textual payload of the message's tool_result block.
 * String content is returned as-is; array content yields the `text` of every
 * 'text' part joined with newlines; any other content shape yields ''.
 */
function getResultText(msg: Msg): string {
  const payload = getResultBlock(msg).content
  if (typeof payload === 'string') return payload
  if (!Array.isArray(payload)) return ''

  const pieces: unknown[] = []
  for (const part of payload) {
    if (part.type === 'text') pieces.push(part.text)
  }
  return pieces.join('\n')
}
|
||||
|
||||
// ---------- getTiers ----------
|
||||
|
||||
// One case per window bucket: [test title, sample window size, expected tiers].
const tierCases: Array<[string, number, { recent: number; mid: number }]> = [
  ['getTiers: < 16k window → recent=2, mid=3', 8_000, { recent: 2, mid: 3 }],
  ['getTiers: 16k–32k → recent=3, mid=5', 20_000, { recent: 3, mid: 5 }],
  ['getTiers: 32k–64k → recent=4, mid=8', 48_000, { recent: 4, mid: 8 }],
  ['getTiers: 64k–128k (Copilot gpt-4o) → recent=5, mid=10', 100_000, { recent: 5, mid: 10 }],
  ['getTiers: 128k–256k (Copilot Claude) → recent=8, mid=15', 200_000, { recent: 8, mid: 15 }],
  ['getTiers: 256k–500k → recent=12, mid=25', 400_000, { recent: 12, mid: 25 }],
  ['getTiers: ≥ 500k (gpt-4.1 1M) → recent=25, mid=50', 1_000_000, { recent: 25, mid: 50 }],
]

for (const [title, windowSize, expectedTiers] of tierCases) {
  test(title, () => {
    expect(getTiers(windowSize)).toEqual(expectedTiers)
  })
}
|
||||
|
||||
// ---------- master switch ----------
|
||||
|
||||
test('pass-through when toolHistoryCompressionEnabled is false', () => {
  // Flip the master switch off through the mocked global config.
  mockState.enabled = false
  const history = buildConversation(20)
  const compressed = compressToolHistory(history, 'gpt-4o')

  // Identity check: the very same array must come back (no transformation).
  expect(compressed).toBe(history)
})
|
||||
|
||||
test('pass-through when total tool_results <= recent tier', () => {
  // 100k effective → recent=5; only 4 exchanges → no compression
  const history = buildConversation(4)
  const compressed = compressToolHistory(history, 'gpt-4o')

  // Nothing to compress, so the input array itself is expected back.
  expect(compressed).toBe(history)
})
|
||||
|
||||
// ---------- per-tier behavior ----------
|
||||
|
||||
test('recent tier: tool_result content untouched', () => {
  // 100k effective → recent=5, mid=10. With 6 exchanges, only the oldest is touched.
  const history = buildConversation(6, 5_000)
  const compressed = compressToolHistory(history, 'gpt-4o')
  const toolResults = getResultMessages(compressed)

  // The newest five results must still carry all 5000 characters.
  let idx = toolResults.length - 5
  while (idx < toolResults.length) {
    expect(getResultText(toolResults[idx]).length).toBe(5_000)
    idx += 1
  }
})
|
||||
|
||||
test('mid tier: long content truncated to MID_MAX_CHARS with marker', () => {
  // 100k → recent=5, mid=10. 10 exchanges: 5 recent + 5 mid (none old).
  const history = buildConversation(10, 5_000)
  const compressed = compressToolHistory(history, 'gpt-4o')
  const toolResults = getResultMessages(compressed)

  // Indices 0-4 are the mid tier: ~2000 kept chars plus the truncation marker.
  for (const idx of [0, 1, 2, 3, 4]) {
    const truncated = getResultText(toolResults[idx])
    expect(truncated).toContain('[…truncated')
    expect(truncated).toContain('chars from tool history]')
    // Budget plus marker: strictly more than 2000 chars but under 2200.
    expect(truncated.length).toBeLessThan(2_200)
    expect(truncated.length).toBeGreaterThan(2_000)
  }
})
|
||||
|
||||
test('mid tier: short content (< MID_MAX_CHARS) untouched', () => {
  // 500 chars is below the mid-tier budget, so nothing should be cut.
  const history = buildConversation(10, 500)
  const compressed = compressToolHistory(history, 'gpt-4o')
  const toolResults = getResultMessages(compressed)

  for (const idx of [0, 1, 2, 3, 4]) {
    expect(getResultText(toolResults[idx])).toBe(bigText(500))
  }
})
|
||||
|
||||
test('old tier: content replaced with stub [name args={...} → N chars omitted]', () => {
  // 100k → recent=5, mid=10, old=rest. 20 exchanges → 5 old + 10 mid + 5 recent.
  const history = buildConversation(20, 5_000)
  const compressed = compressToolHistory(history, 'gpt-4o')
  const toolResults = getResultMessages(compressed)
  const stubPattern = /^\[Read args=\{.*\} → 5000 chars omitted\]$/

  // Indices 0-4 fall in the old tier, so each must be a one-line stub.
  for (const idx of [0, 1, 2, 3, 4]) {
    expect(getResultText(toolResults[idx])).toMatch(stubPattern)
  }
})
|
||||
|
||||
test('old tier: stub args truncated to 200 chars', () => {
  const oversizedCommand = bigText(500)

  // Oldest exchange: a Bash call whose serialized input exceeds the args budget.
  const bashCall: Msg = {
    role: 'assistant',
    content: [
      {
        type: 'tool_use',
        id: 'toolu_x',
        name: 'Bash',
        input: { command: oversizedCommand },
      },
    ],
  }
  const bashResult: Msg = {
    role: 'user',
    content: [{ type: 'tool_result', tool_use_id: 'toolu_x', content: 'output' }],
  }

  const history: Msg[] = [
    { role: 'user', content: 'start' },
    bashCall,
    bashResult,
    // Pad with enough recent exchanges to push the above into old tier
    ...buildConversation(20, 100).slice(1),
  ]
  const compressed = compressToolHistory(history, 'gpt-4o')
  const toolResults = getResultMessages(compressed)
  const stubText = getResultText(toolResults[0])

  // Stub format: [Bash args=<json≤200chars> → N chars omitted]
  // The args portion (between args= and →) must be ≤ 200 chars.
  const argsMatch = /args=(.*?) →/.exec(stubText)
  expect(argsMatch).not.toBeNull()
  expect(argsMatch![1].length).toBeLessThanOrEqual(200)
})
|
||||
|
||||
test('old tier: orphan tool_result (no matching tool_use) falls back to "tool"', () => {
  // Orphan: a tool_result whose tool_use_id matches no tool_use in the history.
  const orphanResult: Msg = {
    role: 'user',
    content: [{ type: 'tool_result', tool_use_id: 'orphan_id', content: 'data' }],
  }
  const history: Msg[] = [
    { role: 'user', content: 'start' },
    orphanResult,
    ...buildConversation(20, 100).slice(1),
  ]
  const compressed = compressToolHistory(history, 'gpt-4o')
  const toolResults = getResultMessages(compressed)

  // 'data' is 4 chars long; the stub uses the generic name and empty args.
  expect(getResultText(toolResults[0])).toMatch(/^\[tool args=\{\} → 4 chars omitted\]$/)
})
|
||||
|
||||
// ---------- structural preservation ----------
|
||||
|
||||
test('tool_use blocks always preserved', () => {
  const history = buildConversation(20, 5_000)
  const compressed = compressToolHistory(history, 'gpt-4o')

  // Totals the tool_use blocks over every message that has array content.
  const countToolUses = (msgs: Msg[]): number => {
    let total = 0
    for (const msg of msgs) {
      if (!Array.isArray(msg.content)) continue
      for (const block of msg.content) {
        if (block.type === 'tool_use') total += 1
      }
    }
    return total
  }

  expect(countToolUses(compressed as Msg[])).toBe(countToolUses(history))
})
|
||||
|
||||
test('text blocks always preserved', () => {
  // Oldest assistant turn mixes a text block with the tool call.
  const assistantTurn: Msg = {
    role: 'assistant',
    content: [
      { type: 'text', text: 'reasoning before tool' },
      { type: 'tool_use', id: 'toolu_1', name: 'Read', input: {} },
    ],
  }
  const history: Msg[] = [
    { role: 'user', content: 'first' },
    assistantTurn,
    {
      role: 'user',
      content: [{ type: 'tool_result', tool_use_id: 'toolu_1', content: bigText(5000) }],
    },
    ...buildConversation(20, 5_000).slice(1),
  ]
  const compressed = compressToolHistory(history, 'gpt-4o') as Msg[]

  // Index 1 is the assistant turn built above.
  const assistantBlocks = compressed[1].content as Block[]
  const textBlock = assistantBlocks.find(block => block.type === 'text')

  expect(textBlock).toEqual({ type: 'text', text: 'reasoning before tool' })
})
|
||||
|
||||
test('thinking blocks always preserved', () => {
  // Oldest assistant turn carries a signed thinking block next to the tool call.
  const assistantTurn: Msg = {
    role: 'assistant',
    content: [
      { type: 'thinking', thinking: 'internal reasoning', signature: 'sig' },
      { type: 'tool_use', id: 'toolu_1', name: 'Read', input: {} },
    ],
  }
  const history: Msg[] = [
    { role: 'user', content: 'first' },
    assistantTurn,
    {
      role: 'user',
      content: [{ type: 'tool_result', tool_use_id: 'toolu_1', content: bigText(5000) }],
    },
    ...buildConversation(20, 5_000).slice(1),
  ]
  const compressed = compressToolHistory(history, 'gpt-4o') as Msg[]

  // Index 1 is the assistant turn built above.
  const assistantBlocks = compressed[1].content as Block[]
  const thinkingBlock = assistantBlocks.find(block => block.type === 'thinking')

  expect(thinkingBlock).toEqual({
    type: 'thinking',
    thinking: 'internal reasoning',
    signature: 'sig',
  })
})
|
||||
|
||||
test('non-array content (string) handled gracefully', () => {
  const plainUserTurn: Msg = { role: 'user', content: 'plain string content' }
  const history: Msg[] = [plainUserTurn, ...buildConversation(20, 100).slice(1)]
  const compressed = compressToolHistory(history, 'gpt-4o') as Msg[]

  // The string-content message must come through with its text intact.
  expect(compressed[0].content).toBe('plain string content')
})
|
||||
|
||||
test('empty content array handled gracefully', () => {
  const emptyUserTurn: Msg = { role: 'user', content: [] }
  const history: Msg[] = [emptyUserTurn, ...buildConversation(20, 100).slice(1)]

  // Only requirement: a message with zero blocks must not make compression throw.
  const run = () => compressToolHistory(history, 'gpt-4o')
  expect(run).not.toThrow()
})
|
||||
|
||||
// ---------- message shape compatibility ----------
|
||||
|
||||
test('wrapped shape ({ message: { role, content } }) handled', () => {
  type WrappedMsg = { message: { role: string; content: Block[] | string } }

  // Re-express every flat message in the nested { message: ... } form.
  const history = buildConversation(20, 5_000).map(
    (flat: Msg): WrappedMsg => ({ message: { role: flat.role, content: flat.content } }),
  )
  const compressed = compressToolHistory(history as any, 'gpt-4o') as WrappedMsg[]

  const isToolResult = (block: Block): boolean => block.type === 'tool_result'

  // First wrapped tool-result message should have stub content (old tier)
  const firstResultMsg = compressed.find(wrapped => {
    const inner = wrapped.message.content
    return Array.isArray(inner) && inner.some(isToolResult)
  })
  const resultBlock = (firstResultMsg!.message.content as Block[]).find(isToolResult) as Block
  const stubText = ((resultBlock.content as Block[])[0] as any).text

  expect(stubText).toMatch(/^\[Read args=.*→ 5000 chars omitted\]$/)
})
|
||||
|
||||
test('flat shape ({ role, content }) handled', () => {
  const history = buildConversation(20, 5_000)
  const compressed = compressToolHistory(history, 'gpt-4o')
  const oldestResult = getResultMessages(compressed)[0]

  // With 20 exchanges the oldest result sits in the old tier and becomes a stub.
  expect(getResultText(oldestResult)).toMatch(/^\[Read args=.*→ 5000 chars omitted\]$/)
})
|
||||
|
||||
// ---------- tier boundary correctness ----------
|
||||
|
||||
test('tier boundaries: 6 exchanges → 1 mid + 5 recent (recent=5)', () => {
  const history = buildConversation(6, 5_000)
  const compressed = compressToolHistory(history, 'gpt-4o')
  const toolResults = getResultMessages(compressed)

  // Oldest: mid (truncated)
  expect(getResultText(toolResults[0])).toContain('[…truncated')

  // Last 5: untouched
  for (const idx of [1, 2, 3, 4, 5]) {
    expect(getResultText(toolResults[idx]).length).toBe(5_000)
  }
})
|
||||
|
||||
test('tier boundaries: 16 exchanges → 1 old + 10 mid + 5 recent', () => {
  const history = buildConversation(16, 5_000)
  const compressed = compressToolHistory(history, 'gpt-4o')
  const toolResults = getResultMessages(compressed)

  // Oldest 1: stub (old tier)
  expect(getResultText(toolResults[0])).toMatch(/^\[Read .*chars omitted\]$/)

  // Next 10: mid (truncated)
  let idx = 1
  while (idx < 11) {
    expect(getResultText(toolResults[idx])).toContain('[…truncated')
    idx += 1
  }

  // Last 5: untouched
  while (idx < 16) {
    expect(getResultText(toolResults[idx]).length).toBe(5_000)
    idx += 1
  }
})
|
||||
|
||||
test('large window (1M) with 30 exchanges: 5 mid + 25 recent, none old', () => {
  // ≥500k → recent=25, mid=50. 30 exchanges → 5 mid + 25 recent. None old.
  // The previous title claimed "all untouched", which contradicted the tier
  // split above and left the five mid-tier results without any assertion.
  mockState.effectiveWindow = 1_000_000
  const messages = buildConversation(30, 5_000)
  const result = compressToolHistory(messages, 'gpt-4.1')
  const resultMsgs = getResultMessages(result)

  // Oldest 5: mid tier — truncated, but never collapsed to an old-tier stub.
  for (let i = 0; i < 5; i++) {
    const text = getResultText(resultMsgs[i])
    expect(text).toContain('[…truncated')
    expect(text).not.toContain('chars omitted')
  }

  // Last 25: untouched
  for (let i = 5; i < 30; i++) {
    expect(getResultText(resultMsgs[i]).length).toBe(5_000)
  }
})
|
||||
|
||||
// ---------- attribute preservation ----------
|
||||
|
||||
test('is_error flag preserved in mid tier', () => {
  // Oldest exchange: a failed Bash call with a long error payload.
  const failedCall: Msg = {
    role: 'assistant',
    content: [{ type: 'tool_use', id: 'toolu_err', name: 'Bash', input: {} }],
  }
  const failedResult: Msg = {
    role: 'user',
    content: [
      {
        type: 'tool_result',
        tool_use_id: 'toolu_err',
        is_error: true,
        content: bigText(5_000),
      },
    ],
  }
  const history: Msg[] = [
    { role: 'user', content: 'start' },
    failedCall,
    failedResult,
    // Pad with enough recent exchanges to push the above into MID tier
    ...buildConversation(10, 100).slice(1),
  ]
  const compressed = compressToolHistory(history, 'gpt-4o')
  const oldestResult = getResultMessages(compressed)[0]
  const block = getResultBlock(oldestResult) as { is_error?: boolean; content: unknown }

  // The flag survives while the bulk text is truncated.
  expect(block.is_error).toBe(true)
  expect(getResultText(oldestResult)).toContain('[…truncated')
})
|
||||
|
||||
test('is_error flag preserved in old tier (stub)', () => {
  // Oldest exchange: a failed Bash call with a long error payload.
  const failedCall: Msg = {
    role: 'assistant',
    content: [{ type: 'tool_use', id: 'toolu_err', name: 'Bash', input: {} }],
  }
  const failedResult: Msg = {
    role: 'user',
    content: [
      {
        type: 'tool_result',
        tool_use_id: 'toolu_err',
        is_error: true,
        content: bigText(5_000),
      },
    ],
  }
  // 20 padding exchanges push the failed call into the old tier.
  const history: Msg[] = [
    { role: 'user', content: 'start' },
    failedCall,
    failedResult,
    ...buildConversation(20, 100).slice(1),
  ]
  const compressed = compressToolHistory(history, 'gpt-4o')
  const oldestResult = getResultMessages(compressed)[0]
  const block = getResultBlock(oldestResult) as { is_error?: boolean; content: unknown }

  // The flag survives even though the content collapsed to a stub.
  expect(block.is_error).toBe(true)
  expect(getResultText(oldestResult)).toMatch(/^\[Bash .*chars omitted\]$/)
})
|
||||
|
||||
// ---------- COMPACTABLE_TOOLS filter ----------
|
||||
|
||||
test('non-compactable tool (e.g. Task/Agent) is NEVER compressed', () => {
  // The OLDEST exchange uses a non-compactable tool name.
  const taskCall: Msg = {
    role: 'assistant',
    content: [{ type: 'tool_use', id: 'task_1', name: 'Task', input: { goal: 'plan' } }],
  }
  const taskResult: Msg = {
    role: 'user',
    content: [{ type: 'tool_result', tool_use_id: 'task_1', content: bigText(5_000) }],
  }
  const history: Msg[] = [
    { role: 'user', content: 'start' },
    taskCall,
    taskResult,
    // Pad with 20 compactable exchanges to push Task into old tier
    ...buildConversation(20, 100).slice(1),
  ]
  const compressed = compressToolHistory(history, 'gpt-4o')
  const taskText = getResultText(getResultMessages(compressed)[0])

  // First tool_result is for Task (non-compactable) → must remain full
  expect(taskText.length).toBe(5_000)
  expect(taskText).not.toContain('chars omitted')
  expect(taskText).not.toContain('[…truncated')
})
|
||||
|
||||
test('mcp__ prefixed tools ARE compactable (matches microCompact behavior)', () => {
  // Oldest exchange: an MCP tool call with a long result.
  const mcpCall: Msg = {
    role: 'assistant',
    content: [{ type: 'tool_use', id: 'mcp_1', name: 'mcp__github__get_issue', input: {} }],
  }
  const mcpResult: Msg = {
    role: 'user',
    content: [{ type: 'tool_result', tool_use_id: 'mcp_1', content: bigText(5_000) }],
  }
  const history: Msg[] = [
    { role: 'user', content: 'start' },
    mcpCall,
    mcpResult,
    ...buildConversation(20, 100).slice(1),
  ]
  const compressed = compressToolHistory(history, 'gpt-4o')
  const oldestResult = getResultMessages(compressed)[0]

  // MCP tool result is compressed (gets stub since it's in old tier)
  expect(getResultText(oldestResult)).toMatch(/^\[mcp__github__get_issue .*chars omitted\]$/)
})
|
||||
|
||||
// ---------- skip already-cleared blocks ----------
|
||||
|
||||
test('blocks already cleared by microCompact are NOT re-compressed', () => {
  // microCompact's marker
  const clearedMarker = '[Old tool result content cleared]'
  const readCall: Msg = {
    role: 'assistant',
    content: [{ type: 'tool_use', id: 'cleared_1', name: 'Read', input: {} }],
  }
  const clearedResult: Msg = {
    role: 'user',
    content: [{ type: 'tool_result', tool_use_id: 'cleared_1', content: clearedMarker }],
  }
  const history: Msg[] = [
    { role: 'user', content: 'start' },
    readCall,
    clearedResult,
    ...buildConversation(20, 100).slice(1),
  ]
  const compressed = compressToolHistory(history, 'gpt-4o')
  const oldestResult = getResultMessages(compressed)[0]

  // Already-cleared marker survives untouched (no double processing)
  expect(getResultText(oldestResult)).toBe('[Old tool result content cleared]')
})
|
||||
|
||||
test('extra block attributes (e.g. cache_control) preserved across rewrites', () => {
  const cacheControl = { type: 'ephemeral' }
  const messages: Msg[] = [
    { role: 'user', content: 'start' },
    {
      role: 'assistant',
      content: [{ type: 'tool_use', id: 'toolu_cc', name: 'Read', input: {} }],
    },
    {
      role: 'user',
      content: [
        {
          type: 'tool_result',
          tool_use_id: 'toolu_cc',
          cache_control: cacheControl,
          content: bigText(5_000),
        },
      ],
    },
    // 20 padding exchanges push the block above into the old tier.
    ...buildConversation(20, 100).slice(1),
  ]
  const result = compressToolHistory(messages, 'gpt-4o')
  const resultMsgs = getResultMessages(result)
  const block = getResultBlock(resultMsgs[0]) as { cache_control?: unknown }

  // Guard against a vacuous pass: confirm the block really was rewritten to a
  // stub, otherwise an untouched block would trivially keep its attributes.
  expect(getResultText(resultMsgs[0])).toMatch(/^\[Read .*chars omitted\]$/)

  // The custom attribute survived the stub rewrite via ...block spread
  expect(block.cache_control).toEqual(cacheControl)
})
|
||||
255
src/services/api/compressToolHistory.ts
Normal file
255
src/services/api/compressToolHistory.ts
Normal file
@@ -0,0 +1,255 @@
|
||||
/**
|
||||
* Compresses old tool_result content for stateless OpenAI-compatible providers
|
||||
* (Copilot, Mistral, Ollama). Preserves all conversation structure — tool_use,
|
||||
* tool_result pairing, text, thinking, and is_error all survive intact. Only
|
||||
* the BULK text of older tool_results is shrunk to delay context saturation.
|
||||
*
|
||||
* Tier sizes scale with the model's effective context window via
|
||||
* getEffectiveContextWindowSize() — same calculation used by auto-compact, so
|
||||
* the two systems stay aligned.
|
||||
*
|
||||
* Complements (does not replace) microCompact.ts:
|
||||
* - microCompact: time/cache-based, runs from query.ts, binary clear/keep,
|
||||
* limited to Claude (cache editing) or idle gaps (time-based).
|
||||
* - compressToolHistory: size-based, runs at the shim layer, tiered
|
||||
* compression, covers the gap for active sessions on non-Claude providers.
|
||||
*
|
||||
* Reuses isCompactableTool from microCompact to avoid touching tools the
|
||||
* project already classifies as unsafe to compress (e.g. Task, Agent).
|
||||
* Skips blocks already cleared by microCompact (TOOL_RESULT_CLEARED_MESSAGE).
|
||||
*
|
||||
* Anthropic native bypasses both shims, so it is unaffected by this module.
|
||||
*/
|
||||
import { getEffectiveContextWindowSize } from '../compact/autoCompact.js'
|
||||
import { isCompactableTool } from '../compact/microCompact.js'
|
||||
import { TOOL_RESULT_CLEARED_MESSAGE } from '../../utils/toolResultStorage.js'
|
||||
import { getGlobalConfig } from '../../utils/config.js'
|
||||
|
||||
// Mid-tier truncation budget. 2k chars ≈ 500 tokens, enough to preserve the
|
||||
// shape of most tool outputs (file headers, command stderr, top grep hits)
|
||||
// without ballooning context. Bump too high and the tier loses its purpose.
|
||||
const MID_MAX_CHARS = 2_000
|
||||
|
||||
// Stub args budget. JSON.stringify of a typical tool input fits in 200 chars
|
||||
// (file paths, short commands, small queries). Long inputs are rare and clamping
|
||||
// here keeps the stub size bounded even when callers pass oversized arguments.
|
||||
const STUB_ARGS_MAX_CHARS = 200
|
||||
|
||||
type AnyMessage = {
|
||||
role?: string
|
||||
message?: { role?: string; content?: unknown }
|
||||
content?: unknown
|
||||
}
|
||||
|
||||
type ToolResultBlock = {
|
||||
type: 'tool_result'
|
||||
tool_use_id?: string
|
||||
is_error?: boolean
|
||||
content?: unknown
|
||||
}
|
||||
|
||||
type ToolUseBlock = {
|
||||
type: 'tool_use'
|
||||
id?: string
|
||||
name?: string
|
||||
input?: unknown
|
||||
}
|
||||
|
||||
type Tiers = { recent: number; mid: number }
|
||||
|
||||
// Tier sizes scale with the effective window. Targets roughly:
//   - recent tier stays under ~25% of available window (full fidelity kept)
//   - recent + mid tier stays under ~50% of available window (bounded bulk)
//   - everything older collapses to ~15-token stubs
// Values assume ~5KB avg tool_result, which matches the Copilot default case
// (parallel_tool_calls=true means multiple Read/Bash outputs per turn). For
// ≥ 500k models the tiers are so generous that compression is effectively
// inert for any realistic session — see compressToolHistory.test.ts.
//
// Rows are ordered by ascending (exclusive) upper bound; the first row whose
// bound exceeds the window wins.
const TIER_STEPS: ReadonlyArray<{ below: number; recent: number; mid: number }> = [
  { below: 16_000, recent: 2, mid: 3 },
  { below: 32_000, recent: 3, mid: 5 },
  { below: 64_000, recent: 4, mid: 8 },
  { below: 128_000, recent: 5, mid: 10 },
  { below: 256_000, recent: 8, mid: 15 },
  { below: 500_000, recent: 12, mid: 25 },
]

/**
 * Picks how many tool-result messages stay untouched (`recent`) and how many
 * of the next-older ones are only truncated (`mid`) for a given window size.
 *
 * @param effectiveWindow Effective context window size of the target model.
 * @returns A fresh `{ recent, mid }` pair; windows of 500k and above (and any
 *   value that matches no row, such as NaN) get the most generous tier.
 */
export function getTiers(effectiveWindow: number): Tiers {
  for (const step of TIER_STEPS) {
    if (effectiveWindow < step.below) {
      return { recent: step.recent, mid: step.mid }
    }
  }
  return { recent: 25, mid: 50 }
}
|
||||
|
||||
/**
 * Flattens a tool_result `content` value into plain text.
 *
 * A string is returned unchanged. For an array, the `text` of every
 * `{ type: 'text' }` block is joined with newlines and every other entry is
 * ignored. Any other value yields an empty string.
 */
function extractText(content: unknown): string {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return ''
  const parts: string[] = []
  for (const part of content as Array<
    { type?: string; text?: unknown } | null | undefined
  >) {
    if (part?.type === 'text' && typeof part.text === 'string') {
      parts.push(part.text)
    }
  }
  return parts.join('\n')
}

/**
 * True when `content` is a string or an array made only of text blocks, i.e.
 * when extractText() sees everything the block carries.
 */
function isTextOnlyContent(content: unknown): boolean {
  if (typeof content === 'string') return true
  return (
    Array.isArray(content) &&
    content.every(
      (part: { type?: string; text?: unknown } | null | undefined) =>
        part?.type === 'text' && typeof part.text === 'string',
    )
  )
}

/**
 * Returns at most `maxChars` UTF-16 code units from the start of `text`.
 *
 * If the cut would land between the two halves of a surrogate pair, it backs
 * off by one unit so the result never ends in a lone high surrogate. Negative
 * or NaN limits are treated as 0 (a raw `slice(0, -n)` would count from the
 * end instead).
 */
function sliceAtCodePointBoundary(text: string, maxChars: number): string {
  const limit = Math.max(0, Math.trunc(maxChars) || 0)
  if (text.length <= limit) return text
  let end = limit
  if (end > 0) {
    const last = text.charCodeAt(end - 1)
    const next = text.charCodeAt(end)
    const splitsPair =
      last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff
    if (splitsPair) end -= 1
  }
  return text.slice(0, end)
}

/**
 * Serializes a tool input for the stub, bounded to `maxChars` characters.
 *
 * Falsy, unserializable (JSON.stringify throws or returns undefined) inputs
 * become `{}`. Clamped output ends in `…` so a cut-off path or command is
 * visibly incomplete rather than looking like the real argument.
 */
function formatStubArgs(input: unknown, maxChars: number): string {
  if (!input) return '{}'
  let json: string | undefined
  try {
    json = JSON.stringify(input)
  } catch {
    json = undefined
  }
  if (typeof json !== 'string') return '{}'
  if (json.length <= maxChars) return json
  // Reserve one character for the ellipsis so the total stays within budget.
  return `${sliceAtCodePointBoundary(json, Math.max(0, maxChars - 1))}…`
}

// Old-tier compression strategy. Replaces content entirely with a one-line
// metadata marker: ~10× more token-efficient than a 500-char truncation AND
// unambiguous — partial truncations can look authoritative to the model. The
// stub format encodes tool name + args so the model can re-invoke the same
// tool if it needs the omitted output back.
/**
 * Builds the old-tier replacement for a tool_result block.
 *
 * @param block The tool_result to replace; all of its other fields are kept
 *   via spread.
 * @param toolUsesById Lookup of tool_use blocks by id, used for name and args.
 *   A missing entry produces the generic name `tool` and args `{}`.
 * @param maxArgsChars Upper bound for the serialized args inside the stub.
 * @returns A copy of `block` whose content is the one-line stub, or `block`
 *   itself when it is text-only and already no longer than the stub would be
 *   (stubbing it would add tokens and discard the real output).
 */
function buildStub(
  block: ToolResultBlock,
  toolUsesById: Map<string, ToolUseBlock>,
  maxArgsChars: number = STUB_ARGS_MAX_CHARS,
): ToolResultBlock {
  const original = extractText(block.content)
  const toolUse = toolUsesById.get(block.tool_use_id ?? '')
  const name = toolUse?.name ?? 'tool'
  const args = formatStubArgs(toolUse?.input, maxArgsChars)
  const stubText = `[${name} args=${args} → ${original.length} chars omitted]`

  // Never inflate: only applies when the text is all there is, so non-text
  // payloads are still stubbed exactly as before.
  if (isTextOnlyContent(block.content) && original.length <= stubText.length) {
    return block
  }

  return {
    ...block,
    content: [{ type: 'text', text: stubText }],
  }
}

// Mid-tier compression. The trailing marker is load-bearing: without it, the
// model can't distinguish "tool returned 2000 chars" from "tool returned 20k
// chars that we cut to 2000". Distinguishing those matters for the model's
// decision to re-invoke the tool.
/**
 * Cuts a tool_result's text down to `maxChars` and appends a marker stating
 * how many characters were dropped.
 *
 * @param block The tool_result to shorten; other fields are kept via spread.
 * @param maxChars Maximum number of characters of original text to keep.
 * @returns `block` itself when its text already fits, or when the block is
 *   text-only and the truncated form (text + marker) would not be shorter
 *   than the original; otherwise a copy with a single text block.
 *   NOTE(review): when the text is over the limit, non-text entries in
 *   `content` are dropped from the rewritten block — confirm this is intended.
 */
function truncateBlock(
  block: ToolResultBlock,
  maxChars: number,
): ToolResultBlock {
  const text = extractText(block.content)
  if (text.length <= maxChars) return block

  const kept = sliceAtCodePointBoundary(text, maxChars)
  const omitted = text.length - kept.length
  const truncated = `${kept}\n[…truncated ${omitted} chars from tool history]`

  // Text barely over the limit would grow once the marker is appended.
  if (isTextOnlyContent(block.content) && truncated.length >= text.length) {
    return block
  }

  return {
    ...block,
    content: [{ type: 'text', text: truncated }],
  }
}
|
||||
|
||||
/**
 * Resolves the object that actually carries `role` / `content`: the nested
 * `message` when the envelope has one, otherwise the message itself.
 */
function getInner(msg: AnyMessage): { role?: string; content?: unknown } {
  const payload = msg.message ?? msg
  return payload as { role?: string; content?: unknown }
}
|
||||
|
||||
/**
 * Collects every `tool_use` block that has a truthy `id`, keyed by that id.
 * Messages whose content is not an array are skipped; if an id repeats, the
 * later block replaces the earlier one.
 */
function indexToolUses(messages: AnyMessage[]): Map<string, ToolUseBlock> {
  const byId = new Map<string, ToolUseBlock>()
  messages.forEach(message => {
    const blocks = (message.message ?? message).content
    if (!Array.isArray(blocks)) return
    blocks.forEach((candidate: { type?: string; id?: string }) => {
      if (candidate?.type === 'tool_use' && candidate.id) {
        byId.set(candidate.id, candidate as ToolUseBlock)
      }
    })
  })
  return byId
}
|
||||
|
||||
/**
 * Lists, in ascending order, the positions of user messages whose array
 * content holds at least one `tool_result` block. The role is read from the
 * nested `message` first and falls back to the outer envelope.
 */
function indexToolResultMessages(messages: AnyMessage[]): number[] {
  const found: number[] = []
  for (const [position, message] of messages.entries()) {
    const payload = message.message ?? message
    const role = payload.role ?? message.role
    if (role !== 'user') continue
    const blocks = payload.content
    if (!Array.isArray(blocks)) continue
    const hasToolResult = blocks.some(
      (entry: { type?: string }) => entry?.type === 'tool_result',
    )
    if (hasToolResult) found.push(position)
  }
  return found
}
|
||||
|
||||
/**
 * Returns a shallow copy of `msg` with its content replaced by `newContent`.
 * When the message wraps a truthy `message` object the content is swapped
 * there; otherwise it is swapped on the top level. The input is not mutated.
 */
function rewriteMessage<T extends AnyMessage>(
  msg: T,
  newContent: unknown[],
): T {
  return msg.message
    ? { ...msg, message: { ...msg.message, content: newContent } }
    : { ...msg, content: newContent }
}
|
||||
|
||||
// microCompact.maybeTimeBasedMicrocompact may already have swapped old
// tool_result content for TOOL_RESULT_CLEARED_MESSAGE before this module runs.
// Compressing such a block again would put a stub over a marker (e.g.
// `[Read args={} → 40 chars omitted]`), which is wasteful and says less than
// the canonical marker does.
/** True when the block's extracted text is exactly the cleared marker. */
function isAlreadyCleared(block: ToolResultBlock): boolean {
  return extractText(block.content) === TOOL_RESULT_CLEARED_MESSAGE
}
|
||||
|
||||
/**
 * Decides whether a tool_result block may be truncated or stubbed.
 *
 * - Blocks already cleared by microCompact are never touched.
 * - A result with no matching tool_use (or a tool_use without a name) is
 *   compressed with a generic "tool" stub: the originating call is gone, so
 *   nothing downstream needs the output.
 * - Otherwise the decision is delegated to microCompact's isCompactableTool,
 *   its curated safe-to-compress set (Read/Bash/Grep/…/mcp__*), so user-facing
 *   flow tools (Task, Agent, custom) stay intact.
 */
function shouldCompressBlock(
  block: ToolResultBlock,
  toolUsesById: Map<string, ToolUseBlock>,
): boolean {
  if (isAlreadyCleared(block)) return false
  const toolName = toolUsesById.get(block.tool_use_id ?? '')?.name
  return toolName ? isCompactableTool(toolName) : true
}
|
||||
|
||||
/**
 * Shrinks the bulk text of older tool_result blocks while leaving message
 * order, tool_use blocks and every non-tool_result block untouched.
 *
 * Tool-result messages are ranked from newest to oldest: the newest
 * `tiers.recent` are kept verbatim, the next `tiers.mid` are truncated to
 * MID_MAX_CHARS, and everything older is replaced by a one-line stub. Tiers
 * are counted per message, not per block.
 *
 * The input is never mutated. The original `messages` reference is returned
 * whenever nothing had to change (feature disabled, everything within the
 * recent tier, or no block actually rewritten), and an individual message
 * keeps its original reference when none of its blocks changed, so callers
 * can rely on identity to detect a no-op.
 *
 * @param messages Conversation in either flat (`{ role, content }`) or
 *   wrapped (`{ message: { role, content } }`) form.
 * @param model Model id used to look up the effective context window.
 * @returns The compressed conversation, or `messages` itself when unchanged.
 */
export function compressToolHistory<T extends AnyMessage>(
  messages: T[],
  model: string,
): T[] {
  // Master kill-switch. Returns the original reference so callers skip a
  // defensive copy when the feature is disabled.
  if (!getGlobalConfig().toolHistoryCompressionEnabled) return messages

  const tiers = getTiers(getEffectiveContextWindowSize(model))

  const toolResultIndices = indexToolResultMessages(messages)
  const total = toolResultIndices.length
  // Every tool-result message fits in the recent tier: nothing to compress.
  if (total <= tiers.recent) return messages

  // messageIndex → tool-result position (0 = oldest), so the map below does a
  // constant-time lookup instead of scanning toolResultIndices per message.
  const positionByIndex = new Map<number, number>()
  toolResultIndices.forEach((messageIndex, pos) => {
    positionByIndex.set(messageIndex, pos)
  })

  const toolUsesById = indexToolUses(messages)
  let anyRewritten = false

  const compressed = messages.map((msg, i) => {
    const pos = positionByIndex.get(i)
    if (pos === undefined) return msg

    const fromEnd = total - 1 - pos
    if (fromEnd < tiers.recent) return msg

    const inMidWindow = fromEnd < tiers.recent + tiers.mid
    // Safe cast: indexToolResultMessages only records array-content messages.
    const content = getInner(msg).content as unknown[]

    let touched = false
    const newContent = content.map(block => {
      const typed = block as { type?: string } | null | undefined
      if (typed?.type !== 'tool_result') return block
      const tr = block as ToolResultBlock
      if (!shouldCompressBlock(tr, toolUsesById)) return block
      const next = inMidWindow
        ? truncateBlock(tr, MID_MAX_CHARS)
        : buildStub(tr, toolUsesById)
      // The helpers hand back the same object when they decide not to change it.
      if (next !== tr) touched = true
      return next
    })

    // Skip the copy when every block came back untouched.
    if (!touched) return msg
    anyRewritten = true
    return rewriteMessage(msg, newContent)
  })

  return anyRewritten ? compressed : messages
}
|
||||
@@ -320,10 +320,7 @@ export function classifyOpenAIHttpFailure(options: {
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
(options.status >= 200 && options.status < 300 && isMalformedProviderResponse(body)) ||
|
||||
(options.status >= 400 && isMalformedProviderResponse(body))
|
||||
) {
|
||||
if (options.status >= 400 && isMalformedProviderResponse(body)) {
|
||||
return {
|
||||
source: 'http',
|
||||
category: 'malformed_provider_response',
|
||||
|
||||
317
src/services/api/openaiShim.compression.test.ts
Normal file
317
src/services/api/openaiShim.compression.test.ts
Normal file
@@ -0,0 +1,317 @@
|
||||
import { afterEach, beforeEach, expect, mock, test } from 'bun:test'
|
||||
import { createOpenAIShimClient } from './openaiShim.js'
|
||||
|
||||
type FetchType = typeof globalThis.fetch
|
||||
const originalFetch = globalThis.fetch
|
||||
|
||||
const originalEnv = {
|
||||
OPENAI_BASE_URL: process.env.OPENAI_BASE_URL,
|
||||
OPENAI_API_KEY: process.env.OPENAI_API_KEY,
|
||||
OPENAI_MODEL: process.env.OPENAI_MODEL,
|
||||
}
|
||||
|
||||
// Mock config + autoCompact so the shim sees deterministic state.
|
||||
const mockState = {
|
||||
enabled: true,
|
||||
effectiveWindow: 100_000, // Copilot gpt-4o tier
|
||||
}
|
||||
|
||||
mock.module('../../utils/config.js', () => ({
|
||||
getGlobalConfig: () => ({
|
||||
toolHistoryCompressionEnabled: mockState.enabled,
|
||||
autoCompactEnabled: false,
|
||||
}),
|
||||
}))
|
||||
|
||||
mock.module('../compact/autoCompact.js', () => ({
|
||||
getEffectiveContextWindowSize: () => mockState.effectiveWindow,
|
||||
}))
|
||||
|
||||
type OpenAIShimClient = {
|
||||
beta: {
|
||||
messages: {
|
||||
create: (
|
||||
params: Record<string, unknown>,
|
||||
options?: Record<string, unknown>,
|
||||
) => Promise<unknown>
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Produces `n` copies of the letter 'A' to stand in for bulky tool output. */
function bigText(n: number): string {
  const filler = 'A'
  return filler.repeat(n)
}
|
||||
|
||||
/**
 * Builds one assistant → user round trip: a `Read` tool_use with id
 * `toolu_<id>` followed by the matching tool_result carrying `resultLength`
 * 'A' characters.
 */
function buildToolExchange(id: number, resultLength: number) {
  const toolUseId = `toolu_${id}`

  const assistantTurn = {
    role: 'assistant',
    content: [
      {
        type: 'tool_use',
        id: toolUseId,
        name: 'Read',
        input: { file_path: `/path/to/file${id}.ts` },
      },
    ],
  }

  const userTurn = {
    role: 'user',
    content: [
      {
        type: 'tool_result',
        tool_use_id: toolUseId,
        content: 'A'.repeat(resultLength),
      },
    ],
  }

  return [assistantTurn, userTurn]
}
|
||||
|
||||
/**
 * Builds a conversation that opens with a plain user prompt and is followed
 * by `numExchanges` tool exchanges (ids 0..numExchanges-1), each with a
 * tool_result of `resultLength` characters.
 */
function buildLongConversation(numExchanges: number, resultLength = 5_000) {
  const conversation: Array<{ role: string; content: unknown }> = [
    { role: 'user', content: 'start the work' },
  ]
  let exchange = 0
  while (exchange < numExchanges) {
    for (const turn of buildToolExchange(exchange, resultLength)) {
      conversation.push(turn)
    }
    exchange += 1
  }
  return conversation
}
|
||||
|
||||
/** Canned, minimal chat-completion reply returned by the stubbed fetch. */
function makeFakeResponse(): Response {
  const completion = {
    id: 'chatcmpl-1',
    model: 'gpt-4o',
    choices: [
      {
        message: { role: 'assistant', content: 'done' },
        finish_reason: 'stop',
      },
    ],
    usage: { prompt_tokens: 8, completion_tokens: 2, total_tokens: 10 },
  }
  const init = { headers: { 'Content-Type': 'application/json' } }
  return new Response(JSON.stringify(completion), init)
}
|
||||
|
||||
// Every test starts from the same provider env and the same mock state:
// compression enabled, 100k effective window.
beforeEach(() => {
  Object.assign(process.env, {
    OPENAI_BASE_URL: 'http://example.test/v1',
    OPENAI_API_KEY: 'test-key',
  })
  delete process.env.OPENAI_MODEL
  Object.assign(mockState, { enabled: true, effectiveWindow: 100_000 })
})
|
||||
|
||||
// Put back the environment variables captured at module load (removing the
// ones that were originally unset) and restore the real fetch.
afterEach(() => {
  for (const [key, saved] of Object.entries(originalEnv)) {
    if (saved === undefined) {
      delete process.env[key]
    } else {
      process.env[key] = saved
    }
  }
  globalThis.fetch = originalFetch
})
|
||||
|
||||
/**
 * Sends `messages` through the OpenAI shim with fetch stubbed out and returns
 * the parsed JSON body of the outgoing request (the latest one, if the shim
 * issues several).
 *
 * @throws Error('request not captured') when no request body was recorded.
 */
async function captureRequestBody(
  messages: Array<{ role: string; content: unknown }>,
  model: string,
): Promise<Record<string, unknown>> {
  let lastBody: Record<string, unknown> | undefined

  const recordingFetch = (async (_input, init) => {
    lastBody = JSON.parse(String(init?.body))
    return makeFakeResponse()
  }) as FetchType
  globalThis.fetch = recordingFetch

  const shim = createOpenAIShimClient({}) as OpenAIShimClient
  const request = { model, system: 'system prompt', messages }
  await shim.beta.messages.create(request)

  if (!lastBody) throw new Error('request not captured')
  return lastBody
}
|
||||
|
||||
/** Returns, in order, the request messages whose role is `tool`. */
function getToolMessages(body: Record<string, unknown>): Array<{ content: string }> {
  const all = body.messages as Array<{ role: string; content: string }>
  const toolOnly: Array<{ role: string; content: string }> = []
  for (const entry of all) {
    if (entry.role === 'tool') toolOnly.push(entry)
  }
  return toolOnly
}
|
||||
|
||||
/**
 * Flattens the `tool_calls` of every assistant message in the request into a
 * single list, preserving order. Messages of other roles, or without an array
 * `tool_calls`, contribute nothing.
 */
function getAssistantToolCalls(body: Record<string, unknown>): unknown[] {
  const all = body.messages as Array<{
    role: string
    tool_calls?: unknown[]
  }>
  const calls: unknown[] = []
  for (const entry of all) {
    if (entry.role !== 'assistant' || !Array.isArray(entry.tool_calls)) continue
    calls.push(...entry.tool_calls)
  }
  return calls
}
|
||||
|
||||
// ============================================================================
|
||||
// BUG REPRO: without compression, full tool history is resent every turn
|
||||
// ============================================================================
|
||||
|
||||
test('BUG REPRO: without compression, all 30 tool results are sent at full size', async () => {
  // Kill-switch off: the shim must forward the history untouched.
  mockState.enabled = false
  const conversation = buildLongConversation(30, 5_000)

  const requestBody = await captureRequestBody(conversation, 'gpt-4o')
  const toolMessages = getToolMessages(requestBody)
  const payloadSize = JSON.stringify(requestBody).length

  // All 30 tool results present, none truncated
  expect(toolMessages.length).toBe(30)
  toolMessages.forEach(({ content }) => {
    expect(content.length).toBeGreaterThanOrEqual(5_000)
    expect(content).not.toContain('[…truncated')
    expect(content).not.toContain('chars omitted')
  })

  // Total payload is large (~150KB raw) — this is the cost being paid every turn
  expect(payloadSize).toBeGreaterThan(150_000)
})
|
||||
|
||||
// ============================================================================
|
||||
// FIX: with compression, recent kept full, mid truncated, old stubbed
|
||||
// ============================================================================
|
||||
|
||||
test('FIX: with compression on Copilot gpt-4o (tier 5/10/rest), 30 turns shrinks dramatically', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000 // 64–128k → recent=5, mid=10
  const conversation = buildLongConversation(30, 5_000)

  const requestBody = await captureRequestBody(conversation, 'gpt-4o')
  const toolMessages = getToolMessages(requestBody)
  const payloadSize = JSON.stringify(requestBody).length

  // Structure preserved: still 30 tool messages, no orphan tool_calls
  expect(toolMessages.length).toBe(30)
  expect(getAssistantToolCalls(requestBody).length).toBe(30)

  // Tier breakdown (oldest → newest); the length check above guarantees each
  // slice is fully populated.
  const oldTier = toolMessages.slice(0, 15) // indices 0..14 → stubs
  const midTier = toolMessages.slice(15, 25) // indices 15..24 → truncated
  const recentTier = toolMessages.slice(25, 30) // indices 25..29 → full

  for (const { content } of oldTier) {
    expect(content).toMatch(/^\[Read args=.*chars omitted\]$/)
  }
  for (const { content } of midTier) {
    expect(content).toContain('[…truncated')
  }
  for (const { content } of recentTier) {
    expect(content.length).toBe(5_000)
    expect(content).not.toContain('[…truncated')
    expect(content).not.toContain('chars omitted')
  }

  // Significant reduction: from ~150KB to <60KB (10 mid×2KB + structure overhead)
  expect(payloadSize).toBeLessThan(60_000)
})
|
||||
|
||||
// ============================================================================
|
||||
// FIX: large-context model gets generous tiers — compression effectively inert
|
||||
// ============================================================================
|
||||
|
||||
test('FIX: gpt-4.1 (1M context) with 25 exchanges keeps all full (recent tier=25)', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 1_000_000 // ≥500k → recent=25, mid=50
  const conversation = buildLongConversation(25, 5_000)

  const requestBody = await captureRequestBody(conversation, 'gpt-4.1')
  const toolMessages = getToolMessages(requestBody)

  // Exactly as many results as the recent tier holds: nothing is compressed.
  expect(toolMessages.length).toBe(25)
  toolMessages.forEach(({ content }) => {
    expect(content.length).toBe(5_000)
    expect(content).not.toContain('[…truncated')
    expect(content).not.toContain('chars omitted')
  })
})
|
||||
|
||||
test('FIX: gpt-4.1 (1M context) with 30 exchanges → only first 5 mid-truncated', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 1_000_000 // recent=25, mid=50
  const conversation = buildLongConversation(30, 5_000)

  const requestBody = await captureRequestBody(conversation, 'gpt-4.1')
  const toolMessages = getToolMessages(requestBody)
  // Index-based access on purpose: a missing message must fail the test.
  const contentAt = (index: number): string => toolMessages[index].content

  // 30 total: indices 0..4 mid, indices 5..29 recent
  let index = 0
  while (index < 5) {
    expect(contentAt(index)).toContain('[…truncated')
    index += 1
  }
  while (index < 30) {
    expect(contentAt(index).length).toBe(5_000)
    index += 1
  }
})
|
||||
|
||||
// ============================================================================
|
||||
// FIX: stub preserves tool name and args — model can re-invoke if needed
|
||||
// ============================================================================
|
||||
|
||||
test('FIX: stub format includes original tool name and arguments', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000
  const conversation = buildLongConversation(30, 5_000)

  const requestBody = await captureRequestBody(conversation, 'gpt-4o')
  const [oldest] = getToolMessages(requestBody)
  const oldestStub = oldest.content

  // Format: [<tool_name> args=<json> → <N> chars omitted]
  const expectedShapes = [/^\[Read /, /file_path/, /→ 5000 chars omitted\]$/]
  for (const shape of expectedShapes) {
    expect(oldestStub).toMatch(shape)
  }
})
|
||||
|
||||
// ============================================================================
|
||||
// FIX: tool_use blocks (assistant tool_calls) are never modified
|
||||
// ============================================================================
|
||||
|
||||
test('FIX: every tool_call retains its full id, name, and arguments', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 100_000
  const conversation = buildLongConversation(30, 5_000)

  const requestBody = await captureRequestBody(conversation, 'gpt-4o')
  const toolCalls = getAssistantToolCalls(requestBody) as Array<{
    id: string
    function: { name: string; arguments: string }
  }>

  // Compression only rewrites tool results; the calls themselves must be
  // byte-for-byte what the conversation contained.
  expect(toolCalls.length).toBe(30)
  toolCalls.forEach((call, position) => {
    expect(call.id).toBe(`toolu_${position}`)
    expect(call.function.name).toBe('Read')
    expect(JSON.parse(call.function.arguments)).toEqual({
      file_path: `/path/to/file${position}.ts`,
    })
  })
})
|
||||
|
||||
// ============================================================================
|
||||
// FIX: small-context provider (Mistral 32k) gets aggressive compression
|
||||
// ============================================================================
|
||||
|
||||
test('FIX: 32k window (Mistral tier) → recent=3 keeps last 3 only', async () => {
  mockState.enabled = true
  mockState.effectiveWindow = 24_000 // 16–32k → recent=3, mid=5
  const conversation = buildLongConversation(15, 3_000)

  const requestBody = await captureRequestBody(conversation, 'mistral-large-latest')
  const toolMessages = getToolMessages(requestBody)
  // Index-based access on purpose: a missing message must fail the test.
  const contentAt = (index: number): string => toolMessages[index].content

  // 15 total: indices 0..6 old, 7..11 mid, 12..14 recent
  let index = 0
  while (index <= 6) {
    expect(contentAt(index)).toContain('chars omitted')
    index += 1
  }
  while (index <= 11) {
    expect(contentAt(index)).toContain('[…truncated')
    index += 1
  }
  while (index <= 14) {
    expect(contentAt(index).length).toBe(3_000)
    index += 1
  }
})
|
||||
@@ -117,3 +117,170 @@ test('redacts credentials in transport diagnostic URL logs', async () => {
|
||||
expect(logLine).not.toContain('user:supersecret')
|
||||
expect(logLine).not.toContain('supersecret@')
|
||||
})
|
||||
test('logs self-heal localhost fallback with redacted from/to URLs', async () => {
|
||||
const debugSpy = mock(() => {})
|
||||
mock.module('../../utils/debug.js', () => ({
|
||||
logForDebugging: debugSpy,
|
||||
}))
|
||||
|
||||
const nonce = `${Date.now()}-${Math.random()}`
|
||||
const { createOpenAIShimClient } = await import(`./openaiShim.ts?ts=${nonce}`)
|
||||
|
||||
process.env.OPENAI_BASE_URL = 'http://user:supersecret@localhost:11434/v1'
|
||||
process.env.OPENAI_API_KEY = 'supersecret'
|
||||
|
||||
globalThis.fetch = mock(async (input: string | Request) => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
if (url.includes('localhost')) {
|
||||
throw Object.assign(new TypeError('fetch failed'), {
|
||||
code: 'ENOTFOUND',
|
||||
})
|
||||
}
|
||||
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
id: 'chatcmpl-1',
|
||||
model: 'qwen2.5-coder:7b',
|
||||
choices: [
|
||||
{
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: 'ok',
|
||||
},
|
||||
finish_reason: 'stop',
|
||||
},
|
||||
],
|
||||
usage: {
|
||||
prompt_tokens: 5,
|
||||
completion_tokens: 2,
|
||||
total_tokens: 7,
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
const client = createOpenAIShimClient({}) as {
|
||||
beta: {
|
||||
messages: {
|
||||
create: (params: Record<string, unknown>) => Promise<unknown>
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await expect(
|
||||
client.beta.messages.create({
|
||||
model: 'qwen2.5-coder:7b',
|
||||
messages: [{ role: 'user', content: 'hello' }],
|
||||
max_tokens: 64,
|
||||
stream: false,
|
||||
}),
|
||||
).resolves.toBeDefined()
|
||||
|
||||
const fallbackLog = debugSpy.mock.calls.find(call =>
|
||||
typeof call?.[0] === 'string' &&
|
||||
call[0].includes('self-heal retry reason=localhost_resolution_failed'),
|
||||
)
|
||||
|
||||
expect(fallbackLog).toBeDefined()
|
||||
const logLine = String(fallbackLog?.[0])
|
||||
expect(logLine).toContain('from=http://redacted:redacted@localhost:11434/v1/chat/completions')
|
||||
expect(logLine).toContain('to=http://redacted:redacted@127.0.0.1:11434/v1/chat/completions')
|
||||
expect(logLine).not.toContain('supersecret')
|
||||
})
|
||||
|
||||
test('logs self-heal toolless retry for local tool-call incompatibility', async () => {
|
||||
const debugSpy = mock(() => {})
|
||||
mock.module('../../utils/debug.js', () => ({
|
||||
logForDebugging: debugSpy,
|
||||
}))
|
||||
|
||||
const nonce = `${Date.now()}-${Math.random()}`
|
||||
const { createOpenAIShimClient } = await import(`./openaiShim.ts?ts=${nonce}`)
|
||||
|
||||
process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1'
|
||||
process.env.OPENAI_API_KEY = 'ollama'
|
||||
|
||||
let callCount = 0
|
||||
globalThis.fetch = mock(async () => {
|
||||
callCount += 1
|
||||
if (callCount === 1) {
|
||||
return new Response('tool_calls are not supported', {
|
||||
status: 400,
|
||||
headers: {
|
||||
'Content-Type': 'text/plain',
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
id: 'chatcmpl-1',
|
||||
model: 'qwen2.5-coder:7b',
|
||||
choices: [
|
||||
{
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: 'ok',
|
||||
},
|
||||
finish_reason: 'stop',
|
||||
},
|
||||
],
|
||||
usage: {
|
||||
prompt_tokens: 7,
|
||||
completion_tokens: 3,
|
||||
total_tokens: 10,
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
const client = createOpenAIShimClient({}) as {
|
||||
beta: {
|
||||
messages: {
|
||||
create: (params: Record<string, unknown>) => Promise<unknown>
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await expect(
|
||||
client.beta.messages.create({
|
||||
model: 'qwen2.5-coder:7b',
|
||||
messages: [{ role: 'user', content: 'hello' }],
|
||||
tools: [
|
||||
{
|
||||
name: 'Read',
|
||||
description: 'Read file',
|
||||
input_schema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
filePath: { type: 'string' },
|
||||
},
|
||||
required: ['filePath'],
|
||||
},
|
||||
},
|
||||
],
|
||||
max_tokens: 64,
|
||||
stream: false,
|
||||
}),
|
||||
).resolves.toBeDefined()
|
||||
|
||||
const fallbackLog = debugSpy.mock.calls.find(call =>
|
||||
typeof call?.[0] === 'string' &&
|
||||
call[0].includes('self-heal retry reason=tool_call_incompatible mode=toolless'),
|
||||
)
|
||||
|
||||
expect(fallbackLog).toBeDefined()
|
||||
expect(fallbackLog?.[1]).toEqual({ level: 'warn' })
|
||||
})
|
||||
|
||||
@@ -2513,7 +2513,7 @@ test('non-streaming: real content takes precedence over reasoning_content', asyn
|
||||
])
|
||||
})
|
||||
|
||||
test('non-streaming: strips leaked reasoning preamble from assistant content', async () => {
|
||||
test('non-streaming: strips <think> tag block from assistant content', async () => {
|
||||
globalThis.fetch = (async () => {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
@@ -2524,7 +2524,7 @@ test('non-streaming: strips leaked reasoning preamble from assistant content', a
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content:
|
||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
|
||||
'<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?',
|
||||
},
|
||||
finish_reason: 'stop',
|
||||
},
|
||||
@@ -2645,7 +2645,7 @@ test('streaming: thinking block closed before tool call', async () => {
|
||||
expect(thinkingStart?.content_block?.type).toBe('thinking')
|
||||
})
|
||||
|
||||
test('streaming: strips leaked reasoning preamble from assistant content deltas', async () => {
|
||||
test('streaming: strips <think> tag block from assistant content deltas', async () => {
|
||||
globalThis.fetch = (async () => {
|
||||
const chunks = makeStreamChunks([
|
||||
{
|
||||
@@ -2658,7 +2658,7 @@ test('streaming: strips leaked reasoning preamble from assistant content deltas'
|
||||
delta: {
|
||||
role: 'assistant',
|
||||
content:
|
||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
|
||||
'<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
@@ -2700,10 +2700,10 @@ test('streaming: strips leaked reasoning preamble from assistant content deltas'
|
||||
}
|
||||
}
|
||||
|
||||
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
|
||||
expect(textDeltas.join('')).toBe('Hey! How can I help you today?')
|
||||
})
|
||||
|
||||
test('streaming: strips leaked reasoning preamble when split across multiple content chunks', async () => {
|
||||
test('streaming: strips <think> tag split across multiple content chunks', async () => {
|
||||
globalThis.fetch = (async () => {
|
||||
const chunks = makeStreamChunks([
|
||||
{
|
||||
@@ -2715,7 +2715,7 @@ test('streaming: strips leaked reasoning preamble when split across multiple con
|
||||
index: 0,
|
||||
delta: {
|
||||
role: 'assistant',
|
||||
content: 'The user said "hey" - this is a simple greeting. ',
|
||||
content: '<think>user wants a greeting,',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
@@ -2729,8 +2729,21 @@ test('streaming: strips leaked reasoning preamble when split across multiple con
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
content:
|
||||
'I should respond in a friendly, concise way.\n\nHey! How can I help you today?',
|
||||
content: ' respond briefly</th',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'chatcmpl-1',
|
||||
object: 'chat.completion.chunk',
|
||||
model: 'gpt-5-mini',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
content: 'ink>Hey! How can I help you today?',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
@@ -2773,7 +2786,69 @@ test('streaming: strips leaked reasoning preamble when split across multiple con
|
||||
}
|
||||
}
|
||||
|
||||
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
|
||||
expect(textDeltas.join('')).toBe('Hey! How can I help you today?')
|
||||
})
|
||||
|
||||
test('streaming: preserves prose without tags (no phrase-based false positive)', async () => {
|
||||
// Regression: older phrase-based sanitizer would strip "I should..." prose.
|
||||
// The tag-based approach leaves legitimate assistant output alone.
|
||||
globalThis.fetch = (async () => {
|
||||
const chunks = makeStreamChunks([
|
||||
{
|
||||
id: 'chatcmpl-1',
|
||||
object: 'chat.completion.chunk',
|
||||
model: 'gpt-5-mini',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
role: 'assistant',
|
||||
content:
|
||||
'I should note that the user role requires a briefly concise friendly response format.',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'chatcmpl-1',
|
||||
object: 'chat.completion.chunk',
|
||||
model: 'gpt-5-mini',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {},
|
||||
finish_reason: 'stop',
|
||||
},
|
||||
],
|
||||
},
|
||||
])
|
||||
|
||||
return makeSseResponse(chunks)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
const result = await client.beta.messages
|
||||
.create({
|
||||
model: 'gpt-5-mini',
|
||||
system: 'test system',
|
||||
messages: [{ role: 'user', content: 'hey' }],
|
||||
max_tokens: 64,
|
||||
stream: true,
|
||||
})
|
||||
.withResponse()
|
||||
|
||||
const textDeltas: string[] = []
|
||||
for await (const event of result.data) {
|
||||
const delta = (event as { delta?: { type?: string; text?: string } }).delta
|
||||
if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
|
||||
textDeltas.push(delta.text)
|
||||
}
|
||||
}
|
||||
|
||||
expect(textDeltas.join('')).toBe(
|
||||
'I should note that the user role requires a briefly concise friendly response format.',
|
||||
)
|
||||
})
|
||||
|
||||
test('classifies localhost transport failures with actionable category marker', async () => {
|
||||
@@ -2856,6 +2931,204 @@ test('classifies chat-completions endpoint 404 failures with endpoint_not_found
|
||||
}),
|
||||
).rejects.toThrow('openai_category=endpoint_not_found')
|
||||
})
|
||||
/**
 * With OPENAI_BASE_URL pointing at `localhost`, the stubbed fetch throws a
 * `TypeError('fetch failed')` carrying `code: 'ENOTFOUND'` for every URL that
 * contains "localhost" and answers any other URL with a 200 JSON completion.
 *
 * The test expects the call to resolve, the first attempt to have targeted the
 * configured localhost URL, and a request to the 127.0.0.1 equivalent to have
 * been issued.
 */
test('self-heals localhost resolution failures by retrying local loopback base URL', async () => {
  process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1'

  const requestUrls: string[] = []

  globalThis.fetch = (async (input, _init) => {
    // fetch() accepts a string, a URL, or a Request. A URL object exposes its
    // text as `href` (it has no `url` property), so handle it explicitly
    // instead of recording `undefined` and crashing on `.includes` below.
    const url =
      typeof input === 'string'
        ? input
        : input instanceof URL
          ? input.href
          : input.url
    requestUrls.push(url)

    if (url.includes('localhost')) {
      // Simulate a DNS resolution failure for the `localhost` hostname.
      throw Object.assign(new TypeError('fetch failed'), {
        code: 'ENOTFOUND',
      })
    }

    const completion = {
      id: 'chatcmpl-1',
      model: 'qwen2.5-coder:7b',
      choices: [
        {
          message: {
            role: 'assistant',
            content: 'hello from loopback',
          },
          finish_reason: 'stop',
        },
      ],
      usage: {
        prompt_tokens: 4,
        completion_tokens: 3,
        total_tokens: 7,
      },
    }

    return new Response(JSON.stringify(completion), {
      status: 200,
      headers: {
        'Content-Type': 'application/json',
      },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  await expect(
    client.beta.messages.create({
      model: 'qwen2.5-coder:7b',
      messages: [{ role: 'user', content: 'hello' }],
      max_tokens: 64,
      stream: false,
    }),
  ).resolves.toBeDefined()

  // First attempt goes to the configured host; a loopback retry must follow.
  expect(requestUrls[0]).toBe('http://localhost:11434/v1/chat/completions')
  expect(requestUrls).toContain('http://127.0.0.1:11434/v1/chat/completions')
})
|
||||
|
||||
/**
 * With OPENAI_BASE_URL set to a local URL that lacks the `/v1` suffix, the
 * stubbed fetch answers exactly `http://localhost:11434/chat/completions`
 * with a plain-text 404 and every other URL with a 200 JSON completion.
 *
 * The test expects the call to resolve after exactly two requests, in order:
 * the original path and then the `/v1`-prefixed path.
 */
test('self-heals local endpoint_not_found by retrying with /v1 base URL', async () => {
  process.env.OPENAI_BASE_URL = 'http://localhost:11434'

  const requestUrls: string[] = []

  globalThis.fetch = (async (input, _init) => {
    // fetch() accepts a string, a URL, or a Request. A URL object exposes its
    // text as `href` (it has no `url` property), so handle it explicitly
    // instead of recording `undefined` for such inputs.
    const url =
      typeof input === 'string'
        ? input
        : input instanceof URL
          ? input.href
          : input.url
    requestUrls.push(url)

    if (url === 'http://localhost:11434/chat/completions') {
      return new Response('Not Found', {
        status: 404,
        headers: {
          'Content-Type': 'text/plain',
        },
      })
    }

    const completion = {
      id: 'chatcmpl-1',
      model: 'qwen2.5-coder:7b',
      choices: [
        {
          message: {
            role: 'assistant',
            content: 'hello from /v1',
          },
          finish_reason: 'stop',
        },
      ],
      usage: {
        prompt_tokens: 5,
        completion_tokens: 2,
        total_tokens: 7,
      },
    }

    return new Response(JSON.stringify(completion), {
      status: 200,
      headers: {
        'Content-Type': 'application/json',
      },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  await expect(
    client.beta.messages.create({
      model: 'qwen2.5-coder:7b',
      messages: [{ role: 'user', content: 'hello' }],
      max_tokens: 64,
      stream: false,
    }),
  ).resolves.toBeDefined()

  // Exactly one failed attempt followed by exactly one /v1 retry.
  expect(requestUrls).toEqual([
    'http://localhost:11434/chat/completions',
    'http://localhost:11434/v1/chat/completions',
  ])
})
|
||||
|
||||
/**
 * With a local base URL, the stubbed fetch rejects the first request with a
 * plain-text 400 "tool_calls are not supported" and answers later requests
 * with a 200 JSON completion.
 *
 * The test expects two requests in total: the first carries a `tools` array,
 * the second carries no tools (absent or an empty array), and neither sets
 * `tool_choice`.
 */
test('self-heals tool-call incompatibility by retrying local Ollama requests without tools', async () => {
  process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1'

  const requestBodies: Array<Record<string, unknown>> = []

  globalThis.fetch = (async (_input, init) => {
    requestBodies.push(
      JSON.parse(String(init?.body)) as Record<string, unknown>,
    )

    // Only the very first request is rejected; every later one succeeds.
    const isFirstRequest = requestBodies.length === 1
    if (isFirstRequest) {
      return new Response('tool_calls are not supported', {
        status: 400,
        headers: { 'Content-Type': 'text/plain' },
      })
    }

    const completion = {
      id: 'chatcmpl-1',
      model: 'qwen2.5-coder:7b',
      choices: [
        {
          message: { role: 'assistant', content: 'fallback without tools' },
          finish_reason: 'stop',
        },
      ],
      usage: { prompt_tokens: 8, completion_tokens: 4, total_tokens: 12 },
    }

    return new Response(JSON.stringify(completion), {
      status: 200,
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  const pending = client.beta.messages.create({
    model: 'qwen2.5-coder:7b',
    messages: [{ role: 'user', content: 'hello' }],
    tools: [
      {
        name: 'Read',
        description: 'Read a file',
        input_schema: {
          type: 'object',
          properties: {
            filePath: { type: 'string' },
          },
          required: ['filePath'],
        },
      },
    ],
    max_tokens: 64,
    stream: false,
  })
  await expect(pending).resolves.toBeDefined()

  expect(requestBodies).toHaveLength(2)
  const [initialBody, retryBody] = requestBodies

  // The original attempt must have offered tools without forcing a choice.
  expect(Array.isArray(initialBody?.tools)).toBe(true)
  expect(initialBody?.tool_choice).toBeUndefined()

  // The retry may either omit `tools` entirely or send an empty list.
  const retryTools = retryBody?.tools
  const retryIsToolless =
    retryTools === undefined ||
    (Array.isArray(retryTools) && retryTools.length === 0)
  expect(retryIsToolless).toBe(true)
  expect(retryBody?.tool_choice).toBeUndefined()
})
|
||||
|
||||
test('preserves valid tool_result and drops orphan tool_result', async () => {
|
||||
let requestBody: Record<string, unknown> | undefined
|
||||
@@ -2924,7 +3197,7 @@ test('preserves valid tool_result and drops orphan tool_result', async () => {
|
||||
{
|
||||
role: 'user',
|
||||
content: 'What happened?',
|
||||
}
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
@@ -2943,4 +3216,383 @@ test('preserves valid tool_result and drops orphan tool_result', async () => {
|
||||
|
||||
const orphanMessage = toolMessages.find(m => m.tool_call_id === 'orphan_call_2')
|
||||
expect(orphanMessage).toBeUndefined()
|
||||
|
||||
// Actually, the semantic message IS injected here because the user block with orphan
|
||||
// tool result is converted to:
|
||||
// 1. Tool result (valid_call_1) -> role 'tool'
|
||||
// 2. User content ("What happened?") -> role 'user'
|
||||
// This triggers the tool -> assistant injection.
|
||||
const assistantMessages = messages.filter(m => m.role === 'assistant')
|
||||
expect(assistantMessages.some(m => m.content === '[Tool execution interrupted by user]')).toBe(true)
|
||||
})
|
||||
|
||||
/**
 * Sends a history whose only assistant turn consists of a single `thinking`
 * block, sandwiched between two user turns, and inspects the outgoing request.
 *
 * Expected outcome: the assistant message is dropped because thinking is
 * stripped, and the two user messages are coalesced into one.
 */
test('drops empty assistant message when only thinking block was present and stripped', async () => {
  let requestBody: Record<string, unknown> | undefined

  const completion = {
    id: 'chatcmpl-1',
    object: 'chat.completion',
    created: 123456789,
    model: 'mistral-large-latest',
    choices: [
      { message: { role: 'assistant', content: 'hi' }, finish_reason: 'stop' },
    ],
    usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
  }

  // Capture the outgoing body so the converted message list can be checked.
  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))
    return new Response(JSON.stringify(completion), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  await client.beta.messages.create({
    model: 'mistral-large-latest',
    messages: [
      { role: 'user', content: 'Initial' },
      {
        role: 'assistant',
        content: [
          { type: 'thinking', thinking: 'I am thinking...', signature: 'sig' },
        ],
      },
      { role: 'user', content: 'Interrupting query' },
    ],
    max_tokens: 64,
    stream: false,
  })

  const messages = requestBody?.messages as Array<Record<string, unknown>>
  expect(messages.length).toBe(1)

  const [merged] = messages
  expect(merged.role).toBe('user')

  // Both user turns must survive inside the single coalesced message.
  const mergedText = String(merged.content)
  expect(mergedText).toContain('Initial')
  expect(mergedText).toContain('Interrupting query')
})
|
||||
|
||||
/**
 * Sends assistant(tool_use) -> user(tool_result) -> user(text) and inspects
 * the outgoing request.
 *
 * Expected role sequence:
 * assistant (tool_calls) -> tool -> assistant (semantic) -> user,
 * where the injected assistant message has the content
 * "[Tool execution interrupted by user]".
 */
test('injects semantic assistant message when tool result is followed by user message', async () => {
  let requestBody: Record<string, unknown> | undefined

  const completion = {
    id: 'chatcmpl-2',
    object: 'chat.completion',
    created: 123456789,
    model: 'mistral-large-latest',
    choices: [
      { message: { role: 'assistant', content: 'hi' }, finish_reason: 'stop' },
    ],
    usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
  }

  // Capture the outgoing body so the converted message list can be checked.
  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))
    return new Response(JSON.stringify(completion), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  await client.beta.messages.create({
    model: 'mistral-large-latest',
    messages: [
      {
        role: 'assistant',
        content: [{ type: 'tool_use', id: 'call_1', name: 'search', input: {} }],
      },
      {
        role: 'user',
        content: [
          { type: 'tool_result', tool_use_id: 'call_1', content: 'Result' },
        ],
      },
      { role: 'user', content: 'Next user query' },
    ],
    max_tokens: 64,
    stream: false,
  })

  const messages = requestBody?.messages as Array<Record<string, unknown>>

  const roleSequence = messages.map(({ role }) => role)
  expect(roleSequence).toEqual(['assistant', 'tool', 'assistant', 'user'])

  // Index 2 is the injected bridge between the tool result and the user turn.
  const injected = messages[2]
  expect(injected.role).toBe('assistant')
  expect(injected.content).toBe('[Tool execution interrupted by user]')
})
|
||||
|
||||
/**
 * With OPENAI_BASE_URL set to the `api.moonshot.ai` host, the outgoing request
 * body is expected to carry the limit as `max_tokens`, to omit
 * `max_completion_tokens`, and to omit `store`.
 */
test('Moonshot: uses max_tokens (not max_completion_tokens) and strips store', async () => {
  process.env.OPENAI_BASE_URL = 'https://api.moonshot.ai/v1'
  process.env.OPENAI_API_KEY = 'sk-moonshot-test'

  let requestBody: Record<string, unknown> | undefined

  const completion = {
    id: 'chatcmpl-1',
    model: 'kimi-k2.6',
    choices: [
      { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
    ],
    usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
  }

  // Capture the outgoing body so its parameters can be checked.
  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))
    return new Response(JSON.stringify(completion), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient
  await client.beta.messages.create({
    model: 'kimi-k2.6',
    system: 'you are kimi',
    messages: [{ role: 'user', content: 'hi' }],
    max_tokens: 256,
    stream: false,
  })

  expect(requestBody?.max_tokens).toBe(256)
  for (const absentKey of ['max_completion_tokens', 'store']) {
    expect(requestBody?.[absentKey]).toBeUndefined()
  }
})
|
||||
|
||||
/**
 * Same scenario as the `.ai` host, but with OPENAI_BASE_URL set to the
 * `api.moonshot.cn` host: the outgoing request body must not contain `store`.
 */
test('Moonshot: cn host is also detected', async () => {
  process.env.OPENAI_BASE_URL = 'https://api.moonshot.cn/v1'
  process.env.OPENAI_API_KEY = 'sk-moonshot-test'

  let requestBody: Record<string, unknown> | undefined

  const completion = {
    id: 'chatcmpl-1',
    model: 'kimi-k2.6',
    choices: [
      { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
    ],
    usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
  }

  // Capture the outgoing body so its parameters can be checked.
  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))
    return new Response(JSON.stringify(completion), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient
  await client.beta.messages.create({
    model: 'kimi-k2.6',
    system: 'you are kimi',
    messages: [{ role: 'user', content: 'hi' }],
    max_tokens: 256,
    stream: false,
  })

  const sentStore = requestBody?.store
  expect(sentStore).toBeUndefined()
})
|
||||
|
||||
|
||||
/**
 * A `tool_result` whose content is an array of two text blocks must be sent
 * as a single `role: 'tool'` message whose content is one string, with the
 * block texts joined by a blank line (issue #774, DeepSeek compatibility).
 */
test('collapses multiple text blocks in tool_result to string for DeepSeek compatibility (issue #774)', async () => {
  let requestBody: Record<string, unknown> | undefined

  const completion = {
    id: 'chatcmpl-1',
    model: 'deepseek-reasoner',
    choices: [
      { message: { role: 'assistant', content: 'done' }, finish_reason: 'stop' },
    ],
    usage: { prompt_tokens: 12, completion_tokens: 4, total_tokens: 16 },
  }

  // Capture the outgoing body so the converted message list can be checked.
  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))
    return new Response(JSON.stringify(completion), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  await client.beta.messages.create({
    model: 'deepseek-reasoner',
    system: 'test system',
    messages: [
      { role: 'user', content: 'Run ls' },
      {
        role: 'assistant',
        content: [
          {
            type: 'tool_use',
            id: 'call_1',
            name: 'Bash',
            input: { command: 'ls' },
          },
        ],
      },
      {
        role: 'user',
        content: [
          {
            type: 'tool_result',
            tool_use_id: 'call_1',
            content: [
              { type: 'text', text: 'line one' },
              { type: 'text', text: 'line two' },
            ],
          },
        ],
      },
    ],
    max_tokens: 64,
    stream: false,
  })

  const messages = requestBody?.messages as Array<Record<string, unknown>>
  const toolMessages = messages.filter(({ role }) => role === 'tool')
  expect(toolMessages.length).toBe(1)

  const [toolMessage] = toolMessages
  expect(toolMessage.tool_call_id).toBe('call_1')
  expect(typeof toolMessage.content).toBe('string')
  expect(toolMessage.content).toBe('line one\n\nline two')
})
|
||||
|
||||
/**
 * A user message made of two text blocks must be sent as a single string,
 * with the block texts joined by a blank line (issue #774, DeepSeek
 * compatibility). The outgoing list is expected to hold exactly the system
 * message followed by that user message.
 */
test('collapses multiple text blocks into a single string for DeepSeek compatibility (issue #774)', async () => {
  let requestBody: Record<string, unknown> | undefined

  const completion = {
    id: 'chatcmpl-1',
    model: 'deepseek-reasoner',
    choices: [
      { message: { role: 'assistant', content: 'done' }, finish_reason: 'stop' },
    ],
    usage: { prompt_tokens: 12, completion_tokens: 4, total_tokens: 16 },
  }

  // Capture the outgoing body so the converted message list can be checked.
  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))
    return new Response(JSON.stringify(completion), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  await client.beta.messages.create({
    model: 'deepseek-reasoner',
    system: 'test system',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'Hello!' },
          { type: 'text', text: 'How are you?' },
        ],
      },
    ],
    max_tokens: 64,
    stream: false,
  })

  const messages = requestBody?.messages as Array<Record<string, unknown>>
  expect(messages.length).toBe(2) // system + user

  const userMessage = messages[1]
  expect(userMessage.role).toBe('user')
  expect(typeof userMessage.content).toBe('string')
  expect(userMessage.content).toBe('Hello!\n\nHow are you?')
})
|
||||
|
||||
/**
 * A `tool_result` that mixes a text block with a base64 image block must stay
 * multipart: the single outgoing `role: 'tool'` message is expected to carry
 * an array of two parts, typed `text` and `image_url` in that order.
 */
test('preserves mixed text and image tool results as multipart content', async () => {
  let requestBody: Record<string, unknown> | undefined

  const completion = {
    id: 'chatcmpl-1',
    model: 'gpt-4o',
    choices: [
      { message: { role: 'assistant', content: 'done' }, finish_reason: 'stop' },
    ],
    usage: { prompt_tokens: 12, completion_tokens: 4, total_tokens: 16 },
  }

  // Capture the outgoing body so the converted message list can be checked.
  globalThis.fetch = (async (_input, init) => {
    requestBody = JSON.parse(String(init?.body))
    return new Response(JSON.stringify(completion), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const client = createOpenAIShimClient({}) as OpenAIShimClient

  await client.beta.messages.create({
    model: 'gpt-4o',
    system: 'test system',
    messages: [
      { role: 'user', content: 'Show me' },
      {
        role: 'assistant',
        content: [
          {
            type: 'tool_use',
            id: 'call_1',
            name: 'Bash',
            input: { command: 'cat image.png' },
          },
        ],
      },
      {
        role: 'user',
        content: [
          {
            type: 'tool_result',
            tool_use_id: 'call_1',
            content: [
              { type: 'text', text: 'Here is the image:' },
              {
                type: 'image',
                source: {
                  type: 'base64',
                  media_type: 'image/png',
                  data: 'iVBORw0KGgo=',
                },
              },
            ],
          },
        ],
      },
    ],
    max_tokens: 64,
    stream: false,
  })

  const messages = requestBody?.messages as Array<Record<string, unknown>>
  const toolMessages = messages.filter(({ role }) => role === 'tool')
  expect(toolMessages.length).toBe(1)

  const [toolMessage] = toolMessages
  expect(Array.isArray(toolMessage.content)).toBe(true)

  const parts = toolMessage.content as Array<Record<string, unknown>>
  expect(parts.length).toBe(2)
  expect(parts[0].type).toBe('text')
  expect(parts[1].type).toBe('image_url')
})
|
||||
|
||||
@@ -32,10 +32,9 @@ import { resolveGeminiCredential } from '../../utils/geminiAuth.js'
|
||||
import { hydrateGeminiAccessTokenFromSecureStorage } from '../../utils/geminiCredentials.js'
|
||||
import { hydrateGithubModelsTokenFromSecureStorage } from '../../utils/githubModelsCredentials.js'
|
||||
import {
|
||||
looksLikeLeakedReasoningPrefix,
|
||||
shouldBufferPotentialReasoningPrefix,
|
||||
stripLeakedReasoningPreamble,
|
||||
} from './reasoningLeakSanitizer.js'
|
||||
createThinkTagFilter,
|
||||
stripThinkTags,
|
||||
} from './thinkTagSanitizer.js'
|
||||
import {
|
||||
codexStreamToAnthropic,
|
||||
collectCodexCompletedResponse,
|
||||
@@ -47,12 +46,15 @@ import {
|
||||
type AnthropicUsage,
|
||||
type ShimCreateParams,
|
||||
} from './codexShim.js'
|
||||
import { compressToolHistory } from './compressToolHistory.js'
|
||||
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
|
||||
import {
|
||||
getLocalProviderRetryBaseUrls,
|
||||
getGithubEndpointType,
|
||||
isLocalProviderUrl,
|
||||
resolveRuntimeCodexCredentials,
|
||||
resolveProviderRequest,
|
||||
getGithubEndpointType,
|
||||
shouldAttemptLocalToollessRetry,
|
||||
} from './providerConfig.js'
|
||||
import {
|
||||
buildOpenAICompatibilityErrorMessage,
|
||||
@@ -80,6 +82,10 @@ const GITHUB_429_MAX_RETRIES = 3
|
||||
const GITHUB_429_BASE_DELAY_SEC = 1
|
||||
const GITHUB_429_MAX_DELAY_SEC = 32
|
||||
const GEMINI_API_HOST = 'generativelanguage.googleapis.com'
|
||||
const MOONSHOT_API_HOSTS = new Set([
|
||||
'api.moonshot.ai',
|
||||
'api.moonshot.cn',
|
||||
])
|
||||
|
||||
const COPILOT_HEADERS: Record<string, string> = {
|
||||
'User-Agent': 'GitHubCopilotChat/0.26.7',
|
||||
@@ -145,6 +151,15 @@ function hasGeminiApiHost(baseUrl: string | undefined): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reports whether `baseUrl` points at one of the hosts listed in
 * `MOONSHOT_API_HOSTS`.
 *
 * @param baseUrl - Base URL to inspect; may be undefined or empty.
 * @returns `true` when the URL parses and its lowercased hostname is in the
 *   set; `false` for missing, empty, or unparseable input.
 */
function isMoonshotBaseUrl(baseUrl: string | undefined): boolean {
  if (!baseUrl) {
    return false
  }

  try {
    const { hostname } = new URL(baseUrl)
    return MOONSHOT_API_HOSTS.has(hostname.toLowerCase())
  } catch {
    // `new URL` throws on malformed input; treat that as "not Moonshot".
    return false
  }
}
|
||||
|
||||
function formatRetryAfterHint(response: Response): string {
|
||||
const ra = response.headers.get('retry-after')
|
||||
return ra ? ` (Retry-After: ${ra})` : ''
|
||||
@@ -276,6 +291,15 @@ function convertToolResultContent(
|
||||
const text = parts[0].text ?? ''
|
||||
return isError ? `Error: ${text}` : text
|
||||
}
|
||||
|
||||
// Collapse arrays of only text blocks into a single string for DeepSeek
|
||||
// compatibility (issue #774). DeepSeek rejects arrays in role: "tool" messages.
|
||||
const allText = parts.every(p => p.type === 'text')
|
||||
if (allText) {
|
||||
const text = parts.map(p => p.text ?? '').join('\n\n')
|
||||
return isError ? `Error: ${text}` : text
|
||||
}
|
||||
|
||||
if (isError && parts[0]?.type === 'text') {
|
||||
parts[0] = { ...parts[0], text: `Error: ${parts[0].text ?? ''}` }
|
||||
} else if (isError) {
|
||||
@@ -334,6 +358,14 @@ function convertContentBlocks(
|
||||
|
||||
if (parts.length === 0) return ''
|
||||
if (parts.length === 1 && parts[0].type === 'text') return parts[0].text ?? ''
|
||||
|
||||
// Collapse arrays of only text blocks into a single string for DeepSeek
|
||||
// compatibility (issue #774).
|
||||
const allText = parts.every(p => p.type === 'text')
|
||||
if (allText) {
|
||||
return parts.map(p => p.text ?? '').join('\n\n')
|
||||
}
|
||||
|
||||
return parts
|
||||
}
|
||||
|
||||
@@ -345,19 +377,43 @@ function isGeminiMode(): boolean {
|
||||
}
|
||||
|
||||
function convertMessages(
|
||||
messages: Array<{ role: string; message?: { role?: string; content?: unknown }; content?: unknown }>,
|
||||
messages: Array<{
|
||||
role: string
|
||||
message?: { role?: string; content?: unknown }
|
||||
content?: unknown
|
||||
}>,
|
||||
system: unknown,
|
||||
): OpenAIMessage[] {
|
||||
const result: OpenAIMessage[] = []
|
||||
const knownToolCallIds = new Set<string>()
|
||||
|
||||
// Pre-scan for all tool results in the history to identify valid tool calls
|
||||
const toolResultIds = new Set<string>()
|
||||
for (const msg of messages) {
|
||||
const inner = msg.message ?? msg
|
||||
const content = (inner as { content?: unknown }).content
|
||||
if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
if (
|
||||
(block as { type?: string }).type === 'tool_result' &&
|
||||
(block as { tool_use_id?: string }).tool_use_id
|
||||
) {
|
||||
toolResultIds.add((block as { tool_use_id: string }).tool_use_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// System message first
|
||||
const sysText = convertSystemPrompt(system)
|
||||
if (sysText) {
|
||||
result.push({ role: 'system', content: sysText })
|
||||
}
|
||||
|
||||
for (const msg of messages) {
|
||||
for (let i = 0; i < messages.length; i++) {
|
||||
const msg = messages[i]
|
||||
const isLastInHistory = i === messages.length - 1
|
||||
|
||||
// Claude Code wraps messages in { role, message: { role, content } }
|
||||
const inner = msg.message ?? msg
|
||||
const role = (inner as { role?: string }).role ?? msg.role
|
||||
@@ -366,8 +422,12 @@ function convertMessages(
|
||||
if (role === 'user') {
|
||||
// Check for tool_result blocks in user messages
|
||||
if (Array.isArray(content)) {
|
||||
const toolResults = content.filter((b: { type?: string }) => b.type === 'tool_result')
|
||||
const otherContent = content.filter((b: { type?: string }) => b.type !== 'tool_result')
|
||||
const toolResults = content.filter(
|
||||
(b: { type?: string }) => b.type === 'tool_result',
|
||||
)
|
||||
const otherContent = content.filter(
|
||||
(b: { type?: string }) => b.type !== 'tool_result',
|
||||
)
|
||||
|
||||
// Emit tool results as tool messages, but ONLY if we have a matching tool_use ID.
|
||||
// Mistral/OpenAI strictly require tool messages to follow an assistant message with tool_calls.
|
||||
@@ -382,7 +442,9 @@ function convertMessages(
|
||||
content: convertToolResultContent(tr.content, tr.is_error),
|
||||
})
|
||||
} else {
|
||||
logForDebugging(`Dropping orphan tool_result for ID: ${id} to prevent API error`)
|
||||
logForDebugging(
|
||||
`Dropping orphan tool_result for ID: ${id} to prevent API error`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -402,8 +464,12 @@ function convertMessages(
|
||||
} else if (role === 'assistant') {
|
||||
// Check for tool_use blocks
|
||||
if (Array.isArray(content)) {
|
||||
const toolUses = content.filter((b: { type?: string }) => b.type === 'tool_use')
|
||||
const thinkingBlock = content.find((b: { type?: string }) => b.type === 'thinking')
|
||||
const toolUses = content.filter(
|
||||
(b: { type?: string }) => b.type === 'tool_use',
|
||||
)
|
||||
const thinkingBlock = content.find(
|
||||
(b: { type?: string }) => b.type === 'thinking',
|
||||
)
|
||||
const textContent = content.filter(
|
||||
(b: { type?: string }) => b.type !== 'tool_use' && b.type !== 'thinking',
|
||||
)
|
||||
@@ -412,12 +478,17 @@ function convertMessages(
|
||||
role: 'assistant',
|
||||
content: (() => {
|
||||
const c = convertContentBlocks(textContent)
|
||||
return typeof c === 'string' ? c : Array.isArray(c) ? c.map((p: { text?: string }) => p.text ?? '').join('') : ''
|
||||
return typeof c === 'string'
|
||||
? c
|
||||
: Array.isArray(c)
|
||||
? c.map((p: { text?: string }) => p.text ?? '').join('')
|
||||
: ''
|
||||
})(),
|
||||
}
|
||||
|
||||
if (toolUses.length > 0) {
|
||||
assistantMsg.tool_calls = toolUses.map(
|
||||
const mappedToolCalls = toolUses
|
||||
.map(
|
||||
(tu: {
|
||||
id?: string
|
||||
name?: string
|
||||
@@ -426,8 +497,18 @@ function convertMessages(
|
||||
signature?: string
|
||||
}) => {
|
||||
const id = tu.id ?? `call_${crypto.randomUUID().replace(/-/g, '')}`
|
||||
|
||||
// Only keep tool calls that have a corresponding result in the history,
|
||||
// or if it's the last message (prefill scenario).
|
||||
// Orphaned tool calls (e.g. from user interruption) cause 400 errors.
|
||||
if (!toolResultIds.has(id) && !isLastInHistory) {
|
||||
return null
|
||||
}
|
||||
|
||||
knownToolCallIds.add(id)
|
||||
const toolCall: NonNullable<OpenAIMessage['tool_calls']>[number] = {
|
||||
const toolCall: NonNullable<
|
||||
OpenAIMessage['tool_calls']
|
||||
>[number] = {
|
||||
id,
|
||||
type: 'function' as const,
|
||||
function: {
|
||||
@@ -449,33 +530,56 @@ function convertMessages(
|
||||
// If the model provided a signature in the tool_use block itself (e.g. from a previous Turn/Step)
|
||||
// Use thinkingBlock.signature for ALL tool calls in the same assistant turn if available.
|
||||
// The API requires the same signature on every replayed function call part in a parallel set.
|
||||
const signature = tu.signature ?? (thinkingBlock as any)?.signature
|
||||
const signature =
|
||||
tu.signature ?? (thinkingBlock as any)?.signature
|
||||
|
||||
// Merge into existing google-specific metadata if present
|
||||
const existingGoogle = (toolCall.extra_content?.google as Record<string, unknown>) ?? {}
|
||||
const existingGoogle =
|
||||
(toolCall.extra_content?.google as Record<
|
||||
string,
|
||||
unknown
|
||||
>) ?? {}
|
||||
toolCall.extra_content = {
|
||||
...toolCall.extra_content,
|
||||
google: {
|
||||
...existingGoogle,
|
||||
thought_signature: signature ?? "skip_thought_signature_validator"
|
||||
}
|
||||
thought_signature:
|
||||
signature ?? 'skip_thought_signature_validator',
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
return toolCall
|
||||
},
|
||||
)
|
||||
.filter((tc): tc is NonNullable<typeof tc> => tc !== null)
|
||||
|
||||
if (mappedToolCalls.length > 0) {
|
||||
assistantMsg.tool_calls = mappedToolCalls
|
||||
}
|
||||
}
|
||||
|
||||
// Only push assistant message if it has content or tool calls.
|
||||
// Stripped thinking-only blocks from user interruptions are empty and cause 400s.
|
||||
if (assistantMsg.content || assistantMsg.tool_calls?.length) {
|
||||
result.push(assistantMsg)
|
||||
}
|
||||
} else {
|
||||
result.push({
|
||||
const assistantMsg: OpenAIMessage = {
|
||||
role: 'assistant',
|
||||
content: (() => {
|
||||
const c = convertContentBlocks(content)
|
||||
return typeof c === 'string' ? c : Array.isArray(c) ? c.map((p: { text?: string }) => p.text ?? '').join('') : ''
|
||||
return typeof c === 'string'
|
||||
? c
|
||||
: Array.isArray(c)
|
||||
? c.map((p: { text?: string }) => p.text ?? '').join('')
|
||||
: ''
|
||||
})(),
|
||||
})
|
||||
}
|
||||
|
||||
if (assistantMsg.content) {
|
||||
result.push(assistantMsg)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -489,25 +593,56 @@ function convertMessages(
|
||||
for (const msg of result) {
|
||||
const prev = coalesced[coalesced.length - 1]
|
||||
|
||||
if (prev && prev.role === msg.role && msg.role !== 'tool' && msg.role !== 'system') {
|
||||
const prevContent = prev.content
|
||||
// Mistral/Devstral: 'tool' message must be followed by an 'assistant' message.
|
||||
// If a 'tool' result is followed by a 'user' message, we must inject a semantic
|
||||
// assistant response to satisfy the strict role sequence:
|
||||
// ... -> assistant (calls) -> tool (results) -> assistant (semantic) -> user (next)
|
||||
if (prev && prev.role === 'tool' && msg.role === 'user') {
|
||||
coalesced.push({
|
||||
role: 'assistant',
|
||||
content: '[Tool execution interrupted by user]',
|
||||
})
|
||||
}
|
||||
|
||||
const lastAfterPossibleInjection = coalesced[coalesced.length - 1]
|
||||
if (
|
||||
lastAfterPossibleInjection &&
|
||||
lastAfterPossibleInjection.role === msg.role &&
|
||||
msg.role !== 'tool' &&
|
||||
msg.role !== 'system'
|
||||
) {
|
||||
const prevContent = lastAfterPossibleInjection.content
|
||||
const curContent = msg.content
|
||||
|
||||
if (typeof prevContent === 'string' && typeof curContent === 'string') {
|
||||
prev.content = prevContent + (prevContent && curContent ? '\n' : '') + curContent
|
||||
lastAfterPossibleInjection.content =
|
||||
prevContent + (prevContent && curContent ? '\n' : '') + curContent
|
||||
} else {
|
||||
const toArray = (
|
||||
c: string | Array<{ type: string; text?: string; image_url?: { url: string } }> | undefined,
|
||||
): Array<{ type: string; text?: string; image_url?: { url: string } }> => {
|
||||
c:
|
||||
| string
|
||||
| Array<{ type: string; text?: string; image_url?: { url: string } }>
|
||||
| undefined,
|
||||
): Array<{
|
||||
type: string
|
||||
text?: string
|
||||
image_url?: { url: string }
|
||||
}> => {
|
||||
if (!c) return []
|
||||
if (typeof c === 'string') return c ? [{ type: 'text', text: c }] : []
|
||||
return c
|
||||
}
|
||||
prev.content = [...toArray(prevContent), ...toArray(curContent)]
|
||||
lastAfterPossibleInjection.content = [
|
||||
...toArray(prevContent),
|
||||
...toArray(curContent),
|
||||
]
|
||||
}
|
||||
|
||||
if (msg.tool_calls?.length) {
|
||||
prev.tool_calls = [...(prev.tool_calls ?? []), ...msg.tool_calls]
|
||||
lastAfterPossibleInjection.tool_calls = [
|
||||
...(lastAfterPossibleInjection.tool_calls ?? []),
|
||||
...msg.tool_calls,
|
||||
]
|
||||
}
|
||||
} else {
|
||||
coalesced.push(msg)
|
||||
@@ -718,8 +853,7 @@ async function* openaiStreamToAnthropic(
|
||||
let hasEmittedContentStart = false
|
||||
let hasEmittedThinkingStart = false
|
||||
let hasClosedThinking = false
|
||||
let activeTextBuffer = ''
|
||||
let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
|
||||
const thinkFilter = createThinkTagFilter()
|
||||
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
|
||||
let hasEmittedFinalUsage = false
|
||||
let hasProcessedFinishReason = false
|
||||
@@ -798,14 +932,12 @@ async function* openaiStreamToAnthropic(
|
||||
const closeActiveContentBlock = async function* () {
|
||||
if (!hasEmittedContentStart) return
|
||||
|
||||
if (textBufferMode !== 'none') {
|
||||
const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
|
||||
if (sanitized) {
|
||||
const tail = thinkFilter.flush()
|
||||
if (tail) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: contentBlockIndex,
|
||||
delta: { type: 'text_delta', text: sanitized },
|
||||
}
|
||||
delta: { type: 'text_delta', text: tail },
|
||||
}
|
||||
}
|
||||
|
||||
@@ -815,8 +947,6 @@ async function* openaiStreamToAnthropic(
|
||||
}
|
||||
contentBlockIndex++
|
||||
hasEmittedContentStart = false
|
||||
activeTextBuffer = ''
|
||||
textBufferMode = 'none'
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -873,7 +1003,6 @@ async function* openaiStreamToAnthropic(
|
||||
contentBlockIndex++
|
||||
hasClosedThinking = true
|
||||
}
|
||||
activeTextBuffer += delta.content
|
||||
if (!hasEmittedContentStart) {
|
||||
yield {
|
||||
type: 'content_block_start',
|
||||
@@ -883,38 +1012,13 @@ async function* openaiStreamToAnthropic(
|
||||
hasEmittedContentStart = true
|
||||
}
|
||||
|
||||
if (
|
||||
textBufferMode === 'strip' ||
|
||||
looksLikeLeakedReasoningPrefix(activeTextBuffer)
|
||||
) {
|
||||
textBufferMode = 'strip'
|
||||
continue
|
||||
}
|
||||
|
||||
if (textBufferMode === 'pending') {
|
||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
||||
continue
|
||||
}
|
||||
const visible = thinkFilter.feed(delta.content)
|
||||
if (visible) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: contentBlockIndex,
|
||||
delta: {
|
||||
type: 'text_delta',
|
||||
text: activeTextBuffer,
|
||||
},
|
||||
delta: { type: 'text_delta', text: visible },
|
||||
}
|
||||
textBufferMode = 'none'
|
||||
continue
|
||||
}
|
||||
|
||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
||||
textBufferMode = 'pending'
|
||||
continue
|
||||
}
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: contentBlockIndex,
|
||||
delta: { type: 'text_delta', text: delta.content },
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1329,14 +1433,15 @@ class OpenAIShimMessages {
|
||||
params: ShimCreateParams,
|
||||
options?: { signal?: AbortSignal; headers?: Record<string, string> },
|
||||
): Promise<Response> {
|
||||
const openaiMessages = convertMessages(
|
||||
const compressedMessages = compressToolHistory(
|
||||
params.messages as Array<{
|
||||
role: string
|
||||
message?: { role?: string; content?: unknown }
|
||||
content?: unknown
|
||||
}>,
|
||||
params.system,
|
||||
request.resolvedModel,
|
||||
)
|
||||
const openaiMessages = convertMessages(compressedMessages, params.system)
|
||||
|
||||
const body: Record<string, unknown> = {
|
||||
model: request.resolvedModel,
|
||||
@@ -1372,14 +1477,19 @@ class OpenAIShimMessages {
|
||||
const isGithubCopilot = isGithub && githubEndpointType === 'copilot'
|
||||
const isGithubModels = isGithub && (githubEndpointType === 'models' || githubEndpointType === 'custom')
|
||||
|
||||
if ((isGithub || isMistral || isLocal) && body.max_completion_tokens !== undefined) {
|
||||
const isMoonshot = isMoonshotBaseUrl(request.baseUrl)
|
||||
|
||||
if ((isGithub || isMistral || isLocal || isMoonshot) && body.max_completion_tokens !== undefined) {
|
||||
body.max_tokens = body.max_completion_tokens
|
||||
delete body.max_completion_tokens
|
||||
}
|
||||
|
||||
// mistral and gemini don't recognize body.store — Gemini returns 400
|
||||
// "Invalid JSON payload received. Unknown name 'store': Cannot find field."
|
||||
if (isMistral || isGeminiMode()) {
|
||||
// Moonshot (api.moonshot.ai/.cn) has not published support for the
|
||||
// parameter either; strip it preemptively to avoid the same class of
|
||||
// error on strict-parse providers.
|
||||
if (isMistral || isGeminiMode() || isMoonshot) {
|
||||
delete body.store
|
||||
}
|
||||
|
||||
@@ -1459,46 +1569,93 @@ class OpenAIShimMessages {
|
||||
headers['X-GitHub-Api-Version'] = '2022-11-28'
|
||||
}
|
||||
|
||||
// Build the chat completions URL
|
||||
// Azure Cognitive Services / Azure OpenAI require a deployment-specific path
|
||||
// and an api-version query parameter.
|
||||
// Standard format: {base}/openai/deployments/{model}/chat/completions?api-version={version}
|
||||
// Non-Azure: {base}/chat/completions
|
||||
let chatCompletionsUrl: string
|
||||
const buildChatCompletionsUrl = (baseUrl: string): string => {
|
||||
// Azure Cognitive Services / Azure OpenAI require a deployment-specific
|
||||
// path and an api-version query parameter.
|
||||
if (isAzure) {
|
||||
const apiVersion = process.env.AZURE_OPENAI_API_VERSION ?? '2024-12-01-preview'
|
||||
const deployment = request.resolvedModel ?? process.env.OPENAI_MODEL ?? 'gpt-4o'
|
||||
// If base URL already contains /deployments/, use it as-is with api-version
|
||||
if (/\/deployments\//i.test(request.baseUrl)) {
|
||||
const base = request.baseUrl.replace(/\/+$/, '')
|
||||
chatCompletionsUrl = `${base}/chat/completions?api-version=${apiVersion}`
|
||||
} else {
|
||||
// Strip trailing /v1 or /openai/v1 if present, then build Azure path
|
||||
const base = request.baseUrl.replace(/\/(openai\/)?v1\/?$/, '').replace(/\/+$/, '')
|
||||
chatCompletionsUrl = `${base}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`
|
||||
}
|
||||
} else {
|
||||
chatCompletionsUrl = `${request.baseUrl}/chat/completions`
|
||||
|
||||
// If base URL already contains /deployments/, use it as-is with api-version.
|
||||
if (/\/deployments\//i.test(baseUrl)) {
|
||||
const normalizedBase = baseUrl.replace(/\/+$/, '')
|
||||
return `${normalizedBase}/chat/completions?api-version=${apiVersion}`
|
||||
}
|
||||
|
||||
const fetchInit = {
|
||||
// Strip trailing /v1 or /openai/v1 if present, then build Azure path.
|
||||
const normalizedBase = baseUrl
|
||||
.replace(/\/(openai\/)?v1\/?$/, '')
|
||||
.replace(/\/+$/, '')
|
||||
|
||||
return `${normalizedBase}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`
|
||||
}
|
||||
|
||||
return `${baseUrl}/chat/completions`
|
||||
}
|
||||
|
||||
const localRetryBaseUrls = isLocal
|
||||
? getLocalProviderRetryBaseUrls(request.baseUrl)
|
||||
: []
|
||||
|
||||
let activeBaseUrl = request.baseUrl
|
||||
let chatCompletionsUrl = buildChatCompletionsUrl(activeBaseUrl)
|
||||
const attemptedLocalBaseUrls = new Set<string>([activeBaseUrl])
|
||||
let didRetryWithoutTools = false
|
||||
|
||||
const promoteNextLocalBaseUrl = (
|
||||
reason: 'endpoint_not_found' | 'localhost_resolution_failed',
|
||||
): boolean => {
|
||||
for (const candidateBaseUrl of localRetryBaseUrls) {
|
||||
if (attemptedLocalBaseUrls.has(candidateBaseUrl)) {
|
||||
continue
|
||||
}
|
||||
|
||||
const previousUrl = chatCompletionsUrl
|
||||
attemptedLocalBaseUrls.add(candidateBaseUrl)
|
||||
activeBaseUrl = candidateBaseUrl
|
||||
chatCompletionsUrl = buildChatCompletionsUrl(activeBaseUrl)
|
||||
|
||||
logForDebugging(
|
||||
`[OpenAIShim] self-heal retry reason=${reason} method=POST from=${redactUrlForDiagnostics(previousUrl)} to=${redactUrlForDiagnostics(chatCompletionsUrl)} model=${request.resolvedModel}`,
|
||||
{ level: 'warn' },
|
||||
)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
let serializedBody = JSON.stringify(body)
|
||||
|
||||
const refreshSerializedBody = (): void => {
|
||||
serializedBody = JSON.stringify(body)
|
||||
}
|
||||
|
||||
const buildFetchInit = () => ({
|
||||
method: 'POST' as const,
|
||||
headers,
|
||||
body: JSON.stringify(body),
|
||||
body: serializedBody,
|
||||
signal: options?.signal,
|
||||
}
|
||||
})
|
||||
|
||||
const maxAttempts = isGithub ? GITHUB_429_MAX_RETRIES : 1
|
||||
const maxSelfHealAttempts = isLocal
|
||||
? localRetryBaseUrls.length + 1
|
||||
: 0
|
||||
const maxAttempts = (isGithub ? GITHUB_429_MAX_RETRIES : 1) + maxSelfHealAttempts
|
||||
|
||||
const throwClassifiedTransportError = (
|
||||
error: unknown,
|
||||
requestUrl: string,
|
||||
preclassifiedFailure?: ReturnType<typeof classifyOpenAINetworkFailure>,
|
||||
): never => {
|
||||
if (options?.signal?.aborted) {
|
||||
throw error
|
||||
}
|
||||
|
||||
const failure = classifyOpenAINetworkFailure(error, {
|
||||
const failure =
|
||||
preclassifiedFailure ??
|
||||
classifyOpenAINetworkFailure(error, {
|
||||
url: requestUrl,
|
||||
})
|
||||
const redactedUrl = redactUrlForDiagnostics(requestUrl)
|
||||
@@ -1531,8 +1688,11 @@ class OpenAIShimMessages {
|
||||
responseHeaders: Headers,
|
||||
requestUrl: string,
|
||||
rateHint = '',
|
||||
preclassifiedFailure?: ReturnType<typeof classifyOpenAIHttpFailure>,
|
||||
): never => {
|
||||
const failure = classifyOpenAIHttpFailure({
|
||||
const failure =
|
||||
preclassifiedFailure ??
|
||||
classifyOpenAIHttpFailure({
|
||||
status,
|
||||
body: errorBody,
|
||||
})
|
||||
@@ -1557,10 +1717,13 @@ class OpenAIShimMessages {
|
||||
let response: Response | undefined
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
try {
|
||||
response = await fetchWithProxyRetry(chatCompletionsUrl, fetchInit)
|
||||
response = await fetchWithProxyRetry(
|
||||
chatCompletionsUrl,
|
||||
buildFetchInit(),
|
||||
)
|
||||
} catch (error) {
|
||||
const isAbortError =
|
||||
fetchInit.signal?.aborted === true ||
|
||||
options?.signal?.aborted === true ||
|
||||
(typeof DOMException !== 'undefined' &&
|
||||
error instanceof DOMException &&
|
||||
error.name === 'AbortError') ||
|
||||
@@ -1573,7 +1736,19 @@ class OpenAIShimMessages {
|
||||
throw error
|
||||
}
|
||||
|
||||
throwClassifiedTransportError(error, chatCompletionsUrl)
|
||||
const failure = classifyOpenAINetworkFailure(error, {
|
||||
url: chatCompletionsUrl,
|
||||
})
|
||||
|
||||
if (
|
||||
isLocal &&
|
||||
failure.category === 'localhost_resolution_failed' &&
|
||||
promoteNextLocalBaseUrl('localhost_resolution_failed')
|
||||
) {
|
||||
continue
|
||||
}
|
||||
|
||||
throwClassifiedTransportError(error, chatCompletionsUrl, failure)
|
||||
}
|
||||
|
||||
if (response.ok) {
|
||||
@@ -1665,6 +1840,10 @@ class OpenAIShimMessages {
|
||||
return responsesResponse
|
||||
}
|
||||
const responsesErrorBody = await responsesResponse.text().catch(() => 'unknown error')
|
||||
const responsesFailure = classifyOpenAIHttpFailure({
|
||||
status: responsesResponse.status,
|
||||
body: responsesErrorBody,
|
||||
})
|
||||
let responsesErrorResponse: object | undefined
|
||||
try { responsesErrorResponse = JSON.parse(responsesErrorBody) } catch { /* raw text */ }
|
||||
throwClassifiedHttpError(
|
||||
@@ -1673,10 +1852,49 @@ class OpenAIShimMessages {
|
||||
responsesErrorResponse,
|
||||
responsesResponse.headers,
|
||||
responsesUrl,
|
||||
'',
|
||||
responsesFailure,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
const failure = classifyOpenAIHttpFailure({
|
||||
status: response.status,
|
||||
body: errorBody,
|
||||
})
|
||||
|
||||
if (
|
||||
isLocal &&
|
||||
failure.category === 'endpoint_not_found' &&
|
||||
promoteNextLocalBaseUrl('endpoint_not_found')
|
||||
) {
|
||||
continue
|
||||
}
|
||||
|
||||
const hasToolsPayload =
|
||||
Array.isArray(body.tools) &&
|
||||
body.tools.length > 0
|
||||
|
||||
if (
|
||||
!didRetryWithoutTools &&
|
||||
failure.category === 'tool_call_incompatible' &&
|
||||
shouldAttemptLocalToollessRetry({
|
||||
baseUrl: activeBaseUrl,
|
||||
hasTools: hasToolsPayload,
|
||||
})
|
||||
) {
|
||||
didRetryWithoutTools = true
|
||||
delete body.tools
|
||||
delete body.tool_choice
|
||||
refreshSerializedBody()
|
||||
|
||||
logForDebugging(
|
||||
`[OpenAIShim] self-heal retry reason=tool_call_incompatible mode=toolless method=POST url=${redactUrlForDiagnostics(chatCompletionsUrl)} model=${request.resolvedModel}`,
|
||||
{ level: 'warn' },
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
let errorResponse: object | undefined
|
||||
try { errorResponse = JSON.parse(errorBody) } catch { /* raw text */ }
|
||||
throwClassifiedHttpError(
|
||||
@@ -1686,6 +1904,7 @@ class OpenAIShimMessages {
|
||||
response.headers as unknown as Headers,
|
||||
chatCompletionsUrl,
|
||||
rateHint,
|
||||
failure,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1742,7 +1961,7 @@ class OpenAIShimMessages {
|
||||
if (typeof rawContent === 'string' && rawContent) {
|
||||
content.push({
|
||||
type: 'text',
|
||||
text: stripLeakedReasoningPreamble(rawContent),
|
||||
text: stripThinkTags(rawContent),
|
||||
})
|
||||
} else if (Array.isArray(rawContent) && rawContent.length > 0) {
|
||||
const parts: string[] = []
|
||||
@@ -1760,7 +1979,7 @@ class OpenAIShimMessages {
|
||||
if (joined) {
|
||||
content.push({
|
||||
type: 'text',
|
||||
text: stripLeakedReasoningPreamble(joined),
|
||||
text: stripThinkTags(joined),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,8 +2,10 @@ import { afterEach, expect, test } from 'bun:test'
|
||||
|
||||
import {
|
||||
getAdditionalModelOptionsCacheScope,
|
||||
getLocalProviderRetryBaseUrls,
|
||||
isLocalProviderUrl,
|
||||
resolveProviderRequest,
|
||||
shouldAttemptLocalToollessRetry,
|
||||
} from './providerConfig.js'
|
||||
|
||||
const originalEnv = {
|
||||
@@ -83,3 +85,42 @@ test('skips local model cache scope for remote openai-compatible providers', ()
|
||||
|
||||
expect(getAdditionalModelOptionsCacheScope()).toBeNull()
|
||||
})
|
||||
|
||||
test('derives local retry base URLs with /v1 and loopback fallback candidates', () => {
|
||||
expect(getLocalProviderRetryBaseUrls('http://localhost:11434')).toEqual([
|
||||
'http://localhost:11434/v1',
|
||||
'http://127.0.0.1:11434',
|
||||
'http://127.0.0.1:11434/v1',
|
||||
])
|
||||
})
|
||||
|
||||
test('does not derive local retry base URLs for remote providers', () => {
|
||||
expect(getLocalProviderRetryBaseUrls('https://api.openai.com/v1')).toEqual([])
|
||||
})
|
||||
|
||||
test('enables local toolless retry for likely Ollama endpoints with tools', () => {
|
||||
expect(
|
||||
shouldAttemptLocalToollessRetry({
|
||||
baseUrl: 'http://localhost:11434/v1',
|
||||
hasTools: true,
|
||||
}),
|
||||
).toBe(true)
|
||||
})
|
||||
|
||||
test('disables local toolless retry when no tools are present', () => {
|
||||
expect(
|
||||
shouldAttemptLocalToollessRetry({
|
||||
baseUrl: 'http://localhost:11434/v1',
|
||||
hasTools: false,
|
||||
}),
|
||||
).toBe(false)
|
||||
})
|
||||
|
||||
test('disables local toolless retry for non-Ollama local endpoints', () => {
|
||||
expect(
|
||||
shouldAttemptLocalToollessRetry({
|
||||
baseUrl: 'http://localhost:1234/v1',
|
||||
hasTools: true,
|
||||
}),
|
||||
).toBe(false)
|
||||
})
|
||||
|
||||
@@ -305,6 +305,101 @@ export function isLocalProviderUrl(baseUrl: string | undefined): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Drops every trailing `/` from `value` and returns the rest unchanged.
 * A string consisting only of slashes collapses to the empty string.
 */
function trimTrailingSlash(value: string): string {
  let end = value.length
  while (end > 0 && value[end - 1] === '/') {
    end -= 1
  }
  return value.slice(0, end)
}
|
||||
|
||||
/**
 * Returns `pathname` rewritten so that it ends in a `/v1` segment.
 *
 * Trailing slashes are removed first. An empty (or root) path becomes `/v1`;
 * a path that already ends in `/v1` (compared case-insensitively) is returned
 * as trimmed; any other path gets `/v1` appended.
 */
function normalizePathWithV1(pathname: string): string {
  // Same trailing-slash trim the sibling helper performs, done inline.
  const base = pathname.replace(/\/+$/, '')

  if (base === '' || base === '/') {
    return '/v1'
  }

  const alreadyVersioned = base.toLowerCase().endsWith('/v1')
  return alreadyVersioned ? base : `${base}/v1`
}
|
||||
|
||||
/**
 * Heuristic check for whether `baseUrl` points at an Ollama server.
 *
 * True when the URL uses port 11434, or when its hostname or path contains
 * "ollama" (case-insensitive). Returns false when `baseUrl` cannot be parsed.
 */
function isLikelyOllamaEndpoint(baseUrl: string): boolean {
  let url: URL
  try {
    url = new URL(baseUrl)
  } catch {
    // Unparseable input can never be identified as Ollama.
    return false
  }

  if (url.port === '11434') {
    return true
  }

  return [url.hostname, url.pathname].some(part =>
    part.toLowerCase().includes('ollama'),
  )
}
|
||||
|
||||
/**
 * Builds the ordered list of alternative base URLs to try when a local
 * provider request fails against `baseUrl`.
 *
 * Candidates, in order and de-duplicated against the original URL:
 *   1. the same host with the path normalized to end in `/v1` (only when that
 *      differs from the current path),
 *   2. for `localhost` / `::1` hosts, the `127.0.0.1` equivalent with the
 *      original path, then with the `/v1` path.
 * Query string and fragment are dropped from every candidate.
 *
 * @returns the candidate URLs without trailing slashes; an empty array when
 *   `baseUrl` is not a local provider URL or cannot be parsed.
 */
export function getLocalProviderRetryBaseUrls(baseUrl: string): string[] {
  if (!isLocalProviderUrl(baseUrl)) {
    return []
  }

  try {
    const source = new URL(baseUrl)
    const sourceHref = source.toString()

    // Seeded with the original URL so it is never offered as its own fallback.
    const known = new Set<string>([trimTrailingSlash(sourceHref)])
    const fallbacks: string[] = []

    const offer = (hostname: string, pathname: string): void => {
      const variant = new URL(sourceHref)
      variant.hostname = hostname
      variant.pathname = pathname
      variant.search = ''
      variant.hash = ''

      const key = trimTrailingSlash(variant.toString())
      if (!known.has(key)) {
        known.add(key)
        fallbacks.push(key)
      }
    }

    const versionedPath = normalizePathWithV1(source.pathname)
    if (versionedPath !== trimTrailingSlash(source.pathname)) {
      offer(source.hostname, versionedPath)
    }

    // IPv6 hostnames are reported in brackets; strip them before comparing.
    const bareHost = source.hostname.toLowerCase().replace(/^\[|\]$/g, '')
    if (bareHost === 'localhost' || bareHost === '::1') {
      offer('127.0.0.1', source.pathname || '/')
      offer('127.0.0.1', versionedPath)
    }

    return fallbacks
  } catch {
    return []
  }
}
|
||||
|
||||
/**
 * Decides whether a failed request may be retried with its tools removed.
 *
 * All three conditions must hold, checked in this order: the request carried
 * tools, the base URL is a local provider URL, and the endpoint looks like
 * an Ollama server.
 */
export function shouldAttemptLocalToollessRetry(options: {
  baseUrl: string
  hasTools: boolean
}): boolean {
  const { baseUrl, hasTools } = options
  return (
    hasTools &&
    isLocalProviderUrl(baseUrl) &&
    isLikelyOllamaEndpoint(baseUrl)
  )
}
|
||||
|
||||
export function isCodexBaseUrl(baseUrl: string | undefined): boolean {
|
||||
if (!baseUrl) return false
|
||||
try {
|
||||
@@ -412,6 +507,9 @@ export function resolveProviderRequest(options?: {
|
||||
? normalizedGeminiEnvBaseUrl
|
||||
: asNamedEnvUrl(process.env.OPENAI_BASE_URL, 'OPENAI_BASE_URL')
|
||||
|
||||
// In Mistral mode, a literal "undefined" MISTRAL_BASE_URL is treated as
|
||||
// misconfiguration and falls back to OPENAI_API_BASE, then
|
||||
// DEFAULT_MISTRAL_BASE_URL for a safe default endpoint.
|
||||
const fallbackEnvBaseUrl = isMistralMode
|
||||
? (primaryEnvBaseUrl === undefined
|
||||
? asNamedEnvUrl(process.env.OPENAI_API_BASE, 'OPENAI_API_BASE') ?? DEFAULT_MISTRAL_BASE_URL
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import {
|
||||
looksLikeLeakedReasoningPrefix,
|
||||
shouldBufferPotentialReasoningPrefix,
|
||||
stripLeakedReasoningPreamble,
|
||||
} from './reasoningLeakSanitizer.ts'
|
||||
|
||||
describe('reasoning leak sanitizer', () => {
|
||||
test('strips explicit internal reasoning preambles', () => {
|
||||
const text =
|
||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?'
|
||||
|
||||
expect(looksLikeLeakedReasoningPrefix(text)).toBe(true)
|
||||
expect(stripLeakedReasoningPreamble(text)).toBe(
|
||||
'Hey! How can I help you today?',
|
||||
)
|
||||
})
|
||||
|
||||
test('does not strip normal user-facing advice that mentions "the user should"', () => {
|
||||
const text =
|
||||
'The user should reset their password immediately.\n\nHere are the steps...'
|
||||
|
||||
expect(looksLikeLeakedReasoningPrefix(text)).toBe(false)
|
||||
expect(shouldBufferPotentialReasoningPrefix(text)).toBe(false)
|
||||
expect(stripLeakedReasoningPreamble(text)).toBe(text)
|
||||
})
|
||||
|
||||
test('does not strip legitimate first-person advice about responding to an incident', () => {
|
||||
const text =
|
||||
'I need to respond to this security incident immediately. The system is compromised.\n\nHere are the remediation steps...'
|
||||
|
||||
expect(looksLikeLeakedReasoningPrefix(text)).toBe(false)
|
||||
expect(shouldBufferPotentialReasoningPrefix(text)).toBe(false)
|
||||
expect(stripLeakedReasoningPreamble(text)).toBe(text)
|
||||
})
|
||||
|
||||
test('does not strip legitimate first-person advice about answering a support ticket', () => {
|
||||
const text =
|
||||
'I need to answer the support ticket before end of day. The customer is waiting.\n\nHere is the response I drafted...'
|
||||
|
||||
expect(looksLikeLeakedReasoningPrefix(text)).toBe(false)
|
||||
expect(shouldBufferPotentialReasoningPrefix(text)).toBe(false)
|
||||
expect(stripLeakedReasoningPreamble(text)).toBe(text)
|
||||
})
|
||||
})
|
||||
@@ -1,54 +0,0 @@
|
||||
// Text opening with a first-person planning phrase or "the task/request".
const EXPLICIT_REASONING_START_RE =
  /^\s*(i should\b|i need to\b|let me think\b|the task\b|the request\b)/i

// Words that indicate the text is talking *about* the conversation itself.
const EXPLICIT_REASONING_META_RE =
  /\b(user|request|question|prompt|message|task|greeting|small talk|briefly|friendly|concise)\b/i

// Text opening with a description of what "the user" said/asked/wants.
const USER_META_START_RE =
  /^\s*the user\s+(just\s+)?(said|asked|is asking|wants|wanted|mentioned|seems|appears)\b/i

// Same opening as above, followed later by a how-to-respond marker.
const USER_REASONING_RE =
  /^\s*the user\s+(just\s+)?(said|asked|is asking|wants|wanted|mentioned|seems|appears)\b[\s\S]*\b(i should|i need to|let me think|respond|reply|answer|greeting|small talk|briefly|friendly|concise)\b/i

/**
 * Tells a streaming caller whether `text` should be held back because it may
 * still turn out to be a leaked reasoning preamble.
 *
 * True when the text already looks like leaked reasoning, or when it has no
 * blank-line paragraph break yet and opens with one of the suspicious
 * prefixes. Blank or whitespace-only text is never buffered.
 */
export function shouldBufferPotentialReasoningPrefix(text: string): boolean {
  const candidate = text.trim()
  if (candidate === '') return false

  if (looksLikeLeakedReasoningPrefix(candidate)) return true

  // Once a paragraph break is present the first paragraph is complete, so an
  // undecided prefix is no longer worth holding back.
  if (/\n\s*\n/.test(candidate)) return false

  return [EXPLICIT_REASONING_START_RE, USER_META_START_RE].some(pattern =>
    pattern.test(candidate),
  )
}

/**
 * Returns true when `text` reads like model-internal reasoning: either a
 * "the user said … I should respond …" shape, or an explicit planning opener
 * combined with a conversation-meta word.
 */
export function looksLikeLeakedReasoningPrefix(text: string): boolean {
  const candidate = text.trim()
  if (candidate === '') return false

  if (USER_REASONING_RE.test(candidate)) return true

  return (
    EXPLICIT_REASONING_START_RE.test(candidate) &&
    EXPLICIT_REASONING_META_RE.test(candidate)
  )
}

/**
 * Removes a leading paragraph from `text` when that paragraph looks like
 * leaked reasoning, returning the remaining paragraphs joined by blank lines
 * and trimmed.
 *
 * The input is returned untouched when it has a single paragraph, when the
 * first paragraph is not flagged, or when nothing would remain after
 * stripping.
 */
export function stripLeakedReasoningPreamble(text: string): string {
  const [lead, ...rest] = text.replace(/\r\n/g, '\n').split(/\n\s*\n/)
  if (rest.length === 0) return text

  if (!looksLikeLeakedReasoningPrefix((lead ?? '').trim())) {
    return text
  }

  const body = rest.join('\n\n').trim()
  return body === '' ? text : body
}
|
||||
191
src/services/api/smartModelRouting.test.ts
Normal file
191
src/services/api/smartModelRouting.test.ts
Normal file
@@ -0,0 +1,191 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import {
|
||||
routeModel,
|
||||
type SmartRoutingConfig,
|
||||
} from './smartModelRouting.ts'
|
||||
|
||||
const ENABLED: SmartRoutingConfig = {
|
||||
enabled: true,
|
||||
simpleModel: 'claude-haiku-4-5',
|
||||
strongModel: 'claude-opus-4-7',
|
||||
}
|
||||
|
||||
describe('routeModel — disabled / misconfigured', () => {
|
||||
test('disabled config routes to strong', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'hi' },
|
||||
{ ...ENABLED, enabled: false },
|
||||
)
|
||||
expect(decision.model).toBe('claude-opus-4-7')
|
||||
expect(decision.complexity).toBe('strong')
|
||||
expect(decision.reason).toContain('disabled')
|
||||
})
|
||||
|
||||
test('missing simpleModel falls back to strong', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'hi' },
|
||||
{ ...ENABLED, simpleModel: '' },
|
||||
)
|
||||
expect(decision.model).toBe('claude-opus-4-7')
|
||||
expect(decision.complexity).toBe('strong')
|
||||
})
|
||||
|
||||
test('simpleModel === strongModel routes to strong (no-op)', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'hi' },
|
||||
{ ...ENABLED, simpleModel: 'claude-opus-4-7' },
|
||||
)
|
||||
expect(decision.model).toBe('claude-opus-4-7')
|
||||
expect(decision.complexity).toBe('strong')
|
||||
})
|
||||
})
|
||||
|
||||
describe('routeModel — simple path', () => {
|
||||
test('short greeting routes to simple', () => {
|
||||
const decision = routeModel({ userText: 'thanks!', turnNumber: 5 }, ENABLED)
|
||||
expect(decision.model).toBe('claude-haiku-4-5')
|
||||
expect(decision.complexity).toBe('simple')
|
||||
})
|
||||
|
||||
test('empty input routes to simple', () => {
|
||||
const decision = routeModel({ userText: ' ' }, ENABLED)
|
||||
expect(decision.model).toBe('claude-haiku-4-5')
|
||||
expect(decision.complexity).toBe('simple')
|
||||
})
|
||||
|
||||
test('mid-length chatter routes to simple', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'yep looks good, go ahead', turnNumber: 10 },
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.complexity).toBe('simple')
|
||||
})
|
||||
})
|
||||
|
||||
describe('routeModel — strong path', () => {
|
||||
test('first turn always routes to strong, even when short', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'fix the bug', turnNumber: 1 },
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.model).toBe('claude-opus-4-7')
|
||||
expect(decision.complexity).toBe('strong')
|
||||
expect(decision.reason).toContain('first turn')
|
||||
})
|
||||
|
||||
test('code fence routes to strong', () => {
|
||||
const decision = routeModel(
|
||||
{
|
||||
userText: 'change this:\n```\nfoo()\n```',
|
||||
turnNumber: 5,
|
||||
},
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
expect(decision.reason).toContain('code')
|
||||
})
|
||||
|
||||
test('inline code span routes to strong', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'rename `foo` to `bar`', turnNumber: 5 },
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
})
|
||||
|
||||
test('reasoning keyword "plan" routes to strong even when short', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'plan the refactor', turnNumber: 5 },
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
expect(decision.reason).toContain('keyword')
|
||||
})
|
||||
|
||||
test('reasoning keyword "debug" routes to strong', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'debug the test', turnNumber: 5 },
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
})
|
||||
|
||||
test('"root cause" multi-word keyword routes to strong', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'find the root cause', turnNumber: 5 },
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
})
|
||||
|
||||
test('multi-paragraph input routes to strong', () => {
|
||||
const decision = routeModel(
|
||||
{
|
||||
userText: 'first thought.\n\nsecond thought.',
|
||||
turnNumber: 5,
|
||||
},
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
expect(decision.reason).toContain('multi-paragraph')
|
||||
})
|
||||
|
||||
test('over-long input routes to strong', () => {
|
||||
const long = 'ok '.repeat(100) // ~300 chars, 100 words
|
||||
const decision = routeModel(
|
||||
{ userText: long, turnNumber: 5 },
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
})
|
||||
|
||||
test('exactly at the boundary stays simple', () => {
|
||||
const text = 'a'.repeat(160)
|
||||
const decision = routeModel(
|
||||
{ userText: text, turnNumber: 5 },
|
||||
{ ...ENABLED, simpleMaxChars: 160, simpleMaxWords: 28 },
|
||||
)
|
||||
expect(decision.complexity).toBe('simple')
|
||||
})
|
||||
|
||||
test('one char over the boundary routes to strong', () => {
|
||||
const text = 'a'.repeat(161)
|
||||
const decision = routeModel(
|
||||
{ userText: text, turnNumber: 5 },
|
||||
{ ...ENABLED, simpleMaxChars: 160, simpleMaxWords: 28 },
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
expect(decision.reason).toContain('160 chars')
|
||||
})
|
||||
})
|
||||
|
||||
describe('routeModel — config overrides', () => {
|
||||
test('custom simpleMaxChars is honored', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'abcdefghijklmnop', turnNumber: 5 },
|
||||
{ ...ENABLED, simpleMaxChars: 10 },
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
expect(decision.reason).toContain('10 chars')
|
||||
})
|
||||
|
||||
test('custom simpleMaxWords is honored', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'one two three four five', turnNumber: 5 },
|
||||
{ ...ENABLED, simpleMaxWords: 3 },
|
||||
)
|
||||
expect(decision.complexity).toBe('strong')
|
||||
expect(decision.reason).toContain('3 words')
|
||||
})
|
||||
})
|
||||
|
||||
describe('routeModel — reason strings', () => {
|
||||
test('simple decisions include char + word counts', () => {
|
||||
const decision = routeModel(
|
||||
{ userText: 'sounds good', turnNumber: 5 },
|
||||
ENABLED,
|
||||
)
|
||||
expect(decision.reason).toMatch(/\d+ chars, \d+ words/)
|
||||
})
|
||||
})
|
||||
215
src/services/api/smartModelRouting.ts
Normal file
215
src/services/api/smartModelRouting.ts
Normal file
@@ -0,0 +1,215 @@
|
||||
/**
|
||||
* Smart model routing — cheap-for-simple, strong-for-hard.
|
||||
*
|
||||
* For everyday short chatter ("ok", "thanks", "what does this do?") the
|
||||
* incremental quality of Opus/GPT-5 over Haiku/Mini is negligible while the
|
||||
* cost and latency are an order of magnitude worse. Smart routing opts a
|
||||
* user into routing such "obviously simple" turns to a cheaper model while
|
||||
* keeping the strong model for anything non-trivial.
|
||||
*
|
||||
* This module is a pure primitive: it takes a turn description (the user's
|
||||
* text + light context) and returns which model to use, based on config.
|
||||
* It never reads env vars or state directly — caller supplies everything.
|
||||
*
|
||||
* Off by default. Users opt in via settings.smartRouting.enabled. Intent:
|
||||
* make this a copy-paste-small config block rather than a hidden heuristic,
|
||||
* so the tradeoff is visible and the user controls it.
|
||||
*/
|
||||
|
||||
export type SmartRoutingConfig = {
|
||||
enabled: boolean
|
||||
/** Model to use for turns classified as "simple". */
|
||||
simpleModel: string
|
||||
/** Model to use for turns classified as "strong" (or when unsure). */
|
||||
strongModel: string
|
||||
/** Max characters in user input to qualify as "simple". Default 160. */
|
||||
simpleMaxChars?: number
|
||||
/** Max whitespace-separated words to qualify as "simple". Default 28. */
|
||||
simpleMaxWords?: number
|
||||
}
|
||||
|
||||
export type RoutingDecision = {
|
||||
model: string
|
||||
complexity: 'simple' | 'strong'
|
||||
/** Human-readable reason — useful for the UI indicator and debug logs. */
|
||||
reason: string
|
||||
}
|
||||
|
||||
export type RoutingInput = {
|
||||
/** The user's message text for this turn. */
|
||||
userText: string
|
||||
/**
|
||||
* Optional: how many tool-use blocks the assistant has emitted in the
|
||||
* recent conversation. High values correlate with "continue this work"
|
||||
* follow-ups that can still be cheap, UNLESS the user also typed code
|
||||
* or strong-keyword text.
|
||||
*/
|
||||
recentToolUses?: number
|
||||
/**
|
||||
* Optional: turn number within the current session (1-indexed). The first
|
||||
* turn is often task-setup and benefits from the strong model even if
|
||||
* short — a bare "build X" opens the whole task.
|
||||
*/
|
||||
turnNumber?: number
|
||||
}
|
||||
|
||||
const DEFAULT_SIMPLE_MAX_CHARS = 160
|
||||
const DEFAULT_SIMPLE_MAX_WORDS = 28
|
||||
|
||||
// Keywords that strongly suggest reasoning/planning/design work.
// Matching is word-boundary / case-insensitive. Must include enough anchors
// that short prompts like "plan the refactor" route to strong even under
// the char/word cutoff.
const STRONG_KEYWORDS = [
  'plan',
  'design',
  'architect',
  'architecture',
  'refactor',
  'debug',
  'investigate',
  'analyze',
  'analyse',
  'implement',
  'optimize',
  'optimise',
  'review',
  'audit',
  'diagnose',
  'root cause',
  'root-cause',
  'why does',
  'why is',
  'how should',
  'why did',
  'propose',
  'trace',
  'reproduce',
]

/**
 * Case-insensitive, word-bounded alternation over STRONG_KEYWORDS.
 *
 * Each keyword is split on its separators and rebuilt as a pattern:
 *   - literal pieces are regex-escaped, so a keyword containing a
 *     metacharacter cannot corrupt the expression;
 *   - a whitespace separator matches any run of whitespace, so
 *     "why  does" or "why\tdoes" still counts as "why does";
 *   - a separator containing a hyphen matches any run of hyphens/whitespace,
 *     so "root-cause" also covers "root cause".
 */
const STRONG_KEYWORD_RE = (() => {
  const escapeLiteral = (piece: string): string =>
    piece.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  const toPattern = (keyword: string): string =>
    keyword
      // The capture group keeps separators at the odd indices of the result.
      .split(/([-\s]+)/)
      .map((piece, index) => {
        if (index % 2 === 0) return escapeLiteral(piece)
        return piece.includes('-') ? '[-\\s]+' : '\\s+'
      })
      .join('')

  return new RegExp(
    `\\b(?:${STRONG_KEYWORDS.map(toPattern).join('|')})\\b`,
    'i',
  )
})()
|
||||
|
||||
// Detects code in user text. First alternative: a triple-backtick fenced span
// (lazy, may cross lines, must be closed). Second alternative: a non-empty
// single-backtick inline span that stays on one line.
const CODE_FENCE_RE = /```[\s\S]*?```|`[^`\n]+`/
|
||||
|
||||
/**
 * Count whitespace-separated words in `text`.
 *
 * @returns 0 for empty or whitespace-only input, otherwise the number of
 *   maximal runs of non-whitespace characters.
 */
function countWords(text: string): number {
  const words = text.match(/\S+/g)
  return words === null ? 0 : words.length
}
|
||||
|
||||
/**
 * True when `text` contains a paragraph break: two newlines separated by
 * nothing but optional whitespace.
 */
function hasMultiParagraph(text: string): boolean {
  const paragraphBreak = /\n\s*\n/
  return text.search(paragraphBreak) !== -1
}
|
||||
|
||||
/** True when `text` contains a fenced code block or an inline code span. */
function hasCode(text: string): boolean {
  return CODE_FENCE_RE.exec(text) !== null
}
|
||||
|
||||
/** True when `text` contains at least one of the strong-routing keywords. */
function hasStrongKeyword(text: string): boolean {
  return STRONG_KEYWORD_RE.exec(text) !== null
}
|
||||
|
||||
/**
 * Choose between the simple and the strong model for one user turn.
 *
 * Checks run in this order; the first one that applies wins:
 *   1. routing disabled, a model missing, or both models equal → strong
 *   2. empty (whitespace-only) user text → simple
 *   3. first turn of the session → strong
 *   4. code, a strong keyword, or a paragraph break in the text → strong
 *   5. text longer than the char or word limit → strong
 *   6. anything else → simple
 *
 * @param input  Signals for the current turn.
 * @param config Routing configuration (models, enabled flag, optional limits).
 * @returns The selected model, its tier, and a human-readable reason.
 */
export function routeModel(
  input: RoutingInput,
  config: SmartRoutingConfig,
): RoutingDecision {
  // Single place where decision objects are built.
  const decide = (
    model: string,
    complexity: RoutingDecision['complexity'],
    reason: string,
  ): RoutingDecision => ({ model, complexity, reason })

  // --- Configuration guards: the strong model is the safe default. ---
  if (!config.enabled) {
    return decide(config.strongModel, 'strong', 'smart-routing disabled')
  }
  if (!config.simpleModel || !config.strongModel) {
    return decide(
      config.strongModel,
      'strong',
      'simpleModel or strongModel missing from config',
    )
  }
  if (config.simpleModel === config.strongModel) {
    return decide(config.strongModel, 'strong', 'simpleModel equals strongModel')
  }

  const userText = (input.userText ?? '').trim()

  // Empty input (e.g. resuming a tool-use chain) — cheap by default.
  if (userText === '') {
    return decide(config.simpleModel, 'simple', 'empty user text')
  }

  // First turn of a session is task-setup — always use strong.
  if (input.turnNumber === 1) {
    return decide(config.strongModel, 'strong', 'first turn of session')
  }

  const charLimit = config.simpleMaxChars ?? DEFAULT_SIMPLE_MAX_CHARS
  const wordLimit = config.simpleMaxWords ?? DEFAULT_SIMPLE_MAX_WORDS

  // --- Content signals. ---
  if (hasCode(userText)) {
    return decide(
      config.strongModel,
      'strong',
      'contains code block or inline code',
    )
  }
  if (hasStrongKeyword(userText)) {
    return decide(
      config.strongModel,
      'strong',
      'contains reasoning/planning keyword',
    )
  }
  if (hasMultiParagraph(userText)) {
    return decide(config.strongModel, 'strong', 'multi-paragraph input')
  }

  // --- Size signals. ---
  if (userText.length > charLimit) {
    return decide(config.strongModel, 'strong', `input > ${charLimit} chars`)
  }
  const wordCount = countWords(userText)
  if (wordCount > wordLimit) {
    return decide(config.strongModel, 'strong', `input > ${wordLimit} words`)
  }

  return decide(
    config.simpleModel,
    'simple',
    `short (${userText.length} chars, ${wordCount} words)`,
  )
}
|
||||
183
src/services/api/thinkTagSanitizer.test.ts
Normal file
183
src/services/api/thinkTagSanitizer.test.ts
Normal file
@@ -0,0 +1,183 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import {
|
||||
createThinkTagFilter,
|
||||
stripThinkTags,
|
||||
} from './thinkTagSanitizer.ts'
|
||||
|
||||
describe('stripThinkTags — whole-text cleanup', () => {
  // Each row: test title, then one or more [input, expected output] pairs.
  const rows: Array<[string, Array<[string, string]>]> = [
    ['strips closed think pair', [['<think>reasoning</think>Hello', 'Hello']]],
    ['strips closed thinking pair', [['<thinking>x</thinking>Out', 'Out']]],
    ['strips closed reasoning pair', [['<reasoning>x</reasoning>Out', 'Out']]],
    [
      'strips REASONING_SCRATCHPAD pair',
      [['<REASONING_SCRATCHPAD>plan</REASONING_SCRATCHPAD>Answer', 'Answer']],
    ],
    [
      'is case-insensitive',
      [
        ['<THINKING>x</THINKING>out', 'out'],
        ['<Think>x</Think>out', 'out'],
      ],
    ],
    [
      'handles attributes on open tag',
      [['<think id="plan-1">reason</think>ok', 'ok']],
    ],
    [
      'strips unterminated open tag at block boundary',
      [['<think>reasoning that never closes', '']],
    ],
    [
      // Block-boundary match consumes the leading newline, same as hermes.
      'strips unterminated open tag after newline',
      [['Answer: 42\n<think>second-guess myself', 'Answer: 42']],
    ],
    ['strips orphan close tag', [['trailing </think>done', 'trailing done']]],
    ['strips multiple blocks', [['<think>a</think>B<think>c</think>D', 'BD']]],
    [
      'handles reasoning mid-response after content',
      [['Answer: 42\n<think>double-check</think>\nDone', 'Answer: 42\n\nDone']],
    ],
    [
      'handles nested-looking tags (lazy match + orphan cleanup)',
      [['<think><think>x</think></think>y', 'y']],
    ],
    [
      'preserves legitimate non-think tags',
      [['use <div> and <span>', 'use <div> and <span>']],
    ],
    [
      'preserves text without any tags',
      [
        [
          'Hello, world. I should respond briefly.',
          'Hello, world. I should respond briefly.',
        ],
      ],
    ],
    ['handles empty input', [['', '']]],
  ]

  for (const [title, pairs] of rows) {
    test(title, () => {
      for (const [input, expected] of pairs) {
        expect(stripThinkTags(input)).toBe(expected)
      }
    })
  }
})
|
||||
|
||||
describe('createThinkTagFilter — streaming state machine', () => {
  type Scenario = {
    title: string
    /** [chunk fed, text expected back from feed()] in order. */
    steps: Array<[string, string]>
    /** Text expected back from flush() after the last step. */
    flushed: string
    /** When set, also assert the filter reports "not inside" after flush. */
    expectOutsideAfterFlush?: boolean
  }

  const scenarios: Scenario[] = [
    {
      title: 'passes through plain text',
      steps: [
        ['Hello, ', 'Hello, '],
        ['world!', 'world!'],
      ],
      flushed: '',
    },
    {
      title: 'strips a complete think block in one chunk',
      steps: [['pre<think>reason</think>post', 'prepost']],
      flushed: '',
    },
    {
      title: 'handles open tag split across deltas',
      steps: [
        ['before<th', 'before'],
        ['ink>reason</think>after', 'after'],
      ],
      flushed: '',
    },
    {
      title: 'handles close tag split across deltas',
      steps: [
        ['<think>reason</th', ''],
        ['ink>keep', 'keep'],
      ],
      flushed: '',
    },
    {
      title: 'handles tag split on bare < boundary',
      steps: [
        ['leading <', 'leading '],
        ['think>inner</think>tail', 'tail'],
      ],
      flushed: '',
    },
    {
      // "<d" — 'd' cannot start any of our tag names, so emit immediately
      title: 'preserves partial non-tag < at boundary when next char rules it out',
      steps: [
        ['pre<d', 'pre<d'],
        ['iv>rest', 'iv>rest'],
      ],
      flushed: '',
    },
    {
      title: 'case-insensitive streaming',
      steps: [['<THINKING>x</THINKING>out', 'out']],
      flushed: '',
    },
    {
      title: 'unterminated open tag — flush drops remainder',
      steps: [
        ['<think>reasoning with no close ', ''],
        ['and more reasoning', ''],
      ],
      flushed: '',
      expectOutsideAfterFlush: true,
    },
    {
      title: 'multiple blocks in single feed',
      steps: [['<think>a</think>B<think>c</think>D', 'BD']],
      flushed: '',
    },
    {
      title: 'flush after clean stream emits nothing extra',
      steps: [['complete message', 'complete message']],
      flushed: '',
    },
    {
      // bare '<' held back; flush emits it since it has no tag-name chars
      title: 'flush of bare < at end emits it (not a tag prefix)',
      steps: [['x <', 'x ']],
      flushed: '<',
    },
    {
      title: 'flush of partial tag-name prefix at end drops it',
      steps: [['x <thi', 'x ']],
      flushed: '',
    },
    {
      title: 'handles attributes on streaming open tag',
      steps: [['<think type="plan">reason</think>ok', 'ok']],
      flushed: '',
    },
    {
      title: 'mid-delta transition: content, reasoning, content',
      steps: [
        ['Answer: 42\n<think>', 'Answer: 42\n'],
        ['double-check', ''],
        ['</think>\nDone', '\nDone'],
      ],
      flushed: '',
    },
  ]

  for (const scenario of scenarios) {
    test(scenario.title, () => {
      const filter = createThinkTagFilter()
      for (const [chunk, emitted] of scenario.steps) {
        expect(filter.feed(chunk)).toBe(emitted)
      }
      expect(filter.flush()).toBe(scenario.flushed)
      if (scenario.expectOutsideAfterFlush) {
        expect(filter.isInsideBlock()).toBe(false)
      }
    })
  }

  test('orphan close tag mid-stream is stripped on flush via safety-net behavior', () => {
    // Filter alone treats orphan close as "we're not inside", so it emits as-is.
    // Safety net (stripThinkTags on final text) removes orphans.
    const filter = createThinkTagFilter()
    const pieces: string[] = []
    pieces.push(filter.feed('trailing '))
    pieces.push(filter.feed('</think>done'))
    pieces.push(filter.flush())
    // Orphan close appears in stream output; safety net cleans it
    expect(stripThinkTags(pieces.join(''))).toBe('trailing done')
  })
})
|
||||
162
src/services/api/thinkTagSanitizer.ts
Normal file
162
src/services/api/thinkTagSanitizer.ts
Normal file
@@ -0,0 +1,162 @@
|
||||
/**
|
||||
* Think-tag sanitizer for reasoning content leaks.
|
||||
*
|
||||
* Some OpenAI-compatible reasoning models (MiniMax M2.7, GLM-4.5/5, DeepSeek, Kimi K2,
|
||||
* self-hosted vLLM builds) emit chain-of-thought inline inside the `content` field using
|
||||
* XML-like tags instead of the separate `reasoning_content` channel. Example:
|
||||
*
|
||||
* <think>the user wants foo, let me check bar</think>Here is the answer: ...
|
||||
*
|
||||
* This module strips those blocks structurally (tag-based), independent of English
|
||||
* phrasings. Three layers:
|
||||
*
|
||||
* 1. `createThinkTagFilter()` — streaming state machine. Feeds deltas, emits only
|
||||
* the visible (non-reasoning) portion, and buffers partial tags across chunk
|
||||
* boundaries so `</th` + `ink>` still parses correctly.
|
||||
*
|
||||
* 2. `stripThinkTags()` — whole-text cleanup. Removes closed pairs, unterminated
|
||||
* opens at block boundaries, and orphan open/close tags. Used for non-streaming
|
||||
* responses and as a safety net after stream close.
|
||||
*
|
||||
* 3. Flush discards buffered partial tags at stream end (false-negative bias —
|
||||
* prefer losing a partial reasoning fragment over leaking it).
|
||||
*/
|
||||
|
||||
/** Lower-case tag names whose contents are treated as hidden reasoning. */
const TAG_NAMES = [
  'think',
  'thinking',
  'reasoning',
  'thought',
  'reasoning_scratchpad',
] as const

const TAG_ALT = TAG_NAMES.join('|')

// Complete open tag, optionally with attributes: <think ...>
const OPEN_TAG_RE = new RegExp(`<\\s*(?:${TAG_ALT})\\b[^>]*>`, 'i')
// Complete close tag: </think>
const CLOSE_TAG_RE = new RegExp(`<\\s*/\\s*(?:${TAG_ALT})\\s*>`, 'i')

// Open tag, lazy body, close tag with the SAME name (backreference \1).
const CLOSED_PAIR_RE_G = new RegExp(
  `<\\s*(${TAG_ALT})\\b[^>]*>[\\s\\S]*?<\\s*/\\s*\\1\\s*>`,
  'gi',
)
// Open tag at start of text or start of a line, with everything after it.
const UNTERMINATED_OPEN_RE = new RegExp(
  `(?:^|\\n)[ \\t]*<\\s*(?:${TAG_ALT})\\b[^>]*>[\\s\\S]*$`,
  'i',
)
// Any leftover open or close tag, plus the whitespace that follows it.
const ORPHAN_TAG_RE_G = new RegExp(
  `<\\s*/?\\s*(?:${TAG_ALT})\\b[^>]*>\\s*`,
  'gi',
)
// Unfinished open tag whose NAME is already complete but whose attribute text
// has not reached '>' yet, e.g. `<think id="pl`. Applied to a tail that starts
// at the last '<' of the buffer.
const PARTIAL_OPEN_WITH_ATTRS_RE = new RegExp(
  `^<\\s*(?:${TAG_ALT})\\b[^<>]*$`,
  'i',
)

/** Longest tail (in characters) that will be held back as a possible tag. */
const MAX_PARTIAL_TAG = 64

/**
 * Remove reasoning/thinking blocks from a complete text body.
 *
 * Three passes, in order:
 *   1. closed pairs (<think>...</think>), lazy match, anywhere in the text
 *   2. an unterminated open tag at a block boundary — stripped to end of string
 *   3. remaining orphan open/close tags
 *
 * @param text Full response text.
 * @returns The text with reasoning blocks removed; empty input is returned as-is.
 */
export function stripThinkTags(text: string): string {
  if (!text) return text
  let out = text
  out = out.replace(CLOSED_PAIR_RE_G, '')
  out = out.replace(UNTERMINATED_OPEN_RE, '')
  out = out.replace(ORPHAN_TAG_RE_G, '')
  return out
}

/** Incremental filter over streamed text deltas. */
export interface ThinkTagFilter {
  /** Feed the next delta; returns the visible text that can be emitted now. */
  feed(chunk: string): string
  /** End of stream: returns held-back visible text (if any) and resets state. */
  flush(): string
  /** True while the filter is between an open tag and its close tag. */
  isInsideBlock(): boolean
}

/**
 * Streaming state machine. Feed deltas, emits visible (non-reasoning) text.
 * Handles tags split across chunk boundaries by holding back a short tail buffer
 * whenever the current buffer ends with what looks like a partial tag — including
 * an open tag that is cut in the middle of its attributes.
 */
export function createThinkTagFilter(): ThinkTagFilter {
  let inside = false
  let buffer = ''

  /**
   * Index of a trailing, still-incomplete tag candidate in `s`, or -1 when the
   * whole string can be consumed.
   */
  function findPartialTagStart(s: string): number {
    const lastLt = s.lastIndexOf('<')
    if (lastLt === -1) return -1
    // A '>' after the last '<' means the tag is complete, not partial.
    if (s.indexOf('>', lastLt) !== -1) return -1
    const tail = s.slice(lastLt)
    // Bound the hold-back so ordinary text containing '<' is not buffered forever.
    if (tail.length > MAX_PARTIAL_TAG) return -1

    // Case 1: '<', optional '/', and at most a (possibly partial) tag name.
    const m = /^<\s*\/?\s*([a-zA-Z_]\w*)?\s*$/.exec(tail)
    if (m) {
      const partialName = (m[1] ?? '').toLowerCase()
      if (!partialName) return lastLt
      return TAG_NAMES.some(name => name.startsWith(partialName)) ? lastLt : -1
    }

    // Case 2: complete open-tag name followed by unfinished attributes.
    // Without this, `<think id="pl` + `an-1">…</think>` would be emitted as
    // plain text and the reasoning body would leak. Only relevant outside a
    // block: inside, the text is discarded anyway and close tags carry no
    // attributes.
    if (!inside && PARTIAL_OPEN_WITH_ATTRS_RE.test(tail)) return lastLt

    return -1
  }

  function feed(chunk: string): string {
    if (!chunk) return ''
    buffer += chunk
    let out = ''

    while (buffer.length > 0) {
      if (!inside) {
        const open = OPEN_TAG_RE.exec(buffer)
        if (open) {
          // Emit text before the tag, drop the tag, switch to "inside".
          out += buffer.slice(0, open.index)
          buffer = buffer.slice(open.index + open[0].length)
          inside = true
          continue
        }

        // No complete open tag: emit everything except a possible partial tag.
        const partialStart = findPartialTagStart(buffer)
        if (partialStart === -1) {
          out += buffer
          buffer = ''
        } else {
          out += buffer.slice(0, partialStart)
          buffer = buffer.slice(partialStart)
        }
        return out
      }

      const close = CLOSE_TAG_RE.exec(buffer)
      if (close) {
        // Discard the reasoning body and the close tag itself.
        buffer = buffer.slice(close.index + close[0].length)
        inside = false
        continue
      }

      // Still inside: discard reasoning text, keep only a possible partial close tag.
      const partialStart = findPartialTagStart(buffer)
      if (partialStart === -1) {
        buffer = ''
      } else {
        buffer = buffer.slice(partialStart)
      }
      return out
    }

    return out
  }

  function flush(): string {
    const held = buffer
    const wasInside = inside
    buffer = ''
    inside = false

    // Unterminated reasoning block: nothing visible to emit.
    if (wasInside) return ''
    if (!held) return ''

    // A held tail that already shows a tag-name character is dropped rather
    // than risk leaking a reasoning tag; a bare '<' is ordinary text.
    if (/^<\s*\/?\s*[a-zA-Z_]/.test(held)) return ''
    return held
  }

  return { feed, flush, isInsideBlock: () => inside }
}
|
||||
@@ -70,7 +70,7 @@ describe('runAutoFixCheck', () => {
|
||||
|
||||
test('handles timeout gracefully', async () => {
|
||||
const result = await runAutoFixCheck({
|
||||
lint: 'sleep 10',
|
||||
lint: 'node -e "setTimeout(() => {}, 10000)"',
|
||||
timeout: 100,
|
||||
|
||||
cwd: '/tmp',
|
||||
|
||||
@@ -46,14 +46,31 @@ async function runCommand(
|
||||
|
||||
const killTree = () => {
|
||||
try {
|
||||
if (!isWindows && proc.pid) {
|
||||
if (isWindows && proc.pid) {
|
||||
// shell=true on Windows can leave child commands running unless we
|
||||
// terminate the full process tree.
|
||||
const killer = spawn('taskkill', ['/pid', String(proc.pid), '/T', '/F'], {
|
||||
windowsHide: true,
|
||||
stdio: 'ignore',
|
||||
})
|
||||
killer.unref()
|
||||
return
|
||||
}
|
||||
|
||||
if (proc.pid) {
|
||||
// Kill the entire process group
|
||||
process.kill(-proc.pid, 'SIGTERM')
|
||||
} else {
|
||||
proc.kill('SIGTERM')
|
||||
return
|
||||
}
|
||||
|
||||
proc.kill('SIGTERM')
|
||||
} catch {
|
||||
// Process may have already exited
|
||||
// Process may have already exited; fallback to direct child kill.
|
||||
try {
|
||||
proc.kill('SIGTERM')
|
||||
} catch {
|
||||
// Ignore final fallback errors.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,12 +16,21 @@ describe('getEffectiveContextWindowSize', () => {
|
||||
// 8k minus 20k summary reservation = -12k, causing infinite auto-compact.
|
||||
// Now the fallback is 128k and there's a floor, so effective is always
|
||||
// at least reservedTokensForSummary + buffer.
|
||||
//
|
||||
// The exact floor depends on the max-output-tokens slot-reservation cap
|
||||
// (tengu_otk_slot_v1 GrowthBook flag). With cap enabled, the model's
|
||||
// default output cap drops to CAPPED_DEFAULT_MAX_TOKENS (8k), so the
|
||||
// summary reservation is 8k and the floor is 8k + 13k = 21k. With cap
|
||||
// disabled it's 20k + 13k = 33k. Assert the worst case so the test is
|
||||
// stable regardless of flag state in CI vs local.
|
||||
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
||||
try {
|
||||
const effective = getEffectiveContextWindowSize('some-unknown-3p-model')
|
||||
expect(effective).toBeGreaterThan(0)
|
||||
// Must be at least summary reservation (20k) + buffer (13k) = 33k
|
||||
expect(effective).toBeGreaterThanOrEqual(33_000)
|
||||
// 21k = CAPPED_DEFAULT_MAX_TOKENS (8k) + AUTOCOMPACT_BUFFER_TOKENS (13k).
|
||||
// Covers the anti-regression intent of issue #635 without assuming
|
||||
// the GrowthBook flag state.
|
||||
expect(effective).toBeGreaterThanOrEqual(21_000)
|
||||
} finally {
|
||||
delete process.env.CLAUDE_CODE_USE_OPENAI
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ export const TIME_BASED_MC_CLEARED_MESSAGE = '[Old tool result content cleared]'
|
||||
const IMAGE_MAX_TOKEN_SIZE = 2000
|
||||
|
||||
// Only compact these built-in tools (MCP tools are also compactable via prefix match)
|
||||
const COMPACTABLE_TOOLS = new Set<string>([
|
||||
export const COMPACTABLE_TOOLS = new Set<string>([
|
||||
FILE_READ_TOOL_NAME,
|
||||
...SHELL_TOOL_NAMES,
|
||||
GREP_TOOL_NAME,
|
||||
@@ -51,7 +51,7 @@ const COMPACTABLE_TOOLS = new Set<string>([
|
||||
|
||||
const MCP_TOOL_PREFIX = 'mcp__'
|
||||
|
||||
function isCompactableTool(name: string): boolean {
|
||||
export function isCompactableTool(name: string): boolean {
|
||||
return COMPACTABLE_TOOLS.has(name) || name.startsWith(MCP_TOOL_PREFIX)
|
||||
}
|
||||
|
||||
|
||||
@@ -2524,7 +2524,7 @@ export async function transformResultContent(
|
||||
return [
|
||||
{
|
||||
type: 'text',
|
||||
text: resultContent.text,
|
||||
text: recursivelySanitizeUnicode(resultContent.text) as string,
|
||||
},
|
||||
]
|
||||
case 'audio': {
|
||||
@@ -2569,7 +2569,9 @@ export async function transformResultContent(
|
||||
return [
|
||||
{
|
||||
type: 'text',
|
||||
text: `${prefix}${resource.text}`,
|
||||
text: recursivelySanitizeUnicode(
|
||||
`${prefix}${resource.text}`,
|
||||
) as string,
|
||||
},
|
||||
]
|
||||
} else if ('blob' in resource) {
|
||||
|
||||
@@ -26,10 +26,10 @@ test('initializeWiki creates the expected wiki scaffold', async () => {
|
||||
|
||||
expect(result.alreadyExisted).toBe(false)
|
||||
expect(result.createdFiles).toEqual([
|
||||
'.openclaude/wiki/schema.md',
|
||||
'.openclaude/wiki/index.md',
|
||||
'.openclaude/wiki/log.md',
|
||||
'.openclaude/wiki/pages/architecture.md',
|
||||
join('.openclaude', 'wiki', 'schema.md'),
|
||||
join('.openclaude', 'wiki', 'index.md'),
|
||||
join('.openclaude', 'wiki', 'log.md'),
|
||||
join('.openclaude', 'wiki', 'pages', 'architecture.md'),
|
||||
])
|
||||
expect(await readFile(paths.schemaFile, 'utf8')).toContain(
|
||||
'# OpenClaude Wiki Schema',
|
||||
|
||||
@@ -59,7 +59,7 @@ export function generatePrompt(): string {
|
||||
## Configurable settings list
|
||||
The following settings are available for you to change:
|
||||
|
||||
### Global Settings (stored in ~/.claude.json)
|
||||
### Global Settings (stored in ~/.openclaude.json)
|
||||
${globalSettings.join('\n')}
|
||||
|
||||
### Project Settings (stored in settings.json)
|
||||
|
||||
@@ -15,6 +15,7 @@ import {
|
||||
} from '../../utils/mcpOutputStorage.js'
|
||||
import { getSettings_DEPRECATED } from '../../utils/settings/settings.js'
|
||||
import { asSystemPrompt } from '../../utils/systemPromptType.js'
|
||||
import { ssrfGuardedLookup } from '../../utils/hooks/ssrfGuard.js'
|
||||
import { isPreapprovedHost } from './preapproved.js'
|
||||
import { makeSecondaryModelPrompt } from './prompt.js'
|
||||
|
||||
@@ -281,6 +282,7 @@ export async function getWithPermittedRedirects(
|
||||
maxRedirects: 0,
|
||||
responseType: 'arraybuffer',
|
||||
maxContentLength: MAX_HTTP_CONTENT_LENGTH,
|
||||
lookup: ssrfGuardedLookup,
|
||||
headers: {
|
||||
Accept: 'text/markdown, text/html, */*',
|
||||
'User-Agent': getWebFetchUserAgent(),
|
||||
|
||||
@@ -693,7 +693,7 @@ export function refreshAwsAuth(awsAuthRefresh: string): Promise<boolean> {
|
||||
'AWS auth refresh timed out after 3 minutes. Run your auth command manually in a separate terminal.',
|
||||
)
|
||||
: chalk.red(
|
||||
'Error running awsAuthRefresh (in settings or ~/.claude.json):',
|
||||
'Error running awsAuthRefresh (in settings or ~/.openclaude.json):',
|
||||
)
|
||||
// biome-ignore lint/suspicious/noConsole:: intentional console output
|
||||
console.error(message)
|
||||
@@ -771,7 +771,7 @@ async function getAwsCredsFromCredentialExport(): Promise<{
|
||||
}
|
||||
} catch (e) {
|
||||
const message = chalk.red(
|
||||
'Error getting AWS credentials from awsCredentialExport (in settings or ~/.claude.json):',
|
||||
'Error getting AWS credentials from awsCredentialExport (in settings or ~/.openclaude.json):',
|
||||
)
|
||||
if (e instanceof Error) {
|
||||
// biome-ignore lint/suspicious/noConsole:: intentional console output
|
||||
@@ -961,7 +961,7 @@ export function refreshGcpAuth(gcpAuthRefresh: string): Promise<boolean> {
|
||||
'GCP auth refresh timed out after 3 minutes. Run your auth command manually in a separate terminal.',
|
||||
)
|
||||
: chalk.red(
|
||||
'Error running gcpAuthRefresh (in settings or ~/.claude.json):',
|
||||
'Error running gcpAuthRefresh (in settings or ~/.openclaude.json):',
|
||||
)
|
||||
// biome-ignore lint/suspicious/noConsole:: intentional console output
|
||||
console.error(message)
|
||||
@@ -1959,7 +1959,7 @@ export async function validateForceLoginOrg(): Promise<OrgValidationResult> {
|
||||
|
||||
// Always fetch the authoritative org UUID from the profile endpoint.
|
||||
// Even keychain-sourced tokens verify server-side: the cached org UUID
|
||||
// in ~/.claude.json is user-writable and cannot be trusted.
|
||||
// in ~/.openclaude.json is user-writable and cannot be trusted.
|
||||
const { source } = getAuthTokenSource()
|
||||
const isEnvVarToken =
|
||||
source === 'CLAUDE_CODE_OAUTH_TOKEN' ||
|
||||
|
||||
@@ -28,7 +28,7 @@ import { getSettingsForSource } from './settings/settings.js'
|
||||
* is lazy-initialized) and ensure Node.js compatibility.
|
||||
*
|
||||
* This is safe to call before the trust dialog because we only read from
|
||||
* user-controlled files (~/.claude/settings.json and ~/.claude.json),
|
||||
* user-controlled files (~/.claude/settings.json and ~/.openclaude.json),
|
||||
* not from project-level settings.
|
||||
*/
|
||||
export function applyExtraCACertsFromConfig(): void {
|
||||
@@ -52,7 +52,7 @@ export function applyExtraCACertsFromConfig(): void {
|
||||
* after the trust dialog. But we need the CA cert early to establish the TLS
|
||||
* connection to an HTTPS proxy during init().
|
||||
*
|
||||
* We read from global config (~/.claude.json) and user settings
|
||||
* We read from global config (~/.openclaude.json) and user settings
|
||||
* (~/.claude/settings.json). These are user-controlled files that don't
|
||||
* require trust approval.
|
||||
*/
|
||||
|
||||
@@ -355,7 +355,7 @@ exec ${command}
|
||||
*
|
||||
* Only positive detections are persisted. A negative result from the
|
||||
* filesystem scan is not cached, because it may come from a machine that
|
||||
* shares ~/.claude.json but has no local Chrome (e.g. a remote dev
|
||||
* shares ~/.openclaude.json but has no local Chrome (e.g. a remote dev
|
||||
* environment using the bridge), and caching it would permanently poison
|
||||
* auto-enable for every session on every machine that reads that config.
|
||||
*/
|
||||
|
||||
@@ -244,6 +244,7 @@ export type GlobalConfig = {
|
||||
bypassPermissionsModeAccepted?: boolean
|
||||
hasUsedBackslashReturn?: boolean
|
||||
autoCompactEnabled: boolean // Controls whether auto-compact is enabled
|
||||
toolHistoryCompressionEnabled: boolean // Compress old tool_result content for small-context providers
|
||||
showTurnDuration: boolean // Controls whether to show turn duration message (e.g., "Cooked for 1m 6s")
|
||||
/**
|
||||
* @deprecated Use settings.env instead.
|
||||
@@ -622,6 +623,7 @@ function createDefaultGlobalConfig(): GlobalConfig {
|
||||
verbose: false,
|
||||
editorMode: 'normal',
|
||||
autoCompactEnabled: true,
|
||||
toolHistoryCompressionEnabled: true,
|
||||
showTurnDuration: true,
|
||||
hasSeenTasksHint: false,
|
||||
hasUsedStash: false,
|
||||
@@ -668,6 +670,7 @@ export const GLOBAL_CONFIG_KEYS = [
|
||||
'editorMode',
|
||||
'hasUsedBackslashReturn',
|
||||
'autoCompactEnabled',
|
||||
'toolHistoryCompressionEnabled',
|
||||
'showTurnDuration',
|
||||
'diffTool',
|
||||
'env',
|
||||
@@ -918,7 +921,7 @@ let configCacheHits = 0
|
||||
let configCacheMisses = 0
|
||||
// Session-total count of actual disk writes to the global config file.
|
||||
// Exposed for internal-only dev diagnostics (see inc-4552) so anomalous write
|
||||
// rates surface in the UI before they corrupt ~/.claude.json.
|
||||
// rates surface in the UI before they corrupt ~/.openclaude.json.
|
||||
let globalConfigWriteCount = 0
|
||||
|
||||
export function getGlobalConfigWriteCount(): number {
|
||||
@@ -1257,7 +1260,7 @@ function saveConfigWithLock<A extends object>(
|
||||
const currentConfig = getConfig(file, createDefault)
|
||||
if (file === getGlobalClaudeFile() && wouldLoseAuthState(currentConfig)) {
|
||||
logForDebugging(
|
||||
'saveConfigWithLock: re-read config is missing auth that cache has; refusing to write to avoid wiping ~/.claude.json. See GH #3117.',
|
||||
'saveConfigWithLock: re-read config is missing auth that cache has; refusing to write to avoid wiping ~/.openclaude.json. See GH #3117.',
|
||||
{ level: 'error' },
|
||||
)
|
||||
logEvent('tengu_config_auth_loss_prevented', {})
|
||||
|
||||
@@ -253,7 +253,7 @@ async function resolveClaudePath(): Promise<string> {
|
||||
* Check whether the OS-level protocol handler is already registered AND
|
||||
* points at the expected `claude` binary. Reads the registration artifact
|
||||
* directly (symlink target, .desktop Exec line, registry value) rather than
|
||||
* a cached flag in ~/.claude.json, so:
|
||||
* a cached flag in ~/.openclaude.json, so:
|
||||
* - the check is per-machine (config can sync across machines; OS state can't)
|
||||
* - stale paths self-heal (install-method change → re-register next session)
|
||||
* - deleted artifacts self-heal
|
||||
@@ -311,7 +311,7 @@ export async function ensureDeepLinkProtocolRegistered(): Promise<void> {
|
||||
// EACCES/ENOSPC are deterministic — retrying next session won't help.
|
||||
// Throttle to once per 24h so a read-only ~/.local/share/applications
|
||||
// doesn't generate a failure event on every startup. Marker lives in
|
||||
// ~/.claude (per-machine, not synced) rather than ~/.claude.json (can sync).
|
||||
// ~/.claude (per-machine, not synced) rather than ~/.openclaude.json (can sync).
|
||||
const failureMarkerPath = path.join(
|
||||
getClaudeConfigHomeDir(),
|
||||
'.deep-link-register-failed',
|
||||
|
||||
62
src/utils/env.test.ts
Normal file
62
src/utils/env.test.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
import { afterEach, beforeEach, expect, test } from 'bun:test'
|
||||
import { mkdtempSync, rmSync, writeFileSync } from 'fs'
|
||||
import { tmpdir } from 'os'
|
||||
import { join } from 'path'
|
||||
|
||||
// Snapshot of the environment variables these tests modify, taken at import
// time so afterEach can put them back exactly as they were.
const originalEnv = {
  CLAUDE_CONFIG_DIR: process.env.CLAUDE_CONFIG_DIR,
  CLAUDE_CODE_CUSTOM_OAUTH_URL: process.env.CLAUDE_CODE_CUSTOM_OAUTH_URL,
  USER_TYPE: process.env.USER_TYPE,
}

let tempDir: string

beforeEach(() => {
  // Every test gets its own empty config directory.
  tempDir = mkdtempSync(join(tmpdir(), 'openclaude-env-test-'))
  process.env.CLAUDE_CONFIG_DIR = tempDir
  delete process.env.CLAUDE_CODE_CUSTOM_OAUTH_URL
  delete process.env.USER_TYPE
})

afterEach(() => {
  rmSync(tempDir, { recursive: true, force: true })

  // Restore each tracked variable; one that was originally unset is removed.
  for (const [name, saved] of Object.entries(originalEnv)) {
    if (saved === undefined) {
      delete process.env[name]
    } else {
      process.env[name] = saved
    }
  }
})
|
||||
|
||||
/**
 * Import ./env.js under a unique query string so each call gets a separate
 * module specifier instead of reusing a previously imported one.
 */
async function importFreshEnvModule() {
  const cacheBuster = `${Date.now()}-${Math.random()}`
  return import(`./env.js?ts=${cacheBuster}`)
}
|
||||
|
||||
// getGlobalClaudeFile — three migration branches
//
// Each row: test title, config files to create beforehand, and the file name
// getGlobalClaudeFile() is expected to resolve to inside the temp directory.
const migrationCases: Array<[string, string[], string]> = [
  [
    'getGlobalClaudeFile: new install returns .openclaude.json when neither file exists',
    [],
    '.openclaude.json',
  ],
  [
    'getGlobalClaudeFile: existing user keeps .claude.json when only legacy file exists',
    ['.claude.json'],
    '.claude.json',
  ],
  [
    'getGlobalClaudeFile: migrated user uses .openclaude.json when both files exist',
    ['.claude.json', '.openclaude.json'],
    '.openclaude.json',
  ],
]

for (const [title, existingFiles, expectedName] of migrationCases) {
  test(title, async () => {
    for (const fileName of existingFiles) {
      writeFileSync(join(tempDir, fileName), '{}')
    }
    const { getGlobalClaudeFile } = await importFreshEnvModule()
    expect(getGlobalClaudeFile()).toBe(join(tempDir, expectedName))
  })
}
|
||||
@@ -21,8 +21,21 @@ export const getGlobalClaudeFile = memoize((): string => {
|
||||
return join(getClaudeConfigHomeDir(), '.config.json')
|
||||
}
|
||||
|
||||
const filename = `.claude${fileSuffixForOauthConfig()}.json`
|
||||
return join(process.env.CLAUDE_CONFIG_DIR || homedir(), filename)
|
||||
const oauthSuffix = fileSuffixForOauthConfig()
|
||||
const configDir = process.env.CLAUDE_CONFIG_DIR || homedir()
|
||||
|
||||
// Default to .openclaude.json. Fall back to .claude.json only if the new
|
||||
// file doesn't exist yet and the legacy one does (same migration pattern
|
||||
// as resolveClaudeConfigHomeDir for the config directory).
|
||||
const newFilename = `.openclaude${oauthSuffix}.json`
|
||||
const legacyFilename = `.claude${oauthSuffix}.json`
|
||||
if (
|
||||
!getFsImplementation().existsSync(join(configDir, newFilename)) &&
|
||||
getFsImplementation().existsSync(join(configDir, legacyFilename))
|
||||
) {
|
||||
return join(configDir, legacyFilename)
|
||||
}
|
||||
return join(configDir, newFilename)
|
||||
})
|
||||
|
||||
const hasInternetAccess = memoize(async (): Promise<boolean> => {
|
||||
|
||||
@@ -24,7 +24,7 @@ type CachedParse = { ok: true; value: unknown } | { ok: false }
|
||||
// lodash memoize default resolver = first arg only).
|
||||
// Skip caching above this size — the LRU stores the full string as the key,
|
||||
// so a 200KB config file would pin ~10MB in #keyList across 50 slots. Large
|
||||
// inputs like ~/.claude.json also change between reads (numStartups bumps on
|
||||
// inputs like ~/.openclaude.json also change between reads (numStartups bumps on
|
||||
// every CC startup), so the cache never hits anyway.
|
||||
const PARSE_CACHE_MAX_KEY_BYTES = 8 * 1024
|
||||
|
||||
|
||||
@@ -44,9 +44,10 @@ function getCandidateLocalBinaryPaths(localInstallDir: string): string[] {
|
||||
}
|
||||
|
||||
/**
 * Reports whether `execPath` lies inside a managed local installation, i.e.
 * contains `/.openclaude/local/node_modules/` or the legacy
 * `/.claude/local/node_modules/` segment.
 *
 * @param execPath Path of the running executable; may use `\` separators.
 * @returns true when either managed-install segment occurs in the path.
 */
export function isManagedLocalInstallationPath(execPath: string): boolean {
  // Collapse every run of backslashes into a single forward slash so that
  // Windows-style paths match the same patterns as POSIX ones.
  const normalizedExecPath = execPath.replace(/\\+/g, '/')
  return (
    normalizedExecPath.includes('/.openclaude/local/node_modules/') ||
    normalizedExecPath.includes('/.claude/local/node_modules/')
  )
}
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ export function applySafeConfigEnvironmentVariables(): void {
|
||||
: null
|
||||
}
|
||||
|
||||
// Global config (~/.claude.json) is user-controlled. In CCD mode,
|
||||
// Global config (~/.openclaude.json) is user-controlled. In CCD mode,
|
||||
// filterSettingsEnv strips keys that were in the spawn env snapshot so
|
||||
// the desktop host's operational vars (OTEL, etc.) are not overridden.
|
||||
Object.assign(process.env, filterSettingsEnv(getGlobalConfig().env))
|
||||
|
||||
@@ -123,7 +123,6 @@ export const SAFE_ENV_VARS = new Set([
|
||||
'ANTHROPIC_DEFAULT_SONNET_MODEL_DESCRIPTION',
|
||||
'ANTHROPIC_DEFAULT_SONNET_MODEL_NAME',
|
||||
'ANTHROPIC_DEFAULT_SONNET_MODEL_SUPPORTED_CAPABILITIES',
|
||||
'ANTHROPIC_FOUNDRY_API_KEY',
|
||||
'ANTHROPIC_MODEL',
|
||||
'ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION',
|
||||
'ANTHROPIC_SMALL_FAST_MODEL',
|
||||
|
||||
205
src/utils/model/benchmark.ts
Normal file
205
src/utils/model/benchmark.ts
Normal file
@@ -0,0 +1,205 @@
|
||||
/**
|
||||
* Model Benchmarking for OpenClaude
|
||||
*
|
||||
* Tests and compares model speed/quality for informed model selection.
|
||||
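 * Only OpenAI-compatible chat-completions endpoints are benchmarked (e.g.
 * OpenAI, a local Ollama server, NVIDIA NIM or MiniMax via OPENAI_BASE_URL);
 * any other configuration yields a "not supported" result.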
|
||||
*/
|
||||
|
||||
import { getAPIProvider } from './providers.js'
|
||||
|
||||
/** Outcome of benchmarking a single model. */
export interface BenchmarkResult {
  /** Model identifier that was sent in the request. */
  model: string
  /** Value of getAPIProvider() at the time the result was built. */
  provider: string
  /** Milliseconds from request start to the first streamed content; 0 if none arrived or the run failed. */
  firstTokenMs: number
  /** Estimated token count (streamed characters / 4), so it may be fractional. */
  totalTokens: number
  /** Estimated tokens divided by total elapsed time, scaled to per-second. */
  tokensPerSecond: number
  /** True when the stream was read to completion without an error. */
  success: boolean
  /** Failure description; only set when `success` is false. */
  error?: string
}
|
||||
|
||||
/** Prompt sent as the single user message of every benchmark request. */
const TEST_PROMPT = 'Write a short hello world in Python.'
/** `max_tokens` cap for the benchmark completion. */
const MAX_TOKENS = 50
/** Abort the benchmark request after this many milliseconds. */
const TIMEOUT_MS = 30_000
|
||||
|
||||
/**
 * Resolves the chat-completions URL to benchmark against.
 *
 * Resolution order:
 *  1. OPENAI_BASE_URL pointing at a local Ollama port, NVIDIA or MiniMax
 *     → `<base>/chat/completions`.
 *  2. Provider `openai` or `firstParty` → OPENAI_BASE_URL if set, otherwise
 *     the public OpenAI API root.
 *
 * @returns The endpoint URL, or null when no benchmarkable endpoint applies.
 */
function getBenchmarkEndpoint(): string | null {
  const provider = getAPIProvider()
  // Drop trailing slashes so "http://host/v1/" does not turn into
  // "http://host/v1//chat/completions".
  const baseUrl = process.env.OPENAI_BASE_URL?.replace(/\/+$/, '')
  const completionsUrl = (root: string): string => `${root}/chat/completions`

  // Substrings that mark a base URL as directly benchmarkable: local Ollama
  // ports plus NVIDIA NIM / MiniMax hosts.
  const directUrlHints = ['localhost:11434', 'localhost:11435', 'nvidia', 'minimax']
  if (baseUrl && directUrlHints.some(hint => baseUrl.includes(hint))) {
    return completionsUrl(baseUrl)
  }

  // OpenAI-compatible endpoints
  if (provider === 'openai' || provider === 'firstParty') {
    return completionsUrl(baseUrl || 'https://api.openai.com/v1')
  }

  return null
}
|
||||
|
||||
/**
 * Builds the `Authorization` header value from OPENAI_API_KEY.
 *
 * @returns `Bearer <key>`, or null when the variable is unset or empty.
 */
function getBenchmarkAuthHeader(): string | null {
  const key = process.env.OPENAI_API_KEY
  return key ? `Bearer ${key}` : null
}
|
||||
|
||||
/** Builds the failure-shaped result shared by every error path. */
function failedBenchmarkResult(model: string, error: string): BenchmarkResult {
  return {
    model,
    provider: getAPIProvider(),
    firstTokenMs: 0,
    totalTokens: 0,
    tokensPerSecond: 0,
    success: false,
    error,
  }
}

/**
 * Pulls the streamed delta text out of one server-sent-event line.
 * Returns null for non-data lines, the `[DONE]` sentinel, malformed JSON and
 * chunks that carry no text.
 */
function extractDeltaContent(rawLine: string): string | null {
  // trim() also removes the "\r" left behind by CRLF-terminated streams.
  const line = rawLine.trim()
  if (!line.startsWith('data:')) return null
  // The space after "data:" is optional in the SSE format.
  const payload = line.slice('data:'.length).trim()
  if (payload === '' || payload === '[DONE]') return null
  try {
    const content: unknown = JSON.parse(payload)?.choices?.[0]?.delta?.content
    return typeof content === 'string' && content !== '' ? content : null
  } catch {
    // skip invalid JSON
    return null
  }
}

/**
 * Streams one short completion from `model` and measures time-to-first-token
 * and approximate throughput.
 *
 * Never rejects: every failure (unsupported provider, HTTP error, timeout,
 * network error) is reported through `success: false` and `error`.
 *
 * @param model Model identifier to send in the request body.
 * @param onChunk Optional callback invoked with each streamed text fragment.
 * @returns The measured result, or a failure-shaped result.
 */
export async function benchmarkModel(
  model: string,
  onChunk?: (text: string) => void,
): Promise<BenchmarkResult> {
  const endpoint = getBenchmarkEndpoint()
  const authHeader = getBenchmarkAuthHeader()

  if (!endpoint || !authHeader) {
    return failedBenchmarkResult(model, 'Benchmark not supported for this provider')
  }

  const startTime = performance.now()
  let totalTokens = 0
  let firstTokenMs: number | null = null

  const consumeLine = (line: string): void => {
    const content = extractDeltaContent(line)
    if (content === null) return
    if (firstTokenMs === null) {
      firstTokenMs = performance.now() - startTime
    }
    // Rough heuristic: about four characters per token.
    totalTokens += content.length / 4
    onChunk?.(content)
  }

  try {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': authHeader,
      },
      body: JSON.stringify({
        model,
        messages: [{ role: 'user', content: TEST_PROMPT }],
        max_tokens: MAX_TOKENS,
        stream: true,
      }),
      signal: AbortSignal.timeout(TIMEOUT_MS),
    })

    if (!response.ok) {
      let errorMsg = `HTTP ${response.status}`
      try {
        const error = await response.json()
        errorMsg = error.error?.message || errorMsg
      } catch {
        // Body was not JSON; keep the status-code message.
      }
      return failedBenchmarkResult(model, errorMsg)
    }

    const reader = response.body?.getReader()
    if (!reader) {
      throw new Error('No response body')
    }

    const decoder = new TextDecoder()
    let buffer = ''

    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break

        buffer += decoder.decode(value, { stream: true })
        const lines = buffer.split('\n')
        // The last element is an incomplete line; carry it into the next read.
        buffer = lines.pop() ?? ''
        lines.forEach(consumeLine)
      }

      // Flush bytes still held by the decoder and process a final line that
      // was not newline-terminated; previously that chunk was dropped.
      buffer += decoder.decode()
      if (buffer !== '') consumeLine(buffer)
    } finally {
      // No-op after a clean end of stream; on error paths (timeout, throwing
      // onChunk) it stops the download instead of leaving it open.
      try {
        await reader.cancel()
      } catch {
        // Stream had already errored; nothing left to release.
      }
    }

    const totalMs = performance.now() - startTime
    const tokensPerSecond = totalMs > 0 ? (totalTokens / totalMs) * 1000 : 0

    return {
      model,
      provider: getAPIProvider(),
      firstTokenMs: firstTokenMs ?? 0,
      totalTokens,
      tokensPerSecond,
      success: true,
    }
  } catch (error) {
    return failedBenchmarkResult(
      model,
      error instanceof Error ? error.message : 'Unknown error',
    )
  }
}
|
||||
|
||||
/**
 * Benchmarks `models` one after another, in the given order.
 *
 * @param models Model identifiers to benchmark.
 * @param onProgress Optional callback fired after each model finishes with the
 *   1-based count completed, the total, and that model's result.
 * @returns One result per input model, in input order.
 */
export async function benchmarkMultipleModels(
  models: string[],
  onProgress?: (completed: number, total: number, result: BenchmarkResult) => void,
): Promise<BenchmarkResult[]> {
  const collected: BenchmarkResult[] = []

  // Awaiting inside the loop keeps the runs sequential.
  for (const [index, model] of models.entries()) {
    const outcome = await benchmarkModel(model)
    collected.push(outcome)
    onProgress?.(index + 1, models.length, outcome)
  }

  return collected
}
|
||||
|
||||
/**
 * Renders benchmark results as a plain-text table, fastest model first.
 *
 * The input array is not modified; sorting happens on a copy.
 *
 * @param results Results to render.
 * @returns Header, divider and one row per result, joined with newlines.
 */
export function formatBenchmarkResults(results: BenchmarkResult[]): string {
  const header = 'Model'.padEnd(40) + 'TPS' + ' First Token' + ' Status'
  const divider = '-'.repeat(70)

  // Copy before sorting: Array.prototype.sort works in place and would
  // otherwise reorder the caller's array.
  const rows = [...results]
    .sort((a, b) => b.tokensPerSecond - a.tokensPerSecond)
    .map(r => {
      // Truncate long model names so they fit the 40-column name field.
      const name = r.model.length > 38 ? r.model.slice(0, 37) + '…' : r.model
      const tps = r.tokensPerSecond.toFixed(1).padStart(6)
      const first = r.firstTokenMs > 0 ? `${r.firstTokenMs.toFixed(0)}ms`.padStart(12) : 'N/A'.padStart(12)
      const status = r.success ? '✓' : '✗'
      return name.padEnd(40) + tps + ' ' + first + ' ' + status
    })

  return [header, divider, ...rows].join('\n')
}
|
||||
|
||||
/**
 * Reports whether a benchmark can run in the current environment: both an
 * endpoint and an auth header must be resolvable.
 */
export function isBenchmarkSupported(): boolean {
  const prerequisites = [getBenchmarkEndpoint(), getBenchmarkAuthHeader()]
  return prerequisites.every(value => value !== null)
}
|
||||
@@ -20,7 +20,7 @@ export const OPENAI_MODEL_DEFAULTS = {
|
||||
// Override with GEMINI_MODEL env var.
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gemini model used for each Claude tier when the Gemini provider is active.
export const GEMINI_MODEL_DEFAULTS = {
  opus: 'gemini-2.5-pro', // most capable
  sonnet: 'gemini-2.0-flash', // balanced
  haiku: 'gemini-2.0-flash-lite', // fast & cheap
} as const
|
||||
@@ -112,7 +112,7 @@ export const CLAUDE_OPUS_4_CONFIG = {
|
||||
vertex: 'claude-opus-4@20250514',
|
||||
foundry: 'claude-opus-4',
|
||||
openai: 'gpt-4o',
|
||||
gemini: 'gemini-2.5-pro-preview-03-25',
|
||||
gemini: 'gemini-2.5-pro',
|
||||
github: 'github:copilot',
|
||||
codex: 'gpt-5.4',
|
||||
'nvidia-nim': 'nvidia/llama-3.1-nemotron-70b-instruct',
|
||||
@@ -125,7 +125,7 @@ export const CLAUDE_OPUS_4_1_CONFIG = {
|
||||
vertex: 'claude-opus-4-1@20250805',
|
||||
foundry: 'claude-opus-4-1',
|
||||
openai: 'gpt-4o',
|
||||
gemini: 'gemini-2.5-pro-preview-03-25',
|
||||
gemini: 'gemini-2.5-pro',
|
||||
github: 'github:copilot',
|
||||
codex: 'gpt-5.4',
|
||||
'nvidia-nim': 'nvidia/llama-3.1-nemotron-70b-instruct',
|
||||
@@ -138,7 +138,7 @@ export const CLAUDE_OPUS_4_5_CONFIG = {
|
||||
vertex: 'claude-opus-4-5@20251101',
|
||||
foundry: 'claude-opus-4-5',
|
||||
openai: 'gpt-4o',
|
||||
gemini: 'gemini-2.5-pro-preview-03-25',
|
||||
gemini: 'gemini-2.5-pro',
|
||||
github: 'github:copilot',
|
||||
codex: 'gpt-5.4',
|
||||
'nvidia-nim': 'nvidia/llama-3.1-nemotron-70b-instruct',
|
||||
@@ -151,7 +151,7 @@ export const CLAUDE_OPUS_4_6_CONFIG = {
|
||||
vertex: 'claude-opus-4-6',
|
||||
foundry: 'claude-opus-4-6',
|
||||
openai: 'gpt-4o',
|
||||
gemini: 'gemini-2.5-pro-preview-03-25',
|
||||
gemini: 'gemini-2.5-pro',
|
||||
github: 'github:copilot',
|
||||
codex: 'gpt-5.4',
|
||||
'nvidia-nim': 'nvidia/llama-3.1-nemotron-70b-instruct',
|
||||
|
||||
115
src/utils/model/model.openai-shim-providers.test.ts
Normal file
115
src/utils/model/model.openai-shim-providers.test.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import { afterEach, beforeEach, expect, test } from 'bun:test'
|
||||
|
||||
import { saveGlobalConfig } from '../config.js'
|
||||
import { getUserSpecifiedModelSetting } from './model.js'
|
||||
|
||||
// Environment variables these tests touch, listed once so the snapshot below
// cannot drift from the list.
const TRACKED_ENV_KEYS = [
  'CLAUDE_CODE_USE_OPENAI',
  'CLAUDE_CODE_USE_GEMINI',
  'CLAUDE_CODE_USE_GITHUB',
  'CLAUDE_CODE_USE_MISTRAL',
  'CLAUDE_CODE_USE_BEDROCK',
  'CLAUDE_CODE_USE_VERTEX',
  'CLAUDE_CODE_USE_FOUNDRY',
  'NVIDIA_NIM',
  'MINIMAX_API_KEY',
  'OPENAI_MODEL',
  'OPENAI_BASE_URL',
  'CODEX_API_KEY',
  'CHATGPT_ACCOUNT_ID',
] as const

// Snapshot of the tracked variables taken at module load; unset variables are
// stored as undefined so they can be deleted again on restore.
const SAVED_ENV = Object.fromEntries(
  TRACKED_ENV_KEYS.map(key => [key, process.env[key]]),
) as Record<(typeof TRACKED_ENV_KEYS)[number], string | undefined>
|
||||
|
||||
/**
 * Puts one tracked environment variable back to its snapshot value, deleting
 * it when it was unset at snapshot time.
 */
function restoreEnv(key: keyof typeof SAVED_ENV): void {
  const original = SAVED_ENV[key]
  if (original !== undefined) {
    process.env[key] = original
    return
  }
  delete process.env[key]
}
|
||||
|
||||
// Start every test with all tracked provider variables unset and no persisted
// model in the global config.
beforeEach(() => {
  for (const key of Object.keys(SAVED_ENV)) {
    delete process.env[key]
  }
  saveGlobalConfig(current => ({ ...current, model: undefined }))
})
|
||||
|
||||
// Restore the snapshot environment and clear the persisted model again so
// nothing set by a test leaks out.
afterEach(() => {
  const trackedKeys = Object.keys(SAVED_ENV) as Array<keyof typeof SAVED_ENV>
  trackedKeys.forEach(key => restoreEnv(key))
  saveGlobalConfig(current => ({ ...current, model: undefined }))
})
|
||||
|
||||
// Every case persists a stale settings.model, applies the provider's env, and
// expects getUserSpecifiedModelSetting() to return OPENAI_MODEL instead.
//
// Regression behind the codex case: switching from Moonshot
// (settings.model='kimi-k2.6' persisted from that session) to the Codex
// profile. Codex profile correctly sets OPENAI_MODEL=codexplan + base URL to
// chatgpt.com/backend-api/codex. getUserSpecifiedModelSetting previously
// ignored env for 'codex' provider and returned settings.model='kimi-k2.6',
// causing Codex's API to reject the request:
// "The 'kimi-k2.6' model is not supported when using Codex".
const shimProviderCases: Array<{
  name: string
  staleModel: string
  env: Record<string, string>
  expected: string
}> = [
  {
    name: 'codex provider reads OPENAI_MODEL, not stale settings.model',
    staleModel: 'kimi-k2.6',
    env: {
      CLAUDE_CODE_USE_OPENAI: '1',
      OPENAI_BASE_URL: 'https://chatgpt.com/backend-api/codex',
      OPENAI_MODEL: 'codexplan',
      CODEX_API_KEY: 'codex-test',
      CHATGPT_ACCOUNT_ID: 'acct_test',
    },
    expected: 'codexplan',
  },
  {
    name: 'nvidia-nim provider reads OPENAI_MODEL, not stale settings.model',
    staleModel: 'kimi-k2.6',
    env: {
      NVIDIA_NIM: '1',
      CLAUDE_CODE_USE_OPENAI: '1',
      OPENAI_MODEL: 'nvidia/llama-3.1-nemotron-70b-instruct',
    },
    expected: 'nvidia/llama-3.1-nemotron-70b-instruct',
  },
  {
    name: 'minimax provider reads OPENAI_MODEL, not stale settings.model',
    staleModel: 'kimi-k2.6',
    env: {
      MINIMAX_API_KEY: 'minimax-test',
      CLAUDE_CODE_USE_OPENAI: '1',
      OPENAI_MODEL: 'MiniMax-M2.5',
    },
    expected: 'MiniMax-M2.5',
  },
  {
    name: 'openai provider still reads OPENAI_MODEL (regression guard)',
    staleModel: 'stale-default',
    env: {
      CLAUDE_CODE_USE_OPENAI: '1',
      OPENAI_MODEL: 'gpt-4o',
    },
    expected: 'gpt-4o',
  },
  {
    name: 'github provider still reads OPENAI_MODEL (regression guard)',
    staleModel: 'stale-default',
    env: {
      CLAUDE_CODE_USE_GITHUB: '1',
      OPENAI_MODEL: 'github:copilot',
    },
    expected: 'github:copilot',
  },
]

for (const { name, staleModel, env, expected } of shimProviderCases) {
  test(name, () => {
    saveGlobalConfig(current => ({ ...current, model: staleModel }))
    Object.assign(process.env, env)

    expect(getUserSpecifiedModelSetting()).toBe(expected)
  })
}
|
||||
|
||||
@@ -91,11 +91,24 @@ export function getUserSpecifiedModelSetting(): ModelSetting | undefined {
|
||||
const setting = normalizeModelSetting(settings.model)
|
||||
// Read the model env var that matches the active provider to prevent
|
||||
// cross-provider leaks (e.g. ANTHROPIC_MODEL sent to the OpenAI API).
|
||||
//
|
||||
// All OpenAI-shim providers (openai, codex, github, nvidia-nim, minimax)
|
||||
// set CLAUDE_CODE_USE_OPENAI=1 + OPENAI_MODEL via
|
||||
// applyProviderProfileToProcessEnv. Earlier this check only included
|
||||
// openai/github — codex/nvidia-nim/minimax fell through to the stale
|
||||
// settings.model, so switching from (say) Moonshot to Codex kept firing
|
||||
// `kimi-k2.6` at the Codex endpoint and getting 400s.
|
||||
const provider = getAPIProvider()
|
||||
const isOpenAIShimProvider =
|
||||
provider === 'openai' ||
|
||||
provider === 'codex' ||
|
||||
provider === 'github' ||
|
||||
provider === 'nvidia-nim' ||
|
||||
provider === 'minimax'
|
||||
specifiedModel =
|
||||
(provider === 'gemini' ? process.env.GEMINI_MODEL : undefined) ||
|
||||
(provider === 'mistral' ? process.env.MISTRAL_MODEL : undefined) ||
|
||||
(provider === 'openai' || provider === 'gemini' || provider === 'mistral' || provider === 'github' ? process.env.OPENAI_MODEL : undefined) ||
|
||||
(isOpenAIShimProvider ? process.env.OPENAI_MODEL : undefined) ||
|
||||
(provider === 'firstParty' ? process.env.ANTHROPIC_MODEL : undefined) ||
|
||||
setting ||
|
||||
undefined
|
||||
@@ -140,7 +153,7 @@ export function getDefaultOpusModel(): ModelName {
|
||||
}
|
||||
// Gemini provider
|
||||
if (getAPIProvider() === 'gemini') {
|
||||
return process.env.GEMINI_MODEL || 'gemini-2.5-pro-preview-03-25'
|
||||
return process.env.GEMINI_MODEL || 'gemini-2.5-pro'
|
||||
}
|
||||
// Mistral provider
|
||||
if (getAPIProvider() === 'mistral') {
|
||||
|
||||
30
src/utils/model/modelCache.test.ts
Normal file
30
src/utils/model/modelCache.test.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
import { describe, expect, it, beforeEach, afterEach, vi } from 'bun:test'
|
||||
import { isModelCacheValid, getCachedModelsFromDisk, saveModelsToCache } from '../model/modelCache.js'
|
||||
|
||||
// NOTE(review): modelCache.ts does not appear to import ollamaModels.js
// directly, so this mock may only matter through a transitive import —
// confirm it is still needed.
vi.mock('../model/ollamaModels.js', () => ({
  isOllamaProvider: vi.fn(() => true),
}))

// NOTE(review): these cases read the real cache directory under the user's
// home, so they presumably assume no cache file exists there — confirm, or
// point the cache at a temp directory.
describe('modelCache', () => {
  describe('isModelCacheValid', () => {
    it('returns false for non-existent cache', async () => {
      expect(await isModelCacheValid('ollama')).toBe(false)
    })
  })

  describe('getCachedModelsFromDisk', () => {
    it('returns null when no cache is available', async () => {
      expect(await getCachedModelsFromDisk()).toBeNull()
    })
  })

  describe('saveModelsToCache', () => {
    it('has saveModelsToCache function', () => {
      expect(typeof saveModelsToCache).toBe('function')
    })
  })
})
|
||||
165
src/utils/model/modelCache.ts
Normal file
165
src/utils/model/modelCache.ts
Normal file
@@ -0,0 +1,165 @@
|
||||
/**
|
||||
* Model Caching for OpenClaude
|
||||
*
|
||||
* Caches model lists to disk for faster startup and offline access.
|
||||
* Uses async fs operations to avoid blocking the event loop.
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync } from 'node:fs'
import { access, mkdir, readFile, unlink, writeFile } from 'node:fs/promises'
import { homedir } from 'node:os'
import { join } from 'node:path'
import { getAPIProvider } from './providers.js'
|
||||
|
||||
/** Format tag written into every cache file; a mismatch invalidates the file. */
const CACHE_VERSION = '1'
/** Cache entries older than this many hours are treated as stale. */
const CACHE_TTL_HOURS = 24
/** Name of the cache directory created inside the user's home directory. */
const CACHE_DIR_NAME = '.openclaude-model-cache'
|
||||
|
||||
/** On-disk shape of one provider's cached model list. */
interface ModelCache {
  /** Cache format version; compared against CACHE_VERSION on read. */
  version: string
  /** Date.now() at the time the cache was written. */
  timestamp: number
  /** Provider the list belongs to; also used as the cache file's base name. */
  provider: string
  /** The cached model entries. */
  models: Array<{ value: string; label: string; description: string }>
}
|
||||
|
||||
/**
 * Returns the cache directory path under the user's home directory, creating
 * the directory if it is missing.
 *
 * Creation is synchronous so the directory exists by the time the caller uses
 * the path. It is also best-effort: a failure is swallowed here and surfaces
 * in the caller's own file operation instead.
 */
function getCacheDir(): string {
  const cacheDir = join(homedir(), CACHE_DIR_NAME)
  try {
    // recursive: true makes this a no-op when the directory already exists.
    mkdirSync(cacheDir, { recursive: true })
  } catch {
    // Unwritable home or a file in the way: callers' reads and writes
    // handle the resulting error themselves.
  }
  return cacheDir
}
|
||||
|
||||
/** Returns the path of the JSON cache file for `provider` inside the cache directory. */
function getCacheFilePath(provider: string): string {
  const fileName = `${provider}.json`
  return join(getCacheDir(), fileName)
}
|
||||
|
||||
/**
 * True when OPENAI_BASE_URL mentions localhost, nvidia or minimax, or when
 * the active provider is `openai`.
 */
function isOpenAICompatibleProvider(): boolean {
  const baseUrl = process.env.OPENAI_BASE_URL || ''
  const urlHints = ['localhost', 'nvidia', 'minimax']
  if (urlHints.some(hint => baseUrl.includes(hint))) {
    return true
  }
  // The provider is consulted only when the URL gave no match.
  return getAPIProvider() === 'openai'
}
|
||||
|
||||
/**
 * Checks whether a usable cache file exists for `provider`.
 *
 * A cache is valid when the file can be read and parsed, its version and
 * provider match, and it is younger than CACHE_TTL_HOURS. Any I/O or parse
 * error counts as "not valid".
 */
export async function isModelCacheValid(provider: string): Promise<boolean> {
  const cachePath = getCacheFilePath(provider)

  try {
    await access(cachePath)
    const cached = JSON.parse(await readFile(cachePath, 'utf-8')) as ModelCache

    const matchesExpected =
      cached.version === CACHE_VERSION && cached.provider === provider
    if (!matchesExpected) {
      return false
    }

    const ageHours = (Date.now() - cached.timestamp) / (1000 * 60 * 60)
    return ageHours < CACHE_TTL_HOURS
  } catch {
    // Missing file, unreadable file or malformed JSON.
    return false
  }
}
|
||||
|
||||
/**
 * Loads the cached model list for the active provider.
 *
 * @returns The cached models, or null when caching does not apply to the
 *   current configuration, the cache is missing or stale, or the file cannot
 *   be read.
 */
export async function getCachedModelsFromDisk<T>(): Promise<T[] | null> {
  const provider = getAPIProvider()
  const baseUrl = process.env.OPENAI_BASE_URL || ''

  // Base-URL substrings that mark a cacheable endpoint: local Ollama ports,
  // NVIDIA and MiniMax.
  const cacheableUrlHints = [
    'localhost:11434',
    'localhost:11435',
    'nvidia',
    'integrate.api.nvidia',
    'minimax',
  ]
  const cacheable =
    provider === 'openai' ||
    cacheableUrlHints.some(hint => baseUrl.includes(hint))
  if (!cacheable) {
    return null
  }

  const cachePath = getCacheFilePath(provider)
  const stillValid = await isModelCacheValid(provider)
  if (!stillValid) {
    return null
  }

  try {
    const raw = await readFile(cachePath, 'utf-8')
    const cached = JSON.parse(raw) as ModelCache
    return cached.models as T[]
  } catch {
    return null
  }
}
|
||||
|
||||
/**
 * Writes `models` to the active provider's cache file, stamped with the
 * current time and cache version. A write failure is logged as a warning and
 * not rethrown.
 */
export async function saveModelsToCache(
  models: Array<{ value: string; label: string; description: string }>,
): Promise<void> {
  const provider = getAPIProvider()
  if (!provider) return

  const cachePath = getCacheFilePath(provider)
  const snapshot: ModelCache = {
    version: CACHE_VERSION,
    timestamp: Date.now(),
    provider,
    models,
  }

  try {
    const serialized = JSON.stringify(snapshot, null, 2)
    await writeFile(cachePath, serialized, 'utf-8')
  } catch (error) {
    console.warn('[ModelCache] Failed to save cache:', error)
  }
}
|
||||
|
||||
/**
 * Deletes cached model lists.
 *
 * @param provider When given, only that provider's cache file is removed;
 *   otherwise the cache files of all known cacheable providers are removed.
 *
 * Missing files are ignored, and each file is removed independently so one
 * absent file cannot prevent the others from being deleted.
 */
export async function clearModelCache(provider?: string): Promise<void> {
  let targets: string[]
  if (provider) {
    targets = [getCacheFilePath(provider)]
  } else {
    const cacheDir = getCacheDir()
    // 'openai' is included because caches are written under the active
    // provider name and the `openai` provider is treated as cacheable.
    targets = ['ollama', 'nvidia-nim', 'minimax', 'openai'].map(name =>
      join(cacheDir, `${name}.json`),
    )
  }

  // allSettled: a rejected unlink (typically ENOENT) must not stop the rest.
  await Promise.allSettled(targets.map(path => unlink(path)))
}
|
||||
|
||||
/**
 * Describes the active provider's cache file.
 *
 * @returns The provider recorded in the file and a human-readable age
 *   (`"<h>h <m>m"`, or `"<m>m"` under one hour), or null when the file is
 *   missing or unreadable.
 */
export async function getModelCacheInfo(): Promise<{ provider: string; age: string } | null> {
  const cachePath = getCacheFilePath(getAPIProvider())
  const msPerHour = 1000 * 60 * 60
  const msPerMinute = 1000 * 60

  try {
    await access(cachePath)
    const cached = JSON.parse(await readFile(cachePath, 'utf-8')) as ModelCache

    const ageMs = Date.now() - cached.timestamp
    const hours = Math.floor(ageMs / msPerHour)
    const minutes = Math.floor((ageMs % msPerHour) / msPerMinute)
    const age = hours > 0 ? `${hours}h ${minutes}m` : `${minutes}m`

    return { provider: cached.provider, age }
  } catch {
    return null
  }
}
|
||||
|
||||
/**
 * Reports whether model caching applies to the current configuration: a local
 * Ollama port, an NVIDIA or MiniMax base URL, or the `openai` provider.
 */
export function isCacheAvailable(): boolean {
  const baseUrl = process.env.OPENAI_BASE_URL || ''
  const cacheableUrlHints = [
    'localhost:11434',
    'localhost:11435',
    'nvidia',
    'integrate.api.nvidia',
    'minimax',
  ]
  if (cacheableUrlHints.some(hint => baseUrl.includes(hint))) {
    return true
  }
  // The provider is consulted only when the URL gave no match.
  return getAPIProvider() === 'openai'
}
|
||||
@@ -219,6 +219,17 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
|
||||
'kimi-k2.5': 262_144,
|
||||
'glm-5': 202_752,
|
||||
'glm-4.7': 202_752,
|
||||
|
||||
// Moonshot AI direct API (api.moonshot.ai/v1). Values from Moonshot's
// published model cards: kimi-k2.6 and kimi-k2-thinking have a 256K context,
// while kimi-k2 and kimi-k2-instruct are listed at 128K. Prefix matching
// in lookupByKey catches variants like "kimi-k2.6-preview".
|
||||
'kimi-k2.6': 262_144,
|
||||
'kimi-k2': 131_072,
|
||||
'kimi-k2-instruct': 131_072,
|
||||
'kimi-k2-thinking': 262_144,
|
||||
'moonshot-v1-8k': 8_192,
|
||||
'moonshot-v1-32k': 32_768,
|
||||
'moonshot-v1-128k': 131_072,
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -391,6 +402,15 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
|
||||
'kimi-k2.5': 32_768,
|
||||
'glm-5': 16_384,
|
||||
'glm-4.7': 16_384,
|
||||
|
||||
// Moonshot AI direct API
|
||||
'kimi-k2.6': 32_768,
|
||||
'kimi-k2': 32_768,
|
||||
'kimi-k2-instruct': 32_768,
|
||||
'kimi-k2-thinking': 32_768,
|
||||
'moonshot-v1-8k': 4_096,
|
||||
'moonshot-v1-32k': 16_384,
|
||||
'moonshot-v1-128k': 32_768,
|
||||
}
|
||||
|
||||
function lookupByModel<T>(table: Record<string, T>, model: string): T | undefined {
|
||||
|
||||
@@ -107,3 +107,60 @@ test('official OpenAI base URLs now keep provider detection on openai for aliase
|
||||
const { getAPIProvider } = await importFreshProvidersModule()
|
||||
expect(getAPIProvider()).toBe('openai')
|
||||
})
|
||||
|
||||
// isGithubNativeAnthropicMode
|
||||
|
||||
// Each case lists the env applied after clearProviderEnv(), an optional
// explicit resolvedModel argument, and the expected verdict.
const githubNativeAnthropicCases: Array<{
  name: string
  useGithub: boolean
  openaiModel: string
  resolvedModel?: string
  expected: boolean
}> = [
  {
    name: 'isGithubNativeAnthropicMode: false when CLAUDE_CODE_USE_GITHUB is not set',
    useGithub: false,
    openaiModel: 'claude-sonnet-4-5',
    expected: false,
  },
  {
    name: 'isGithubNativeAnthropicMode: true for bare claude- model via OPENAI_MODEL',
    useGithub: true,
    openaiModel: 'claude-sonnet-4-5',
    expected: true,
  },
  {
    name: 'isGithubNativeAnthropicMode: true for github:copilot:claude- compound format',
    useGithub: true,
    openaiModel: 'github:copilot:claude-sonnet-4',
    expected: true,
  },
  {
    name: 'isGithubNativeAnthropicMode: true when resolvedModel is a claude- model',
    useGithub: true,
    openaiModel: 'github:copilot',
    resolvedModel: 'claude-haiku-4-5',
    expected: true,
  },
  {
    name: 'isGithubNativeAnthropicMode: false for generic github:copilot alias',
    useGithub: true,
    openaiModel: 'github:copilot',
    expected: false,
  },
  {
    name: 'isGithubNativeAnthropicMode: false for non-Claude model',
    useGithub: true,
    openaiModel: 'gpt-4o',
    expected: false,
  },
  {
    name: 'isGithubNativeAnthropicMode: false for github:copilot:gpt- model',
    useGithub: true,
    openaiModel: 'github:copilot:gpt-4o',
    expected: false,
  },
]

for (const testCase of githubNativeAnthropicCases) {
  test(testCase.name, async () => {
    clearProviderEnv()
    if (testCase.useGithub) {
      process.env.CLAUDE_CODE_USE_GITHUB = '1'
    }
    process.env.OPENAI_MODEL = testCase.openaiModel
    const { isGithubNativeAnthropicMode } = await importFreshProvidersModule()
    const verdict =
      testCase.resolvedModel === undefined
        ? isGithubNativeAnthropicMode()
        : isGithubNativeAnthropicMode(testCase.resolvedModel)
    expect(verdict).toBe(testCase.expected)
  })
}
|
||||
|
||||
@@ -45,6 +45,24 @@ export function getAPIProvider(): APIProvider {
|
||||
export function usesAnthropicAccountFlow(): boolean {
|
||||
return getAPIProvider() === 'firstParty'
|
||||
}
|
||||
|
||||
/**
 * Decides whether the GitHub provider should talk Anthropic's native API
 * format rather than go through the OpenAI-compatible shim.
 *
 * The answer is true only when CLAUDE_CODE_USE_GITHUB is truthy and the
 * effective model name contains "claude-" (case-insensitive, anywhere in the
 * string — so both "claude-sonnet-4" and "github:copilot:claude-sonnet-4"
 * qualify). The effective model is `resolvedModel` when it is non-blank,
 * otherwise OPENAI_MODEL.
 *
 * api.githubcopilot.com supports Anthropic native format for Claude models,
 * enabling prompt caching via cache_control blocks which significantly reduces
 * per-turn token costs by caching the system prompt and tool definitions.
 */
export function isGithubNativeAnthropicMode(resolvedModel?: string): boolean {
  const githubEnabled = isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)
  if (!githubEnabled) {
    return false
  }

  const explicitModel = resolvedModel?.trim()
  const envModel = process.env.OPENAI_MODEL?.trim()
  const effectiveModel = explicitModel || envModel || ''

  return effectiveModel.toLowerCase().includes('claude-')
}
|
||||
function isCodexModel(): boolean {
|
||||
return shouldUseCodexTransport(
|
||||
process.env.OPENAI_MODEL || '',
|
||||
|
||||
@@ -64,6 +64,7 @@ export const DANGEROUS_FILES = [
|
||||
'.profile',
|
||||
'.ripgreprc',
|
||||
'.mcp.json',
|
||||
'.openclaude.json',
|
||||
'.claude.json',
|
||||
] as const
|
||||
|
||||
|
||||
@@ -532,6 +532,7 @@ export async function gitPull(
|
||||
): Promise<{ code: number; stderr: string }> {
|
||||
logForDebugging(`git pull: cwd=${cwd} ref=${ref ?? 'default'}`)
|
||||
const env = { ...process.env, ...GIT_NO_PROMPT_ENV }
|
||||
const baseArgs = ['-c', 'core.hooksPath=/dev/null']
|
||||
const credentialArgs = options?.disableCredentialHelper
|
||||
? ['-c', 'credential.helper=']
|
||||
: []
|
||||
@@ -539,7 +540,7 @@ export async function gitPull(
|
||||
if (ref) {
|
||||
const fetchResult = await execFileNoThrowWithCwd(
|
||||
gitExe(),
|
||||
[...credentialArgs, 'fetch', 'origin', ref],
|
||||
[...baseArgs, ...credentialArgs, 'fetch', 'origin', ref],
|
||||
{ cwd, timeout: getPluginGitTimeoutMs(), stdin: 'ignore', env },
|
||||
)
|
||||
|
||||
@@ -549,7 +550,7 @@ export async function gitPull(
|
||||
|
||||
const checkoutResult = await execFileNoThrowWithCwd(
|
||||
gitExe(),
|
||||
[...credentialArgs, 'checkout', ref],
|
||||
[...baseArgs, ...credentialArgs, 'checkout', ref],
|
||||
{ cwd, timeout: getPluginGitTimeoutMs(), stdin: 'ignore', env },
|
||||
)
|
||||
|
||||
@@ -559,7 +560,7 @@ export async function gitPull(
|
||||
|
||||
const pullResult = await execFileNoThrowWithCwd(
|
||||
gitExe(),
|
||||
[...credentialArgs, 'pull', 'origin', ref],
|
||||
[...baseArgs, ...credentialArgs, 'pull', 'origin', ref],
|
||||
{ cwd, timeout: getPluginGitTimeoutMs(), stdin: 'ignore', env },
|
||||
)
|
||||
if (pullResult.code !== 0) {
|
||||
@@ -571,7 +572,7 @@ export async function gitPull(
|
||||
|
||||
const result = await execFileNoThrowWithCwd(
|
||||
gitExe(),
|
||||
[...credentialArgs, 'pull', 'origin', 'HEAD'],
|
||||
[...baseArgs, ...credentialArgs, 'pull', 'origin', 'HEAD'],
|
||||
{ cwd, timeout: getPluginGitTimeoutMs(), stdin: 'ignore', env },
|
||||
)
|
||||
if (result.code !== 0) {
|
||||
@@ -625,6 +626,8 @@ async function gitSubmoduleUpdate(
|
||||
[
|
||||
'-c',
|
||||
'core.sshCommand=ssh -o BatchMode=yes -o StrictHostKeyChecking=yes',
|
||||
'-c',
|
||||
'core.hooksPath=/dev/null',
|
||||
...credentialArgs,
|
||||
'submodule',
|
||||
'update',
|
||||
@@ -810,6 +813,8 @@ export async function gitClone(
|
||||
const args = [
|
||||
'-c',
|
||||
'core.sshCommand=ssh -o BatchMode=yes -o StrictHostKeyChecking=yes',
|
||||
'-c',
|
||||
'core.hooksPath=/dev/null',
|
||||
'clone',
|
||||
'--depth',
|
||||
'1',
|
||||
|
||||
299
src/utils/providerAutoDetect.test.ts
Normal file
299
src/utils/providerAutoDetect.test.ts
Normal file
@@ -0,0 +1,299 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import {
|
||||
detectBestProvider,
|
||||
detectLocalService,
|
||||
detectProviderFromEnv,
|
||||
} from './providerAutoDetect.ts'
|
||||
|
||||
// Hermetic env scan: Codex auth-file detection is stubbed to "absent" so the
// outcome never depends on whether the dev machine has a ~/.codex/auth.json.
function scan(env: Record<string, string | undefined>) {
  const codexAuthAbsent = () => false
  return detectProviderFromEnv({ env, hasCodexAuth: codexAuthAbsent })
}
|
||||
|
||||
describe('detectProviderFromEnv — priority order', () => {
|
||||
test('ANTHROPIC_API_KEY wins over all others', () => {
|
||||
expect(
|
||||
scan({
|
||||
ANTHROPIC_API_KEY: 'sk-ant-x',
|
||||
OPENAI_API_KEY: 'sk-x',
|
||||
GEMINI_API_KEY: 'gem-x',
|
||||
}),
|
||||
).toEqual({ kind: 'anthropic', source: 'ANTHROPIC_API_KEY set' })
|
||||
})
|
||||
|
||||
test('CODEX_API_KEY beats OpenAI/Gemini/etc', () => {
|
||||
expect(
|
||||
scan({
|
||||
CODEX_API_KEY: 'codex-x',
|
||||
OPENAI_API_KEY: 'sk-x',
|
||||
}),
|
||||
).toEqual({ kind: 'codex', source: 'CODEX_API_KEY set' })
|
||||
})
|
||||
|
||||
test('CHATGPT_ACCOUNT_ID alone is enough for Codex', () => {
|
||||
expect(
|
||||
scan({
|
||||
CHATGPT_ACCOUNT_ID: 'acct-123',
|
||||
}),
|
||||
).toEqual({ kind: 'codex', source: 'CHATGPT_ACCOUNT_ID set' })
|
||||
})
|
||||
|
||||
test('Codex auth file on disk is detected without any env', () => {
|
||||
expect(
|
||||
detectProviderFromEnv({ env: {}, hasCodexAuth: () => true }),
|
||||
).toEqual({ kind: 'codex', source: '~/.codex/auth.json present' })
|
||||
})
|
||||
|
||||
test('GITHUB_TOKEN wins over OpenAI', () => {
|
||||
expect(
|
||||
scan({
|
||||
GITHUB_TOKEN: 'ghp-x',
|
||||
OPENAI_API_KEY: 'sk-x',
|
||||
}),
|
||||
).toEqual({ kind: 'github', source: 'GITHUB_TOKEN set (GitHub Copilot)' })
|
||||
})
|
||||
|
||||
test('GH_TOKEN is equivalent to GITHUB_TOKEN', () => {
|
||||
expect(
|
||||
scan({
|
||||
GH_TOKEN: 'ghp-x',
|
||||
}),
|
||||
).toEqual({ kind: 'github', source: 'GH_TOKEN set (GitHub Copilot)' })
|
||||
})
|
||||
|
||||
test('OPENAI_API_KEYS (plural) detected', () => {
|
||||
expect(
|
||||
scan({
|
||||
OPENAI_API_KEYS: 'sk-a,sk-b',
|
||||
}),
|
||||
).toEqual({ kind: 'openai', source: 'OPENAI_API_KEYS set' })
|
||||
})
|
||||
|
||||
test('OPENAI_API_KEY reports baseUrl when set', () => {
|
||||
expect(
|
||||
scan({
|
||||
OPENAI_API_KEY: 'sk-x',
|
||||
OPENAI_BASE_URL: 'https://openrouter.ai/api/v1',
|
||||
}),
|
||||
).toEqual({
|
||||
kind: 'openai',
|
||||
source: 'OPENAI_API_KEY set',
|
||||
baseUrl: 'https://openrouter.ai/api/v1',
|
||||
})
|
||||
})
|
||||
|
||||
test('GEMINI_API_KEY detected', () => {
|
||||
expect(scan({ GEMINI_API_KEY: 'gem-x' })).toEqual({
|
||||
kind: 'gemini',
|
||||
source: 'GEMINI_API_KEY set',
|
||||
})
|
||||
})
|
||||
|
||||
test('GOOGLE_API_KEY also detects Gemini', () => {
|
||||
expect(scan({ GOOGLE_API_KEY: 'gk-x' })).toEqual({
|
||||
kind: 'gemini',
|
||||
source: 'GOOGLE_API_KEY set',
|
||||
})
|
||||
})
|
||||
|
||||
test('MISTRAL_API_KEY detected', () => {
|
||||
expect(scan({ MISTRAL_API_KEY: 'mis-x' })).toEqual({
|
||||
kind: 'mistral',
|
||||
source: 'MISTRAL_API_KEY set',
|
||||
})
|
||||
})
|
||||
|
||||
test('MINIMAX_API_KEY detected', () => {
|
||||
expect(scan({ MINIMAX_API_KEY: 'mm-x' })).toEqual({
|
||||
kind: 'minimax',
|
||||
source: 'MINIMAX_API_KEY set',
|
||||
})
|
||||
})
|
||||
|
||||
test('empty-string values are ignored', () => {
|
||||
expect(
|
||||
scan({
|
||||
ANTHROPIC_API_KEY: '',
|
||||
OPENAI_API_KEY: ' ',
|
||||
GEMINI_API_KEY: 'gem-x',
|
||||
}),
|
||||
).toEqual({ kind: 'gemini', source: 'GEMINI_API_KEY set' })
|
||||
})
|
||||
|
||||
test('no credentials → null', () => {
|
||||
expect(scan({})).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
describe('detectLocalService', () => {
|
||||
test('returns Ollama when its /api/tags responds ok', async () => {
|
||||
const fetchImpl = (async (input: URL | RequestInfo) => {
|
||||
const url = typeof input === 'string' ? input : (input as URL).toString()
|
||||
if (url.includes(':11434')) {
|
||||
return new Response('{"models":[]}', { status: 200 })
|
||||
}
|
||||
return new Response('', { status: 404 })
|
||||
}) as typeof fetch
|
||||
|
||||
const result = await detectLocalService({
|
||||
env: {},
|
||||
fetchImpl,
|
||||
timeoutMs: 200,
|
||||
})
|
||||
expect(result?.kind).toBe('ollama')
|
||||
expect(result?.baseUrl).toBe('http://localhost:11434')
|
||||
})
|
||||
|
||||
test('Ollama wins over LM Studio even when both are reachable', async () => {
|
||||
const fetchImpl = (async () => new Response('{}', { status: 200 })) as typeof fetch
|
||||
const result = await detectLocalService({
|
||||
env: {},
|
||||
fetchImpl,
|
||||
timeoutMs: 200,
|
||||
})
|
||||
expect(result?.kind).toBe('ollama')
|
||||
})
|
||||
|
||||
test('falls back to LM Studio when Ollama is unreachable', async () => {
|
||||
const fetchImpl = (async (input: URL | RequestInfo) => {
|
||||
const url = typeof input === 'string' ? input : (input as URL).toString()
|
||||
if (url.includes(':1234')) {
|
||||
return new Response('{"data":[]}', { status: 200 })
|
||||
}
|
||||
return new Response('', { status: 404 })
|
||||
}) as typeof fetch
|
||||
|
||||
const result = await detectLocalService({
|
||||
env: {},
|
||||
fetchImpl,
|
||||
timeoutMs: 200,
|
||||
})
|
||||
expect(result?.kind).toBe('lm-studio')
|
||||
expect(result?.baseUrl).toBe('http://localhost:1234')
|
||||
})
|
||||
|
||||
test('returns null when no local services respond', async () => {
|
||||
const fetchImpl = (async () =>
|
||||
new Response('', { status: 500 })) as typeof fetch
|
||||
const result = await detectLocalService({
|
||||
env: {},
|
||||
fetchImpl,
|
||||
timeoutMs: 200,
|
||||
})
|
||||
expect(result).toBeNull()
|
||||
})
|
||||
|
||||
test('honors OLLAMA_BASE_URL override', async () => {
|
||||
const probedUrls: string[] = []
|
||||
const fetchImpl = (async (input: URL | RequestInfo) => {
|
||||
const url = typeof input === 'string' ? input : (input as URL).toString()
|
||||
probedUrls.push(url)
|
||||
return new Response('{"models":[]}', { status: 200 })
|
||||
}) as typeof fetch
|
||||
|
||||
const result = await detectLocalService({
|
||||
env: { OLLAMA_BASE_URL: 'http://10.0.0.5:11434' },
|
||||
fetchImpl,
|
||||
timeoutMs: 200,
|
||||
})
|
||||
expect(result?.baseUrl).toBe('http://10.0.0.5:11434')
|
||||
expect(probedUrls).toContain('http://10.0.0.5:11434/api/tags')
|
||||
})
|
||||
|
||||
test('probe timeout does not throw — returns null', async () => {
|
||||
const fetchImpl = (async (_input: URL | RequestInfo, init?: RequestInit) => {
|
||||
// Respect the caller's abort signal so the race with timeoutMs is fair.
|
||||
return new Promise<Response>((_resolve, reject) => {
|
||||
const onAbort = () => reject(new Error('aborted'))
|
||||
init?.signal?.addEventListener('abort', onAbort)
|
||||
setTimeout(() => {
|
||||
init?.signal?.removeEventListener('abort', onAbort)
|
||||
_resolve(new Response('ok'))
|
||||
}, 500)
|
||||
})
|
||||
}) as typeof fetch
|
||||
|
||||
const result = await detectLocalService({
|
||||
env: {},
|
||||
fetchImpl,
|
||||
timeoutMs: 50,
|
||||
})
|
||||
expect(result).toBeNull()
|
||||
})
|
||||
|
||||
test('network errors do not throw', async () => {
|
||||
const fetchImpl = (async () => {
|
||||
throw new Error('ECONNREFUSED')
|
||||
}) as typeof fetch
|
||||
|
||||
const result = await detectLocalService({
|
||||
env: {},
|
||||
fetchImpl,
|
||||
timeoutMs: 200,
|
||||
})
|
||||
expect(result).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
describe('detectBestProvider — orchestrator', () => {
|
||||
test('env match short-circuits the local probe', async () => {
|
||||
let probeCalled = false
|
||||
const fetchImpl = (async () => {
|
||||
probeCalled = true
|
||||
return new Response('{}', { status: 200 })
|
||||
}) as typeof fetch
|
||||
|
||||
const result = await detectBestProvider({
|
||||
env: { ANTHROPIC_API_KEY: 'sk-ant' },
|
||||
fetchImpl,
|
||||
timeoutMs: 200,
|
||||
hasCodexAuth: () => false,
|
||||
})
|
||||
expect(result?.kind).toBe('anthropic')
|
||||
expect(probeCalled).toBe(false)
|
||||
})
|
||||
|
||||
test('env miss falls through to local-service probe', async () => {
|
||||
const fetchImpl = (async () => new Response('{}', { status: 200 })) as typeof fetch
|
||||
const result = await detectBestProvider({
|
||||
env: {},
|
||||
fetchImpl,
|
||||
timeoutMs: 200,
|
||||
hasCodexAuth: () => false,
|
||||
})
|
||||
expect(result?.kind).toBe('ollama')
|
||||
})
|
||||
|
||||
test('skipLocal prevents network probes', async () => {
|
||||
let probeCalled = false
|
||||
const fetchImpl = (async () => {
|
||||
probeCalled = true
|
||||
return new Response('{}', { status: 200 })
|
||||
}) as typeof fetch
|
||||
|
||||
const result = await detectBestProvider({
|
||||
env: {},
|
||||
fetchImpl,
|
||||
skipLocal: true,
|
||||
hasCodexAuth: () => false,
|
||||
})
|
||||
expect(result).toBeNull()
|
||||
expect(probeCalled).toBe(false)
|
||||
})
|
||||
|
||||
test('completely empty environment returns null', async () => {
|
||||
const fetchImpl = (async () => {
|
||||
throw new Error('nothing reachable')
|
||||
}) as typeof fetch
|
||||
|
||||
const result = await detectBestProvider({
|
||||
env: {},
|
||||
fetchImpl,
|
||||
timeoutMs: 100,
|
||||
hasCodexAuth: () => false,
|
||||
})
|
||||
expect(result).toBeNull()
|
||||
})
|
||||
})
|
||||
283
src/utils/providerAutoDetect.ts
Normal file
283
src/utils/providerAutoDetect.ts
Normal file
@@ -0,0 +1,283 @@
|
||||
/**
|
||||
* Zero-config provider autodetection.
|
||||
*
|
||||
* Scans the environment (API keys, OAuth tokens, stored credentials) and local
|
||||
* network (Ollama, LM Studio) to pick the best provider for first-run users
|
||||
* who have not explicitly configured one. Returns a structured detection
|
||||
* result that callers can consume to build a launch-ready profile env, or
|
||||
* null when nothing is detected — in which case the existing onboarding /
|
||||
* picker flow should take over.
|
||||
*
|
||||
* Detection priority (first match wins):
|
||||
* 1. ANTHROPIC_API_KEY → first-party Claude (most capable default)
|
||||
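* 2. Codex: CODEX_API_KEY, CHATGPT_ACCOUNT_ID, CODEX_ACCOUNT_ID, or an existing Codex auth file (CODEX_AUTH_PATH or ~/.codex/auth.json; presence only, contents are not validated)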
|
||||
* 3. GitHub Copilot: GITHUB_TOKEN or GH_TOKEN
|
||||
* 4. OPENAI_API_KEY / OPENAI_API_KEYS
|
||||
* 5. GEMINI_API_KEY or GOOGLE_API_KEY
|
||||
* 6. MISTRAL_API_KEY
|
||||
* 7. MINIMAX_API_KEY
|
||||
* 8. Local Ollama reachable (default localhost:11434)
|
||||
* 9. Local LM Studio reachable (default localhost:1234)
|
||||
*
|
||||
* Local-service probes are parallelized and cheap (short timeout, no
|
||||
* request body). Env scans are synchronous and run first so we don't make
|
||||
* network calls when a credential is already present.
|
||||
*
|
||||
* This module intentionally does NOT decide whether to apply the detection;
|
||||
* callers should gate on hasExplicitProviderSelection() (providerProfile.ts)
|
||||
* and the presence of a persisted profile file.
|
||||
*/
|
||||
|
||||
import { existsSync } from 'fs'
|
||||
import { homedir } from 'os'
|
||||
import { join } from 'path'
|
||||
|
||||
/**
 * Identifier of a provider that autodetection can report.
 *
 * 'anthropic' through 'minimax' are produced by the synchronous env scan;
 * 'ollama' and 'lm-studio' are produced by the local-service probes.
 */
export type DetectedProviderKind =
|
||||
| 'anthropic'
|
||||
| 'codex'
|
||||
| 'github'
|
||||
| 'openai'
|
||||
| 'gemini'
|
||||
| 'mistral'
|
||||
| 'minimax'
|
||||
| 'ollama'
|
||||
| 'lm-studio'
|
||||
|
||||
export type DetectedProvider = {
|
||||
kind: DetectedProviderKind
|
||||
/** One-line human-readable reason, e.g. "ANTHROPIC_API_KEY set". */
|
||||
source: string
|
||||
/** Present when the detection already resolved a usable base URL. */
|
||||
baseUrl?: string
|
||||
/** Present when detection also narrowed down a specific model. */
|
||||
model?: string
|
||||
}
|
||||
|
||||
/**
 * Environment map accepted by the detection helpers: either the real
 * process environment or a plain string record (e.g. one injected by tests).
 */
type EnvLike = NodeJS.ProcessEnv | Record<string, string | undefined>
|
||||
|
||||
/**
 * Reports whether `key` holds a usable value in `env`.
 *
 * A value counts only when it is a string with at least one
 * non-whitespace character; missing, empty and blank values are all "unset".
 */
function envHasNonEmpty(env: EnvLike, key: string): boolean {
  const raw = env[key]
  if (typeof raw !== 'string') {
    return false
  }
  return raw.trim() !== ''
}
|
||||
|
||||
/**
 * Returns the first name in `keys` (in the given order) whose value in `env`
 * is non-blank, or `undefined` when none of them is set.
 */
function firstSet(env: EnvLike, keys: readonly string[]): string | undefined {
  return keys.find(candidate => envHasNonEmpty(env, candidate))
}
|
||||
|
||||
/**
 * Default Codex auth-file check: true when a file exists at the path named
 * by the CODEX_AUTH_PATH variable of the real process environment, or at
 * `<home>/.codex/auth.json`. Only existence is tested; the file is not read.
 */
function defaultHasCodexAuthFile(): boolean {
  const homeAuthPath = join(homedir(), '.codex', 'auth.json')
  const overridePath = process.env.CODEX_AUTH_PATH

  // An unset or empty override is skipped rather than passed to existsSync.
  if (overridePath && existsSync(overridePath)) {
    return true
  }
  return existsSync(homeAuthPath)
}
|
||||
|
||||
export type DetectProviderFromEnvOptions = {
|
||||
env?: EnvLike
|
||||
/**
|
||||
* Override Codex auth-file detection. Primarily for tests — the default
|
||||
* implementation checks ~/.codex/auth.json and CODEX_AUTH_PATH on disk.
|
||||
*/
|
||||
hasCodexAuth?: () => boolean
|
||||
}
|
||||
|
||||
/**
 * Type guard telling an options object apart from a bare environment map.
 *
 * A value is treated as options when it has a function-valued `hasCodexAuth`
 * property or an object-valued `env` property. Anything else (including an
 * empty object or a record of string variables) is treated as an env map.
 */
function isOptionsObject(
  value: EnvLike | DetectProviderFromEnvOptions | undefined,
): value is DetectProviderFromEnvOptions {
  if (typeof value !== 'object' || !value) {
    return false
  }

  const candidate = value as { hasCodexAuth?: unknown; env?: unknown }
  return (
    ('hasCodexAuth' in candidate &&
      typeof candidate.hasCodexAuth === 'function') ||
    ('env' in candidate && typeof candidate.env === 'object')
  )
}
|
||||
|
||||
/**
 * Synchronous, env-only provider scan.
 *
 * Walks the credential variables in priority order (Anthropic, Codex,
 * GitHub Copilot, OpenAI, Gemini, Mistral, MiniMax) and returns the first
 * match. Blank or whitespace-only values are treated as unset. No network
 * access is performed; the only non-env input is the Codex auth-file check,
 * which can be replaced through `hasCodexAuth`.
 *
 * @param envOrOptions Either an environment map to scan, or an options
 *   object (`{ env?, hasCodexAuth? }`). Defaults to `process.env`.
 * @returns The highest-priority detected provider, or `null` when no
 *   credential is present. For OpenAI, `baseUrl` is included only when
 *   OPENAI_BASE_URL or OPENAI_API_BASE holds a non-blank value.
 */
export function detectProviderFromEnv(
  envOrOptions: EnvLike | DetectProviderFromEnvOptions = process.env,
): DetectedProvider | null {
  const options: DetectProviderFromEnvOptions = isOptionsObject(envOrOptions)
    ? envOrOptions
    : { env: envOrOptions as EnvLike }
  const env = options.env ?? process.env
  const hasCodexAuth = options.hasCodexAuth ?? defaultHasCodexAuthFile

  if (envHasNonEmpty(env, 'ANTHROPIC_API_KEY')) {
    return { kind: 'anthropic', source: 'ANTHROPIC_API_KEY set' }
  }

  // Env variables are consulted first so the auth-file check (disk access by
  // default) only runs when no Codex variable is set.
  const codexKey = firstSet(env, [
    'CODEX_API_KEY',
    'CHATGPT_ACCOUNT_ID',
    'CODEX_ACCOUNT_ID',
  ])
  if (codexKey !== undefined || hasCodexAuth()) {
    return {
      kind: 'codex',
      source: codexKey ? `${codexKey} set` : '~/.codex/auth.json present',
    }
  }

  const githubKey = firstSet(env, ['GITHUB_TOKEN', 'GH_TOKEN'])
  if (githubKey) {
    return {
      kind: 'github',
      source: `${githubKey} set (GitHub Copilot)`,
    }
  }

  const openaiKey = firstSet(env, ['OPENAI_API_KEYS', 'OPENAI_API_KEY'])
  if (openaiKey) {
    // Apply the same blank-is-unset rule to the base URL: an empty
    // OPENAI_BASE_URL must not shadow OPENAI_API_BASE, and the key is omitted
    // entirely (rather than set to undefined/'') when neither is usable.
    const baseUrlKey = firstSet(env, ['OPENAI_BASE_URL', 'OPENAI_API_BASE'])
    const baseUrl = baseUrlKey ? env[baseUrlKey]?.trim() : undefined
    return {
      kind: 'openai',
      source: `${openaiKey} set`,
      ...(baseUrl ? { baseUrl } : {}),
    }
  }

  const geminiKey = firstSet(env, ['GEMINI_API_KEY', 'GOOGLE_API_KEY'])
  if (geminiKey) {
    return { kind: 'gemini', source: `${geminiKey} set` }
  }

  if (envHasNonEmpty(env, 'MISTRAL_API_KEY')) {
    return { kind: 'mistral', source: 'MISTRAL_API_KEY set' }
  }

  if (envHasNonEmpty(env, 'MINIMAX_API_KEY')) {
    return { kind: 'minimax', source: 'MINIMAX_API_KEY set' }
  }

  return null
}
|
||||
|
||||
/** One local-service reachability probe plus the detection result it yields on success. */
type LocalProbe = {
|
||||
// Provider kind reported when the probe succeeds.
kind: DetectedProviderKind
|
||||
// Full URL requested by the probe.
url: string
|
||||
// Milliseconds before the probe request is aborted.
timeoutMs: number
|
||||
// Human-readable reason copied into the detection result.
source: string
|
||||
// Service base URL copied into the detection result.
baseUrl: string
|
||||
}
|
||||
|
||||
const DEFAULT_LOCAL_PROBE_TIMEOUT_MS = 1200
|
||||
|
||||
/**
 * Issues a GET to `url` and reports whether it answered with a 2xx status.
 *
 * Never throws: network errors, aborts and timeouts all resolve to `false`.
 *
 * @param url Absolute URL to probe.
 * @param timeoutMs Milliseconds after which the request is aborted.
 * @param fetchImpl Fetch implementation to use (injectable for tests).
 * @returns `true` when the response status is OK, otherwise `false`.
 */
async function probeReachable(
  url: string,
  timeoutMs: number,
  fetchImpl: typeof fetch,
): Promise<boolean> {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)
  try {
    const response = await fetchImpl(url, {
      method: 'GET',
      signal: controller.signal,
    })
    const reachable = response.ok

    // Only the status matters. Discard the unread body so the underlying
    // connection is released promptly instead of waiting for GC. Failures
    // here (locked or non-standard body) must not affect the probe result.
    try {
      void response.body?.cancel().catch(() => undefined)
    } catch {
      // Nothing to release.
    }

    return reachable
  } catch {
    return false
  } finally {
    clearTimeout(timer)
  }
}
|
||||
|
||||
/**
 * Returns the highest-priority local service reachable from the host.
 *
 * Ollama (`<base>/api/tags`) and LM Studio (`<base>/v1/models`) are probed in
 * parallel, then the winner is chosen by list priority rather than by which
 * answered first, so a slow-but-preferred service still wins.
 *
 * Base URLs come from OLLAMA_BASE_URL / LM_STUDIO_BASE_URL with trailing
 * slashes removed. Blank or whitespace-only values are treated as unset and
 * fall back to the localhost defaults.
 *
 * @param options.env Environment map to read overrides from (default `process.env`).
 * @param options.fetchImpl Fetch implementation (default `globalThis.fetch`).
 * @param options.timeoutMs Per-probe timeout in milliseconds.
 * @returns The detected local provider, or `null` when no probe succeeds.
 */
export async function detectLocalService(options?: {
  env?: EnvLike
  fetchImpl?: typeof fetch
  timeoutMs?: number
}): Promise<DetectedProvider | null> {
  const env = options?.env ?? process.env
  const fetchImpl = options?.fetchImpl ?? globalThis.fetch
  const timeoutMs = options?.timeoutMs ?? DEFAULT_LOCAL_PROBE_TIMEOUT_MS

  // `||` (not `??`) on the trimmed value: an exported-but-empty override must
  // not turn the probe URL into a relative path like "/api/tags".
  const resolveBaseUrl = (key: string, fallback: string): string => {
    const configured = env[key]?.trim()
    return (configured || fallback).replace(/\/+$/, '')
  }

  const ollamaBase = resolveBaseUrl('OLLAMA_BASE_URL', 'http://localhost:11434')
  const lmStudioBase = resolveBaseUrl(
    'LM_STUDIO_BASE_URL',
    'http://localhost:1234',
  )

  // Order encodes priority: earlier entries win when several are reachable.
  const probes: LocalProbe[] = [
    {
      kind: 'ollama',
      url: `${ollamaBase}/api/tags`,
      timeoutMs,
      source: `Ollama reachable at ${ollamaBase}`,
      baseUrl: ollamaBase,
    },
    {
      kind: 'lm-studio',
      url: `${lmStudioBase}/v1/models`,
      timeoutMs,
      source: `LM Studio reachable at ${lmStudioBase}`,
      baseUrl: lmStudioBase,
    },
  ]

  // Promise.all keeps input order, so the first reachable entry is also the
  // highest-priority one.
  const results = await Promise.all(
    probes.map(async probe => ({
      probe,
      reachable: await probeReachable(probe.url, probe.timeoutMs, fetchImpl),
    })),
  )

  const winner = results.find(result => result.reachable)
  if (!winner) {
    return null
  }

  return {
    kind: winner.probe.kind,
    source: winner.probe.source,
    baseUrl: winner.probe.baseUrl,
  }
}
|
||||
|
||||
/**
 * Orchestrator for zero-config detection.
 *
 * Runs the synchronous env scan first and returns its match if there is one.
 * Only when the scan finds nothing, and `skipLocal` is not set, are the
 * local-service probes (async, ~1-2s worst case) attempted.
 */
export async function detectBestProvider(options?: {
  env?: EnvLike
  fetchImpl?: typeof fetch
  timeoutMs?: number
  /** Skip local-service probes — useful for tests or offline smoke checks. */
  skipLocal?: boolean
  /** Override for Codex auth-file detection. See detectProviderFromEnv. */
  hasCodexAuth?: () => boolean
}): Promise<DetectedProvider | null> {
  const env = options?.env ?? process.env

  const credentialMatch = detectProviderFromEnv({
    env,
    hasCodexAuth: options?.hasCodexAuth,
  })
  const probingAllowed = !options?.skipLocal

  if (credentialMatch === null && probingAllowed) {
    return detectLocalService({
      env,
      fetchImpl: options?.fetchImpl,
      timeoutMs: options?.timeoutMs,
    })
  }

  // Either a credential was found, or probing is disabled (result is null).
  return credentialMatch
}
|
||||
@@ -1,9 +1,9 @@
|
||||
import { afterEach, expect, mock, test } from 'bun:test'
|
||||
|
||||
import {
|
||||
getLocalOpenAICompatibleProviderLabel,
|
||||
listOpenAICompatibleModels,
|
||||
} from './providerDiscovery.js'
|
||||
async function loadProviderDiscoveryModule() {
|
||||
// @ts-expect-error cache-busting query string for Bun module mocks
|
||||
return import(`./providerDiscovery.js?ts=${Date.now()}-${Math.random()}`)
|
||||
}
|
||||
|
||||
const originalFetch = globalThis.fetch
|
||||
const originalEnv = {
|
||||
@@ -16,6 +16,8 @@ afterEach(() => {
|
||||
})
|
||||
|
||||
test('lists models from a local openai-compatible /models endpoint', async () => {
|
||||
const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock((input, init) => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
expect(url).toBe('http://localhost:1234/v1/models')
|
||||
@@ -47,6 +49,8 @@ test('lists models from a local openai-compatible /models endpoint', async () =>
|
||||
})
|
||||
|
||||
test('returns null when a local openai-compatible /models request fails', async () => {
|
||||
const { listOpenAICompatibleModels } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(() =>
|
||||
Promise.resolve(new Response('not available', { status: 503 })),
|
||||
) as typeof globalThis.fetch
|
||||
@@ -56,13 +60,19 @@ test('returns null when a local openai-compatible /models request fails', async
|
||||
).resolves.toBeNull()
|
||||
})
|
||||
|
||||
test('detects LM Studio from the default localhost port', () => {
|
||||
test('detects LM Studio from the default localhost port', async () => {
|
||||
const { getLocalOpenAICompatibleProviderLabel } =
|
||||
await loadProviderDiscoveryModule()
|
||||
|
||||
expect(getLocalOpenAICompatibleProviderLabel('http://localhost:1234/v1')).toBe(
|
||||
'LM Studio',
|
||||
)
|
||||
})
|
||||
|
||||
test('detects common local openai-compatible providers by hostname', () => {
|
||||
test('detects common local openai-compatible providers by hostname', async () => {
|
||||
const { getLocalOpenAICompatibleProviderLabel } =
|
||||
await loadProviderDiscoveryModule()
|
||||
|
||||
expect(
|
||||
getLocalOpenAICompatibleProviderLabel('http://localai.local:8080/v1'),
|
||||
).toBe('LocalAI')
|
||||
@@ -71,8 +81,283 @@ test('detects common local openai-compatible providers by hostname', () => {
|
||||
).toBe('vLLM')
|
||||
})
|
||||
|
||||
test('falls back to a generic local openai-compatible label', () => {
|
||||
test('detects Moonshot (Kimi) from api.moonshot.ai hostname', async () => {
|
||||
const { getLocalOpenAICompatibleProviderLabel } =
|
||||
await loadProviderDiscoveryModule()
|
||||
|
||||
expect(
|
||||
getLocalOpenAICompatibleProviderLabel('https://api.moonshot.ai/v1'),
|
||||
).toBe('Moonshot (Kimi)')
|
||||
})
|
||||
|
||||
test('falls back to a generic local openai-compatible label', async () => {
|
||||
const { getLocalOpenAICompatibleProviderLabel } =
|
||||
await loadProviderDiscoveryModule()
|
||||
|
||||
expect(
|
||||
getLocalOpenAICompatibleProviderLabel('http://127.0.0.1:8080/v1'),
|
||||
).toBe('Local OpenAI-compatible')
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports unreachable when tags endpoint is down', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
const calledUrls: string[] = []
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
calledUrls.push(url)
|
||||
return Promise.resolve(new Response('not available', { status: 503 }))
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'unreachable',
|
||||
models: [],
|
||||
})
|
||||
|
||||
expect(calledUrls).toEqual([
|
||||
'http://localhost:11434/api/tags',
|
||||
])
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports no models when server is reachable', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
const calledUrls: string[] = []
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
calledUrls.push(url)
|
||||
return Promise.resolve(
|
||||
new Response(JSON.stringify({ models: [] }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
}),
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'no_models',
|
||||
models: [],
|
||||
})
|
||||
|
||||
expect(calledUrls).toEqual([
|
||||
'http://localhost:11434/api/tags',
|
||||
])
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports generation_failed when requested model is missing', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
const calledUrls: string[] = []
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
calledUrls.push(url)
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
models: [{ name: 'llama3.1:8b', size: 1024 }],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
model: 'qwen2.5-coder:7b',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'generation_failed',
|
||||
probeModel: 'qwen2.5-coder:7b',
|
||||
detail: 'requested model not installed: qwen2.5-coder:7b',
|
||||
})
|
||||
|
||||
expect(calledUrls).toEqual(['http://localhost:11434/api/tags'])
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports generation failures when chat probe fails', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
if (url.endsWith('/api/tags')) {
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
models: [{ name: 'qwen2.5-coder:7b', size: 42 }],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
return Promise.resolve(new Response('model not found', { status: 404 }))
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
model: 'qwen2.5-coder:7b',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'generation_failed',
|
||||
probeModel: 'qwen2.5-coder:7b',
|
||||
})
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports generation_failed when chat probe returns invalid JSON', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
if (url.endsWith('/api/tags')) {
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
models: [{ name: 'llama3.1:8b', size: 1024 }],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
return Promise.resolve(
|
||||
new Response('<html>proxy error</html>', {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'text/html' },
|
||||
}),
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'generation_failed',
|
||||
probeModel: 'llama3.1:8b',
|
||||
detail: 'invalid JSON response',
|
||||
})
|
||||
})
|
||||
|
||||
test('ollama generation readiness reports ready when chat probe succeeds', async () => {
|
||||
const { probeOllamaGenerationReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
if (url.endsWith('/api/tags')) {
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
models: [{ name: 'llama3.1:8b', size: 1024 }],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
message: { role: 'assistant', content: 'OK' },
|
||||
done: true,
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
)
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeOllamaGenerationReadiness({
|
||||
baseUrl: 'http://localhost:11434',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
state: 'ready',
|
||||
probeModel: 'llama3.1:8b',
|
||||
})
|
||||
})
|
||||
|
||||
test('atomic chat readiness reports unreachable when /v1/models is down', async () => {
|
||||
const { probeAtomicChatReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
const calledUrls: string[] = []
|
||||
globalThis.fetch = mock(input => {
|
||||
const url = typeof input === 'string' ? input : input.url
|
||||
calledUrls.push(url)
|
||||
return Promise.resolve(new Response('unavailable', { status: 503 }))
|
||||
}) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeAtomicChatReadiness({ baseUrl: 'http://127.0.0.1:1337' }),
|
||||
).resolves.toEqual({ state: 'unreachable' })
|
||||
|
||||
expect(calledUrls[0]).toBe('http://127.0.0.1:1337/v1/models')
|
||||
})
|
||||
|
||||
test('atomic chat readiness reports no_models when server is reachable but empty', async () => {
|
||||
const { probeAtomicChatReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(() =>
|
||||
Promise.resolve(
|
||||
new Response(JSON.stringify({ data: [] }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
}),
|
||||
),
|
||||
) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeAtomicChatReadiness({ baseUrl: 'http://127.0.0.1:1337' }),
|
||||
).resolves.toEqual({ state: 'no_models' })
|
||||
})
|
||||
|
||||
test('atomic chat readiness returns loaded model ids when ready', async () => {
|
||||
const { probeAtomicChatReadiness } = await loadProviderDiscoveryModule()
|
||||
|
||||
globalThis.fetch = mock(() =>
|
||||
Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{ id: 'Qwen3_5-4B_Q4_K_M' },
|
||||
{ id: 'llama-3.1-8b-instruct' },
|
||||
],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
},
|
||||
),
|
||||
),
|
||||
) as typeof globalThis.fetch
|
||||
|
||||
await expect(
|
||||
probeAtomicChatReadiness({ baseUrl: 'http://127.0.0.1:1337' }),
|
||||
).resolves.toEqual({
|
||||
state: 'ready',
|
||||
models: ['Qwen3_5-4B_Q4_K_M', 'llama-3.1-8b-instruct'],
|
||||
})
|
||||
})
|
||||
@@ -4,6 +4,13 @@ import { DEFAULT_OPENAI_BASE_URL } from '../services/api/providerConfig.js'
|
||||
export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434'
|
||||
export const DEFAULT_ATOMIC_CHAT_BASE_URL = 'http://127.0.0.1:1337'
|
||||
|
||||
/**
 * Result shape for an Ollama generation-readiness probe.
 * NOTE(review): presumably the return type of probeOllamaGenerationReadiness,
 * which is not visible in this hunk — confirm.
 */
export type OllamaGenerationReadiness = {
|
||||
// Probe outcome. Per the tests accompanying this change: 'unreachable' when
// /api/tags is not OK, 'no_models' when the model list is empty,
// 'generation_failed' when the requested model is missing or the chat probe
// fails, 'ready' when the chat probe succeeds.
state: 'ready' | 'unreachable' | 'no_models' | 'generation_failed'
|
||||
// Model descriptors associated with the probe (empty in the unreachable and
// no_models test cases).
models: OllamaModelDescriptor[]
|
||||
// Name of the model used for the generation probe, when one was selected.
probeModel?: string
|
||||
// Optional short explanation of a failure, e.g. 'invalid JSON response'.
detail?: string
|
||||
}
|
||||
|
||||
function withTimeoutSignal(timeoutMs: number): {
|
||||
signal: AbortSignal
|
||||
clear: () => void
|
||||
@@ -20,6 +27,83 @@ function trimTrailingSlash(value: string): string {
|
||||
return value.replace(/\/+$/, '')
|
||||
}
|
||||
|
||||
function compactDetail(value: string, maxLength = 180): string {
|
||||
const compact = value.trim().replace(/\s+/g, ' ')
|
||||
if (!compact) {
|
||||
return ''
|
||||
}
|
||||
|
||||
if (compact.length <= maxLength) {
|
||||
return compact
|
||||
}
|
||||
|
||||
return `${compact.slice(0, maxLength)}...`
|
||||
}
|
||||
|
||||
type OllamaTagsPayload = {
|
||||
models?: Array<{
|
||||
name?: string
|
||||
size?: number
|
||||
details?: {
|
||||
family?: string
|
||||
families?: string[]
|
||||
parameter_size?: string
|
||||
quantization_level?: string
|
||||
}
|
||||
}>
|
||||
}
|
||||
|
||||
function normalizeOllamaModels(
|
||||
payload: OllamaTagsPayload,
|
||||
): OllamaModelDescriptor[] {
|
||||
return (payload.models ?? [])
|
||||
.filter(model => Boolean(model.name))
|
||||
.map(model => ({
|
||||
name: model.name!,
|
||||
sizeBytes: typeof model.size === 'number' ? model.size : null,
|
||||
family: model.details?.family ?? null,
|
||||
families: model.details?.families ?? [],
|
||||
parameterSize: model.details?.parameter_size ?? null,
|
||||
quantizationLevel: model.details?.quantization_level ?? null,
|
||||
}))
|
||||
}
|
||||
|
||||
async function fetchOllamaModelsProbe(
|
||||
baseUrl?: string,
|
||||
timeoutMs = 5000,
|
||||
): Promise<{
|
||||
reachable: boolean
|
||||
models: OllamaModelDescriptor[]
|
||||
}> {
|
||||
const { signal, clear } = withTimeoutSignal(timeoutMs)
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
|
||||
method: 'GET',
|
||||
signal,
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
return {
|
||||
reachable: false,
|
||||
models: [],
|
||||
}
|
||||
}
|
||||
|
||||
const payload = (await response.json().catch(() => ({}))) as OllamaTagsPayload
|
||||
return {
|
||||
reachable: true,
|
||||
models: normalizeOllamaModels(payload),
|
||||
}
|
||||
} catch {
|
||||
return {
|
||||
reachable: false,
|
||||
models: [],
|
||||
}
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
}
|
||||
|
||||
export function getOllamaApiBaseUrl(baseUrl?: string): string {
|
||||
const parsed = new URL(
|
||||
baseUrl || process.env.OLLAMA_BASE_URL || DEFAULT_OLLAMA_BASE_URL,
|
||||
@@ -113,6 +197,10 @@ export function getLocalOpenAICompatibleProviderLabel(baseUrl?: string): string
|
||||
if (host.includes('minimax') || haystack.includes('minimax')) {
|
||||
return 'MiniMax'
|
||||
}
|
||||
// Moonshot AI (Kimi) direct API
|
||||
if (host.includes('moonshot') || haystack.includes('moonshot') || haystack.includes('kimi')) {
|
||||
return 'Moonshot (Kimi)'
|
||||
}
|
||||
} catch {
|
||||
// Fall back to the generic label when the base URL is malformed.
|
||||
}
|
||||
@@ -121,61 +209,15 @@ export function getLocalOpenAICompatibleProviderLabel(baseUrl?: string): string
|
||||
}
|
||||
|
||||
export async function hasLocalOllama(baseUrl?: string): Promise<boolean> {
|
||||
const { signal, clear } = withTimeoutSignal(1200)
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
|
||||
method: 'GET',
|
||||
signal,
|
||||
})
|
||||
return response.ok
|
||||
} catch {
|
||||
return false
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
const { reachable } = await fetchOllamaModelsProbe(baseUrl, 1200)
|
||||
return reachable
|
||||
}
|
||||
|
||||
export async function listOllamaModels(
|
||||
baseUrl?: string,
|
||||
): Promise<OllamaModelDescriptor[]> {
|
||||
const { signal, clear } = withTimeoutSignal(5000)
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(baseUrl)}/api/tags`, {
|
||||
method: 'GET',
|
||||
signal,
|
||||
})
|
||||
if (!response.ok) {
|
||||
return []
|
||||
}
|
||||
|
||||
const data = (await response.json()) as {
|
||||
models?: Array<{
|
||||
name?: string
|
||||
size?: number
|
||||
details?: {
|
||||
family?: string
|
||||
families?: string[]
|
||||
parameter_size?: string
|
||||
quantization_level?: string
|
||||
}
|
||||
}>
|
||||
}
|
||||
|
||||
return (data.models ?? [])
|
||||
.filter(model => Boolean(model.name))
|
||||
.map(model => ({
|
||||
name: model.name!,
|
||||
sizeBytes: typeof model.size === 'number' ? model.size : null,
|
||||
family: model.details?.family ?? null,
|
||||
families: model.details?.families ?? [],
|
||||
parameterSize: model.details?.parameter_size ?? null,
|
||||
quantizationLevel: model.details?.quantization_level ?? null,
|
||||
}))
|
||||
} catch {
|
||||
return []
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
const { models } = await fetchOllamaModelsProbe(baseUrl, 5000)
|
||||
return models
|
||||
}
|
||||
|
||||
export async function listOpenAICompatibleModels(options?: {
|
||||
@@ -260,6 +302,24 @@ export async function listAtomicChatModels(
|
||||
}
|
||||
}
|
||||
|
||||
export type AtomicChatReadiness =
|
||||
| { state: 'unreachable' }
|
||||
| { state: 'no_models' }
|
||||
| { state: 'ready'; models: string[] }
|
||||
|
||||
export async function probeAtomicChatReadiness(options?: {
|
||||
baseUrl?: string
|
||||
}): Promise<AtomicChatReadiness> {
|
||||
if (!(await hasLocalAtomicChat(options?.baseUrl))) {
|
||||
return { state: 'unreachable' }
|
||||
}
|
||||
const models = await listAtomicChatModels(options?.baseUrl)
|
||||
if (models.length === 0) {
|
||||
return { state: 'no_models' }
|
||||
}
|
||||
return { state: 'ready', models }
|
||||
}
|
||||
|
||||
export async function benchmarkOllamaModel(
|
||||
modelName: string,
|
||||
baseUrl?: string,
|
||||
@@ -294,3 +354,106 @@ export async function benchmarkOllamaModel(
|
||||
clear()
|
||||
}
|
||||
}
|
||||
|
||||
export async function probeOllamaGenerationReadiness(options?: {
|
||||
baseUrl?: string
|
||||
model?: string
|
||||
timeoutMs?: number
|
||||
}): Promise<OllamaGenerationReadiness> {
|
||||
const timeoutMs = options?.timeoutMs ?? 8000
|
||||
const { reachable, models } = await fetchOllamaModelsProbe(
|
||||
options?.baseUrl,
|
||||
timeoutMs,
|
||||
)
|
||||
if (!reachable) {
|
||||
return {
|
||||
state: 'unreachable',
|
||||
models: [],
|
||||
}
|
||||
}
|
||||
|
||||
if (models.length === 0) {
|
||||
return {
|
||||
state: 'no_models',
|
||||
models: [],
|
||||
}
|
||||
}
|
||||
|
||||
const requestedModel = options?.model?.trim() || undefined
|
||||
if (requestedModel && !models.some(model => model.name === requestedModel)) {
|
||||
return {
|
||||
state: 'generation_failed',
|
||||
models,
|
||||
probeModel: requestedModel,
|
||||
detail: `requested model not installed: ${requestedModel}`,
|
||||
}
|
||||
}
|
||||
|
||||
const probeModel = requestedModel ?? models[0]!.name
|
||||
const { signal, clear } = withTimeoutSignal(timeoutMs)
|
||||
|
||||
try {
|
||||
const response = await fetch(`${getOllamaApiBaseUrl(options?.baseUrl)}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
signal,
|
||||
body: JSON.stringify({
|
||||
model: probeModel,
|
||||
stream: false,
|
||||
messages: [{ role: 'user', content: 'Reply with OK.' }],
|
||||
options: {
|
||||
temperature: 0,
|
||||
num_predict: 8,
|
||||
},
|
||||
}),
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
const responseBody = await response.text().catch(() => '')
|
||||
const detailSuffix = compactDetail(responseBody)
|
||||
return {
|
||||
state: 'generation_failed',
|
||||
models,
|
||||
probeModel,
|
||||
detail: detailSuffix
|
||||
? `status ${response.status}: ${detailSuffix}`
|
||||
: `status ${response.status}`,
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
await response.json()
|
||||
} catch {
|
||||
return {
|
||||
state: 'generation_failed',
|
||||
models,
|
||||
probeModel,
|
||||
detail: 'invalid JSON response',
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
state: 'ready',
|
||||
models,
|
||||
probeModel,
|
||||
}
|
||||
} catch (error) {
|
||||
const detail =
|
||||
error instanceof Error
|
||||
? error.name === 'AbortError'
|
||||
? 'request timed out'
|
||||
: error.message
|
||||
: String(error)
|
||||
|
||||
return {
|
||||
state: 'generation_failed',
|
||||
models,
|
||||
probeModel,
|
||||
detail,
|
||||
}
|
||||
} finally {
|
||||
clear()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -572,31 +572,64 @@ test('buildStartupEnvFromProfile leaves explicit provider selections untouched',
|
||||
assert.equal(env.OPENAI_API_KEY, undefined)
|
||||
})
|
||||
|
||||
test('buildStartupEnvFromProfile lets saved startup profile override profile-managed env', async () => {
|
||||
test('buildStartupEnvFromProfile preserves plural-profile env when the legacy file is stale', async () => {
|
||||
// Regression: a user saves a provider via /provider (plural system).
|
||||
// addProviderProfile does NOT sync the legacy .openclaude-profile.json,
|
||||
// so the legacy file retains whatever it had from an earlier setup (e.g.
|
||||
// OpenAI defaults). At startup, applyActiveProviderProfileFromConfig()
|
||||
// correctly applies the active plural profile (Moonshot) first, marking
|
||||
// env with CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED=1. The legacy-file
|
||||
// load must NOT overwrite that env — it previously did, surfacing as
|
||||
// "banner shows the wrong provider / model".
|
||||
const processEnv = {
|
||||
CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED: '1',
|
||||
CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED_ID: 'saved_ollama',
|
||||
CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED_ID: 'saved_moonshot',
|
||||
CLAUDE_CODE_USE_OPENAI: '1',
|
||||
OPENAI_BASE_URL: 'http://localhost:11434/v1',
|
||||
OPENAI_MODEL: 'llama3.1:8b',
|
||||
OPENAI_BASE_URL: 'https://api.moonshot.ai/v1',
|
||||
OPENAI_MODEL: 'kimi-k2.6',
|
||||
}
|
||||
|
||||
const env = await buildStartupEnvFromProfile({
|
||||
// Stale legacy file — points at SambaNova, but user's active plural
|
||||
// profile is Moonshot and was just applied.
|
||||
persisted: profile('openai', {
|
||||
OPENAI_API_KEY: 'sk-persisted',
|
||||
OPENAI_API_KEY: 'sk-stale',
|
||||
OPENAI_MODEL: 'Meta-Llama-3.1-70B-Instruct',
|
||||
OPENAI_BASE_URL: 'https://api.sambanova.ai/v1',
|
||||
}),
|
||||
processEnv,
|
||||
})
|
||||
|
||||
assert.equal(env, processEnv)
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://api.moonshot.ai/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'kimi-k2.6')
|
||||
// Plural markers are retained — downstream code uses them to verify the
|
||||
// env still belongs to the profile it was applied from.
|
||||
assert.equal(env.CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED, '1')
|
||||
assert.equal(env.CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED_ID, 'saved_moonshot')
|
||||
})
|
||||
|
||||
test('buildStartupEnvFromProfile falls back to legacy file when plural system has not applied', async () => {
|
||||
// Counter-example: first-run user with only the legacy file (no plural
|
||||
// active profile yet). The legacy file is the correct source, so the
|
||||
// load must proceed as before.
|
||||
const processEnv = {
|
||||
CLAUDE_CODE_USE_OPENAI: '1',
|
||||
}
|
||||
|
||||
const env = await buildStartupEnvFromProfile({
|
||||
persisted: profile('openai', {
|
||||
OPENAI_API_KEY: 'sk-legacy',
|
||||
OPENAI_MODEL: 'gpt-4o',
|
||||
OPENAI_BASE_URL: 'https://api.openai.com/v1',
|
||||
}),
|
||||
processEnv,
|
||||
})
|
||||
|
||||
assert.notEqual(env, processEnv)
|
||||
assert.equal(env.CLAUDE_CODE_USE_OPENAI, '1')
|
||||
assert.equal(env.OPENAI_API_KEY, 'sk-persisted')
|
||||
assert.equal(env.OPENAI_MODEL, 'Meta-Llama-3.1-70B-Instruct')
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://api.sambanova.ai/v1')
|
||||
assert.equal(env.CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED, undefined)
|
||||
assert.equal(env.CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED_ID, undefined)
|
||||
assert.equal(env.OPENAI_API_KEY, 'sk-legacy')
|
||||
assert.equal(env.OPENAI_BASE_URL, 'https://api.openai.com/v1')
|
||||
assert.equal(env.OPENAI_MODEL, 'gpt-4o')
|
||||
})
|
||||
|
||||
test('buildStartupEnvFromProfile treats explicit falsey provider flags as user intent', async () => {
|
||||
|
||||
@@ -841,43 +841,35 @@ export async function buildStartupEnvFromProfile(options?: {
|
||||
const processEnv = options?.processEnv ?? process.env
|
||||
const persisted = options?.persisted ?? loadProfileFile()
|
||||
|
||||
// Saved /provider profiles should still win over provider-manager env that was
|
||||
// auto-applied during startup. Only an explicit shell/flag provider selection
|
||||
// should bypass the persisted startup profile.
|
||||
//
|
||||
const profileManagedEnv = processEnv.CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED === '1'
|
||||
|
||||
// If the user explicitly selected a provider via env, allow it to bypass
|
||||
// the persisted profile only when we can prove it was managed by the
|
||||
// persisted profile env itself.
|
||||
// The legacy single-profile file (~/.openclaude-profile.json) is a
|
||||
// first-run / fallback mechanism. The newer plural provider-profile
|
||||
// system (`/provider` presets + activeProviderProfileId in config) is
|
||||
// applied earlier in the bootstrap via applyActiveProviderProfileFromConfig
|
||||
// and signals completion with CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED=1.
|
||||
//
|
||||
// Practically: on initial startup, provider routing env vars can already
|
||||
// be present due to earlier auto-application steps. We should still apply
|
||||
// the persisted profile rather than returning early.
|
||||
// If the plural system has already set env, trust it — do NOT overlay the
|
||||
// legacy file. addProviderProfile() does not sync the legacy file, so a
|
||||
// stale legacy file (e.g. OpenAI defaults from an earlier manual setup)
|
||||
// would otherwise overwrite the correct plural env and surface as the
|
||||
// "banner shows gpt-4o / api.openai.com even though my saved profile is
|
||||
// Moonshot" bug.
|
||||
if (profileManagedEnv) {
|
||||
return processEnv
|
||||
}
|
||||
|
||||
if (!persisted) {
|
||||
return processEnv
|
||||
}
|
||||
|
||||
const launchProcessEnv = profileManagedEnv
|
||||
? (() => {
|
||||
const cleanedEnv = { ...processEnv }
|
||||
for (const key of PROFILE_ENV_KEYS) {
|
||||
delete cleanedEnv[key]
|
||||
}
|
||||
delete cleanedEnv.CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED
|
||||
delete cleanedEnv.CLAUDE_CODE_PROVIDER_PROFILE_ENV_APPLIED_ID
|
||||
return cleanedEnv
|
||||
})()
|
||||
: processEnv
|
||||
|
||||
return buildLaunchEnv({
|
||||
profile: persisted.profile,
|
||||
persisted,
|
||||
goal:
|
||||
options?.goal ??
|
||||
normalizeRecommendationGoal(processEnv.OPENCLAUDE_PROFILE_GOAL),
|
||||
processEnv: launchProcessEnv,
|
||||
processEnv,
|
||||
getOllamaChatBaseUrl:
|
||||
options?.getOllamaChatBaseUrl ?? getOllamaChatBaseUrl,
|
||||
resolveOllamaDefaultModel: options?.resolveOllamaDefaultModel,
|
||||
|
||||
@@ -256,6 +256,83 @@ describe('applyActiveProviderProfileFromConfig', () => {
|
||||
expect(process.env.OPENAI_MODEL).toBe('qwen2.5:3b')
|
||||
})
|
||||
|
||||
test('applies active profile when a bare CLAUDE_CODE_USE_OPENAI flag is stale (no BASE_URL/MODEL)', async () => {
|
||||
// Regression: a leftover `CLAUDE_CODE_USE_OPENAI=1` in the shell with no
|
||||
// paired OPENAI_BASE_URL / OPENAI_MODEL is not a real explicit selection
|
||||
// — it's a stale export. The previous guard treated it as intent and
|
||||
// skipped the saved profile, causing the startup banner to show hardcoded
|
||||
// defaults (gpt-4o @ api.openai.com) instead of the user's active
|
||||
// profile.
|
||||
const { applyActiveProviderProfileFromConfig } =
|
||||
await importFreshProviderProfileModules()
|
||||
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
||||
delete process.env.OPENAI_BASE_URL
|
||||
delete process.env.OPENAI_API_BASE
|
||||
delete process.env.OPENAI_MODEL
|
||||
|
||||
const applied = applyActiveProviderProfileFromConfig({
|
||||
providerProfiles: [
|
||||
buildProfile({
|
||||
id: 'saved_moonshot',
|
||||
baseUrl: 'https://api.moonshot.ai/v1',
|
||||
model: 'kimi-k2.6',
|
||||
}),
|
||||
],
|
||||
activeProviderProfileId: 'saved_moonshot',
|
||||
} as any)
|
||||
|
||||
expect(applied?.id).toBe('saved_moonshot')
|
||||
expect(process.env.OPENAI_BASE_URL).toBe('https://api.moonshot.ai/v1')
|
||||
expect(process.env.OPENAI_MODEL).toBe('kimi-k2.6')
|
||||
})
|
||||
|
||||
test('still respects complete shell selection with USE flag + BASE_URL', async () => {
|
||||
// Counter-example: when the user really did set both the flag AND a
|
||||
// concrete BASE_URL, that IS explicit intent and wins over the saved
|
||||
// profile. This preserves the original "explicit startup wins" semantic.
|
||||
const { applyActiveProviderProfileFromConfig } =
|
||||
await importFreshProviderProfileModules()
|
||||
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
||||
process.env.OPENAI_BASE_URL = 'http://192.168.1.1:8080/v1'
|
||||
delete process.env.OPENAI_MODEL
|
||||
|
||||
const applied = applyActiveProviderProfileFromConfig({
|
||||
providerProfiles: [
|
||||
buildProfile({
|
||||
id: 'saved_moonshot',
|
||||
baseUrl: 'https://api.moonshot.ai/v1',
|
||||
model: 'kimi-k2.6',
|
||||
}),
|
||||
],
|
||||
activeProviderProfileId: 'saved_moonshot',
|
||||
} as any)
|
||||
|
||||
expect(applied).toBeUndefined()
|
||||
expect(process.env.OPENAI_BASE_URL).toBe('http://192.168.1.1:8080/v1')
|
||||
})
|
||||
|
||||
test('still respects complete shell selection with USE flag + MODEL', async () => {
|
||||
const { applyActiveProviderProfileFromConfig } =
|
||||
await importFreshProviderProfileModules()
|
||||
process.env.CLAUDE_CODE_USE_OPENAI = '1'
|
||||
process.env.OPENAI_MODEL = 'gpt-4o-mini'
|
||||
delete process.env.OPENAI_BASE_URL
|
||||
|
||||
const applied = applyActiveProviderProfileFromConfig({
|
||||
providerProfiles: [
|
||||
buildProfile({
|
||||
id: 'saved_moonshot',
|
||||
baseUrl: 'https://api.moonshot.ai/v1',
|
||||
model: 'kimi-k2.6',
|
||||
}),
|
||||
],
|
||||
activeProviderProfileId: 'saved_moonshot',
|
||||
} as any)
|
||||
|
||||
expect(applied).toBeUndefined()
|
||||
expect(process.env.OPENAI_MODEL).toBe('gpt-4o-mini')
|
||||
})
|
||||
|
||||
test('does not override explicit startup selection when profile marker is stale', async () => {
|
||||
const { applyActiveProviderProfileFromConfig } =
|
||||
await importFreshProviderProfileModules()
|
||||
@@ -450,6 +527,18 @@ describe('getProviderPresetDefaults', () => {
|
||||
expect(defaults.baseUrl).toBe('http://localhost:11434/v1')
|
||||
expect(defaults.model).toBe('llama3.1:8b')
|
||||
})
|
||||
|
||||
test('atomic-chat preset defaults to a local Atomic Chat endpoint', async () => {
|
||||
const { getProviderPresetDefaults } = await importFreshProviderProfileModules()
|
||||
delete process.env.OPENAI_MODEL
|
||||
|
||||
const defaults = getProviderPresetDefaults('atomic-chat')
|
||||
|
||||
expect(defaults.provider).toBe('openai')
|
||||
expect(defaults.name).toBe('Atomic Chat')
|
||||
expect(defaults.baseUrl).toBe('http://127.0.0.1:1337/v1')
|
||||
expect(defaults.requiresApiKey).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
describe('setActiveProviderProfile', () => {
|
||||
|
||||
@@ -33,6 +33,7 @@ export type ProviderPreset =
|
||||
| 'custom'
|
||||
| 'nvidia-nim'
|
||||
| 'minimax'
|
||||
| 'atomic-chat'
|
||||
|
||||
export type ProviderProfileInput = {
|
||||
provider?: ProviderProfile['provider']
|
||||
@@ -285,6 +286,15 @@ export function getProviderPresetDefaults(
|
||||
apiKey: process.env.MINIMAX_API_KEY ?? '',
|
||||
requiresApiKey: true,
|
||||
}
|
||||
case 'atomic-chat':
|
||||
return {
|
||||
provider: 'openai',
|
||||
name: 'Atomic Chat',
|
||||
baseUrl: 'http://127.0.0.1:1337/v1',
|
||||
model: process.env.OPENAI_MODEL ?? 'local-model',
|
||||
apiKey: '',
|
||||
requiresApiKey: false,
|
||||
}
|
||||
case 'ollama':
|
||||
default:
|
||||
return {
|
||||
@@ -322,6 +332,58 @@ function hasProviderSelectionFlags(
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A "complete" explicit provider selection = a USE flag AND at least one
|
||||
* concrete config value that tells us WHERE to route (a base URL) or WHAT
|
||||
* to run (a model id); for Gemini, Mistral and GitHub an API key or token also counts. A bare `CLAUDE_CODE_USE_OPENAI=1` with nothing else
|
||||
* is almost always a stale shell export from a previous session, not real
|
||||
* intent — and if we respect it, we skip the user's saved active profile
|
||||
* and fall back to hardcoded defaults (gpt-4o / api.openai.com), which is
|
||||
* the exact bug users report as "my saved provider isn't picked up".
|
||||
*
|
||||
* Used to gate whether saved-profile env should override shell state at
|
||||
* startup. The weaker `hasProviderSelectionFlags` is still used for the
|
||||
* anthropic-profile conflict check (any flag is a conflict for
|
||||
* first-party anthropic) and for alignment fingerprinting.
|
||||
*/
|
||||
function hasCompleteProviderSelection(
|
||||
processEnv: NodeJS.ProcessEnv = process.env,
|
||||
): boolean {
|
||||
if (!hasProviderSelectionFlags(processEnv)) return false
|
||||
if (processEnv.CLAUDE_CODE_USE_OPENAI !== undefined) {
|
||||
return (
|
||||
trimOrUndefined(processEnv.OPENAI_BASE_URL) !== undefined ||
|
||||
trimOrUndefined(processEnv.OPENAI_API_BASE) !== undefined ||
|
||||
trimOrUndefined(processEnv.OPENAI_MODEL) !== undefined
|
||||
)
|
||||
}
|
||||
if (processEnv.CLAUDE_CODE_USE_GEMINI !== undefined) {
|
||||
return (
|
||||
trimOrUndefined(processEnv.GEMINI_BASE_URL) !== undefined ||
|
||||
trimOrUndefined(processEnv.GEMINI_MODEL) !== undefined ||
|
||||
trimOrUndefined(processEnv.GEMINI_API_KEY) !== undefined ||
|
||||
trimOrUndefined(processEnv.GOOGLE_API_KEY) !== undefined
|
||||
)
|
||||
}
|
||||
if (processEnv.CLAUDE_CODE_USE_MISTRAL !== undefined) {
|
||||
return (
|
||||
trimOrUndefined(processEnv.MISTRAL_BASE_URL) !== undefined ||
|
||||
trimOrUndefined(processEnv.MISTRAL_MODEL) !== undefined ||
|
||||
trimOrUndefined(processEnv.MISTRAL_API_KEY) !== undefined
|
||||
)
|
||||
}
|
||||
if (processEnv.CLAUDE_CODE_USE_GITHUB !== undefined) {
|
||||
return (
|
||||
trimOrUndefined(processEnv.GITHUB_TOKEN) !== undefined ||
|
||||
trimOrUndefined(processEnv.GH_TOKEN) !== undefined ||
|
||||
trimOrUndefined(processEnv.OPENAI_MODEL) !== undefined
|
||||
)
|
||||
}
|
||||
// Bedrock / Vertex / Foundry signal cloud-provider routing in env; treat
|
||||
// the flag alone as complete (these paths rely on ambient cloud-provider creds).
|
||||
return true
|
||||
}
|
||||
|
||||
function hasConflictingProviderFlagsForProfile(
|
||||
processEnv: NodeJS.ProcessEnv,
|
||||
profile: ProviderProfile,
|
||||
@@ -564,9 +626,15 @@ export function applyActiveProviderProfileFromConfig(
|
||||
processEnv[PROFILE_ENV_APPLIED_FLAG] === '1' &&
|
||||
trimOrUndefined(processEnv[PROFILE_ENV_APPLIED_ID]) === activeProfile.id
|
||||
|
||||
if (!options?.force && (hasProviderSelectionFlags(processEnv) || processEnv[PROFILE_ENV_APPLIED_FLAG] === '1')) {
|
||||
if (!options?.force && (hasCompleteProviderSelection(processEnv) || processEnv[PROFILE_ENV_APPLIED_FLAG] === '1')) {
|
||||
// Respect explicit startup provider intent. Auto-heal only when this
|
||||
// exact active profile previously applied the current env.
|
||||
// NOTE: we gate on hasCompleteProviderSelection (flag + concrete config)
|
||||
// rather than hasProviderSelectionFlags alone. A bare CLAUDE_CODE_USE_*=1
|
||||
// with no BASE_URL/MODEL is almost always a stale shell export, not
|
||||
// intent — respecting it would skip the saved profile and fall through
|
||||
// to hardcoded provider defaults, which surfaces as "my saved provider
|
||||
// isn't being picked up at startup".
|
||||
if (!isCurrentEnvProfileManaged) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
@@ -456,10 +456,19 @@ const checkDependencies = memoize((): SandboxDependencyCheck => {
|
||||
})
|
||||
})
|
||||
|
||||
/**
|
||||
* Read sandbox.enabled only from trusted settings sources.
|
||||
* projectSettings is intentionally excluded — a malicious repo could
|
||||
* otherwise disable the sandbox via the repo's project settings file (.openclaude/settings.json).
|
||||
*/
|
||||
function getSandboxEnabledSetting(): boolean {
|
||||
try {
|
||||
const settings = getSettings_DEPRECATED()
|
||||
return settings?.sandbox?.enabled ?? false
|
||||
return !!(
|
||||
getSettingsForSource('userSettings')?.sandbox?.enabled ||
|
||||
getSettingsForSource('localSettings')?.sandbox?.enabled ||
|
||||
getSettingsForSource('flagSettings')?.sandbox?.enabled ||
|
||||
getSettingsForSource('policySettings')?.sandbox?.enabled
|
||||
)
|
||||
} catch (error) {
|
||||
logForDebugging(`Failed to get settings for sandbox check: ${error}`)
|
||||
return false
|
||||
|
||||
@@ -300,9 +300,9 @@ export function getRelativeSettingsFilePathForSource(
|
||||
): string {
|
||||
switch (source) {
|
||||
case 'projectSettings':
|
||||
return join('.openclaude', 'settings.json')
|
||||
return '.openclaude/settings.json'
|
||||
case 'localSettings':
|
||||
return join('.openclaude', 'settings.local.json')
|
||||
return '.openclaude/settings.local.json'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -207,6 +207,10 @@ export function createPermissionRequest(params: {
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use sendPermissionRequestViaMailbox() instead. This file-based
|
||||
* approach writes to an unauthenticated directory where any local process can
|
||||
* forge requests. Retained for backward compatibility but no longer called.
|
||||
*
|
||||
* Write a permission request to the pending directory with file locking
|
||||
* Called by worker agents when they need permission approval from the leader
|
||||
*
|
||||
@@ -250,6 +254,10 @@ export async function writePermissionRequest(
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated No longer called — permission requests are sent via mailbox.
|
||||
* The pending directory is an unauthenticated channel. Retained for backward
|
||||
* compatibility.
|
||||
*
|
||||
* Read all pending permission requests for a team
|
||||
* Called by the team leader to see what requests need attention
|
||||
*/
|
||||
@@ -312,6 +320,11 @@ export async function readPendingPermissions(
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated No longer called — permission responses are delivered via mailbox
|
||||
* (processMailboxPermissionResponse). The resolved directory is an unauthenticated
|
||||
* channel where any local process can forge approvals. Retained for backward
|
||||
* compatibility.
|
||||
*
|
||||
* Read a resolved permission request by ID
|
||||
* Called by workers to check if their request has been resolved
|
||||
*
|
||||
@@ -352,6 +365,10 @@ export async function readResolvedPermission(
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use sendPermissionResponseViaMailbox() instead. This file-based
|
||||
* approach writes to an unauthenticated directory where any local process can
|
||||
* forge approvals. Retained for backward compatibility but no longer called.
|
||||
*
|
||||
* Resolve a permission request
|
||||
* Called by the team leader (or worker in self-resolution cases)
|
||||
*
|
||||
@@ -536,6 +553,10 @@ export type PermissionResponse = {
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use processMailboxPermissionResponse() via useInboxPoller instead.
|
||||
* File-based polling reads from an unauthenticated directory where any local
|
||||
* process can forge approval files. Retained for backward compatibility.
|
||||
*
|
||||
* Poll for a permission response (worker-side convenience function)
|
||||
* Converts the resolved request into a simpler response format
|
||||
*
|
||||
@@ -564,6 +585,9 @@ export async function pollForResponse(
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated File-based response cleanup is no longer needed — responses are
|
||||
* delivered via mailbox. Retained for backward compatibility.
|
||||
*
|
||||
* Remove a worker's response after processing
|
||||
* This is an alias for deleteResolvedPermission for backward compatibility
|
||||
*/
|
||||
@@ -601,6 +625,9 @@ export function isSwarmWorker(): boolean {
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated File-based resolved permissions are no longer written. Responses
|
||||
* are delivered via mailbox. Retained for backward compatibility.
|
||||
*
|
||||
* Delete a resolved permission file
|
||||
* Called after a worker has processed the resolution
|
||||
*/
|
||||
@@ -635,8 +662,8 @@ export async function deleteResolvedPermission(
|
||||
}
|
||||
|
||||
/**
|
||||
* Submit a permission request (alias for writePermissionRequest)
|
||||
* Provided for backward compatibility with worker integration code
|
||||
* @deprecated Alias for writePermissionRequest, which is itself deprecated.
|
||||
* Use sendPermissionRequestViaMailbox() instead.
|
||||
*/
|
||||
export const submitPermissionRequest = writePermissionRequest
|
||||
|
||||
|
||||
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
106
src/utils/thinkingTokenExtractor.test.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { ThinkingTokenAnalyzer } from './thinkingTokenExtractor.js'
|
||||
|
||||
describe('ThinkingTokenAnalyzer', () => {
|
||||
describe('extract', () => {
|
||||
it('extracts thinking and output separately', () => {
|
||||
const message = {
|
||||
type: 'assistant',
|
||||
message: {
|
||||
content: [
|
||||
{ type: 'thinking', thinking: 'Let me think about this...' },
|
||||
{ type: 'text', text: 'Here is my answer.' },
|
||||
],
|
||||
},
|
||||
} as any
|
||||
|
||||
const result = ThinkingTokenAnalyzer.extract(message)
|
||||
|
||||
expect(result.thinking).toBeGreaterThan(0)
|
||||
expect(result.output).toBeGreaterThan(0)
|
||||
expect(result.total).toBe(result.thinking + result.output)
|
||||
})
|
||||
|
||||
it('handles no thinking', () => {
|
||||
const message = {
|
||||
type: 'assistant',
|
||||
message: {
|
||||
content: [{ type: 'text', text: 'Hello world' }],
|
||||
},
|
||||
} as any
|
||||
|
||||
const result = ThinkingTokenAnalyzer.extract(message)
|
||||
|
||||
expect(result.thinking).toBe(0)
|
||||
expect(result.output).toBeGreaterThan(0)
|
||||
})
|
||||
|
||||
it('handles redacted thinking', () => {
|
||||
const message = {
|
||||
type: 'assistant',
|
||||
message: {
|
||||
content: [
|
||||
{ type: 'redacted_thinking', data: '[thinking hidden]' },
|
||||
{ type: 'text', text: 'Answer here.' },
|
||||
],
|
||||
},
|
||||
} as any
|
||||
|
||||
const result = ThinkingTokenAnalyzer.extract(message)
|
||||
|
||||
expect(result.thinking).toBeGreaterThan(0)
|
||||
expect(result.output).toBeGreaterThan(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe('analyze', () => {
|
||||
it('calculates percentages', () => {
|
||||
const message = {
|
||||
type: 'assistant',
|
||||
message: {
|
||||
content: [
|
||||
{ type: 'thinking', thinking: 'Thinking1 Thinking2 Thinking3' },
|
||||
{ type: 'text', text: 'Output1 Output2' },
|
||||
],
|
||||
},
|
||||
} as any
|
||||
|
||||
const analysis = ThinkingTokenAnalyzer.analyze(message)
|
||||
|
||||
expect(analysis.hasThinking).toBe(true)
|
||||
expect(analysis.thinkingPercentage).toBeGreaterThan(0)
|
||||
expect(analysis.outputPercentage).toBeGreaterThan(0)
|
||||
expect(analysis.reasoningComplexity).toBeTruthy()
|
||||
})
|
||||
})
|
||||
|
||||
describe('hasSignificantThinking', () => {
  // Threshold passed explicitly to every call below.
  const THRESHOLD_PERCENT = 20

  // Long thinking next to a tiny answer is well above the threshold.
  it('detects significant thinking', () => {
    const thinkingHeavy = {
      type: 'assistant',
      message: {
        content: [
          { type: 'thinking', thinking: 'x'.repeat(500) },
          { type: 'text', text: 'short' },
        ],
      },
    } as any

    const verdict = ThinkingTokenAnalyzer.hasSignificantThinking(
      thinkingHeavy,
      THRESHOLD_PERCENT,
    )

    expect(verdict).toBe(true)
  })

  // A single character of thinking next to a long answer stays below it.
  it('rejects minimal thinking', () => {
    const outputHeavy = {
      type: 'assistant',
      message: {
        content: [
          { type: 'thinking', thinking: 'a' },
          { type: 'text', text: 'much longer output text here with more content' },
        ],
      },
    } as any

    const verdict = ThinkingTokenAnalyzer.hasSignificantThinking(
      outputHeavy,
      THRESHOLD_PERCENT,
    )

    expect(verdict).toBe(false)
  })
})
|
||||
})
|
||||
192
src/utils/thinkingTokenExtractor.ts
Normal file
192
src/utils/thinkingTokenExtractor.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
/**
|
||||
* Thinking Token Extractor - Production-grade thinking token analysis
|
||||
*
|
||||
* Extracts and analyzes thinking tokens from assistant messages.
|
||||
* Provides detailed breakdown, statistics, and insights.
|
||||
*/
|
||||
|
||||
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
|
||||
import { jsonStringify } from './slowOperations.js'
|
||||
import type { AssistantMessage, Message } from '../types/message.js'
|
||||
|
||||
/** A reasoning block lifted out of an assistant message. */
export interface ThinkingBlock {
  type: 'thinking' | 'redacted_thinking'
  /** The block's `thinking` text, or its `data` payload when redacted. */
  content: string
  /** Estimated token count of `content` (via `roughTokenCountEstimation`). */
  tokens: number
}

/** A non-thinking block (plain text or a tool call) lifted out of an assistant message. */
export interface OutputBlock {
  type: 'text' | 'tool_use'
  /** The text itself, or the JSON-serialised tool `input` for `tool_use`. */
  content: string
  /** Estimated token count of `content` (via `roughTokenCountEstimation`). */
  tokens: number
}

/** Estimated token split of one assistant message. */
export interface ThinkingTokenBreakdown {
  /** Sum of `tokens` over `thinkingBlocks`. */
  thinking: number
  /** Sum of `tokens` over `outputBlocks`. */
  output: number
  /** Always `thinking + output`. */
  total: number
  thinkingBlocks: ThinkingBlock[]
  outputBlocks: OutputBlock[]
}

/** Derived statistics computed from a `ThinkingTokenBreakdown`. */
export interface ThinkingAnalysis {
  hasThinking: boolean
  /** Share of estimated tokens spent on thinking, 0-100, rounded to one decimal. */
  thinkingPercentage: number
  /** Share of estimated tokens spent on output, 0-100, rounded to one decimal. */
  outputPercentage: number
  /** Number of thinking blocks plus number of output blocks. */
  blockCount: number
  /** Mean estimated tokens per thinking block, rounded to an integer. */
  avgThinkingBlockSize: number
  /** Mean estimated tokens per output block, rounded to an integer. */
  avgOutputBlockSize: number
  /** Total character length of every collected block's `content`. */
  totalTextLength: number
  reasoningComplexity: 'low' | 'medium' | 'high'
}

/**
 * Static helpers that split an assistant message into "thinking" and
 * "output" content and estimate the tokens used by each side.
 *
 * Every count is produced by `roughTokenCountEstimation`, so the numbers are
 * estimates rather than billed usage.
 */
export class ThinkingTokenAnalyzer {
  /**
   * Extract the detailed thinking vs output breakdown of a message.
   *
   * Only `thinking`, `redacted_thinking`, `text` and `tool_use` blocks are
   * counted; any other block type is ignored.
   *
   * @param message Assistant message whose `message.content` blocks are scanned.
   * @returns Per-side token estimates plus the individual blocks collected.
   */
  static extract(message: AssistantMessage): ThinkingTokenBreakdown {
    const thinkingBlocks: ThinkingBlock[] = []
    const outputBlocks: OutputBlock[] = []

    for (const block of message.message.content) {
      switch (block.type) {
        case 'thinking':
          thinkingBlocks.push({
            type: 'thinking',
            content: block.thinking,
            tokens: roughTokenCountEstimation(block.thinking),
          })
          break
        case 'redacted_thinking':
          thinkingBlocks.push({
            type: 'redacted_thinking',
            content: block.data,
            tokens: roughTokenCountEstimation(block.data),
          })
          break
        case 'text':
          outputBlocks.push({
            type: 'text',
            content: block.text,
            tokens: roughTokenCountEstimation(block.text),
          })
          break
        case 'tool_use': {
          // Tool calls are measured by the size of their serialised input.
          const content = jsonStringify(block.input)
          outputBlocks.push({
            type: 'tool_use',
            content,
            tokens: roughTokenCountEstimation(content),
          })
          break
        }
        default:
          break
      }
    }

    const thinking = this.sumTokens(thinkingBlocks)
    const output = this.sumTokens(outputBlocks)

    return {
      thinking,
      output,
      total: thinking + output,
      thinkingBlocks,
      outputBlocks,
    }
  }

  /**
   * Alias of {@link ThinkingTokenAnalyzer.extract}; returns the same breakdown.
   */
  static extractSimple(message: AssistantMessage): ThinkingTokenBreakdown {
    return this.extract(message)
  }

  /**
   * Analyze thinking patterns of a message.
   *
   * Complexity is `high` when thinking exceeds 30% of tokens or there are
   * more than 5 thinking blocks, `medium` above 10% or more than 2 blocks,
   * and `low` otherwise. The classification uses the unrounded percentage;
   * the percentages in the returned object are rounded to one decimal.
   *
   * @param message Assistant message to analyze.
   * @returns Percentages, averages and a coarse complexity label.
   */
  static analyze(message: AssistantMessage): ThinkingAnalysis {
    const { thinking, output, total, thinkingBlocks, outputBlocks } =
      this.extract(message)

    const thinkingPercentage = this.percentage(thinking, total)
    const outputPercentage = this.percentage(output, total)

    // `thinking` / `output` already are the per-side token sums, so the
    // averages do not need to re-reduce the block arrays.
    const avgThinkingBlockSize =
      thinkingBlocks.length > 0 ? thinking / thinkingBlocks.length : 0
    const avgOutputBlockSize =
      outputBlocks.length > 0 ? output / outputBlocks.length : 0

    let totalTextLength = 0
    for (const block of thinkingBlocks) totalTextLength += block.content.length
    for (const block of outputBlocks) totalTextLength += block.content.length

    let reasoningComplexity: 'low' | 'medium' | 'high' = 'low'
    if (thinkingPercentage > 30 || thinkingBlocks.length > 5) {
      reasoningComplexity = 'high'
    } else if (thinkingPercentage > 10 || thinkingBlocks.length > 2) {
      reasoningComplexity = 'medium'
    }

    return {
      hasThinking: thinking > 0,
      thinkingPercentage: Math.round(thinkingPercentage * 10) / 10,
      outputPercentage: Math.round(outputPercentage * 10) / 10,
      blockCount: thinkingBlocks.length + outputBlocks.length,
      avgThinkingBlockSize: Math.round(avgThinkingBlockSize),
      avgOutputBlockSize: Math.round(avgOutputBlockSize),
      totalTextLength,
      reasoningComplexity,
    }
  }

  /**
   * Check whether thinking makes up at least `thresholdPercent` of a
   * message's estimated tokens.
   *
   * The comparison uses the exact (unrounded) percentage, so a value such as
   * 19.95% does not pass a 20% threshold through display rounding.
   *
   * @param message Assistant message to check.
   * @param thresholdPercent Minimum thinking share, 0-100. Defaults to 20.
   */
  static hasSignificantThinking(
    message: AssistantMessage,
    thresholdPercent = 20,
  ): boolean {
    const { thinking, total } = this.extract(message)
    return this.percentage(thinking, total) >= thresholdPercent
  }

  /**
   * Keep only the assistant messages whose thinking share reaches the
   * default threshold of {@link ThinkingTokenAnalyzer.hasSignificantThinking}.
   * Non-assistant messages are dropped.
   */
  static filterThinkingMessages(messages: Message[]): AssistantMessage[] {
    return messages
      .filter((m): m is AssistantMessage => m.type === 'assistant')
      .filter(m => this.hasSignificantThinking(m))
  }

  /**
   * Sum the estimated thinking tokens of every assistant message in `messages`.
   */
  static totalThinkingTokens(messages: Message[]): number {
    return messages
      .filter((m): m is AssistantMessage => m.type === 'assistant')
      .reduce((sum, m) => sum + this.extract(m).thinking, 0)
  }

  /** Add up the `tokens` field of the given blocks. */
  private static sumTokens(blocks: ReadonlyArray<{ tokens: number }>): number {
    return blocks.reduce((sum, block) => sum + block.tokens, 0)
  }

  /** `part` as a percentage of `whole`; 0 when `whole` is not positive. */
  private static percentage(part: number, whole: number): number {
    return whole > 0 ? (part / whole) * 100 : 0
  }
}
|
||||
|
||||
/**
 * Legacy function-style entry point kept for backward compatibility.
 *
 * Delegates to `ThinkingTokenAnalyzer.extract` and returns only the three
 * numeric totals, without the per-block arrays.
 */
export function extractThinkingTokens(
  message: AssistantMessage,
): { thinking: number; output: number; total: number } {
  const { thinking, output, total } = ThinkingTokenAnalyzer.extract(message)
  return { thinking, output, total }
}
|
||||
69
src/utils/thinkingTokens.test.ts
Normal file
69
src/utils/thinkingTokens.test.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { extractThinkingTokens } from './tokens.js'
|
||||
|
||||
describe('extractThinkingTokens', () => {
  // Wraps content blocks in the minimal assistant-message shape the
  // function reads (`type` plus `message.content`).
  const assistantWith = (content: unknown[]) =>
    ({ type: 'assistant', message: { content } }) as any

  it('extracts thinking and output separately', () => {
    const { thinking, output, total } = extractThinkingTokens(
      assistantWith([
        { type: 'thinking', thinking: 'Let me think about this...' },
        { type: 'text', text: 'Here is my answer.' },
      ]),
    )

    expect(thinking).toBeGreaterThan(0)
    expect(output).toBeGreaterThan(0)
    expect(total).toBe(thinking + output)
  })

  it('handles no thinking', () => {
    const { thinking, output } = extractThinkingTokens(
      assistantWith([{ type: 'text', text: 'Hello world' }]),
    )

    expect(thinking).toBe(0)
    expect(output).toBeGreaterThan(0)
  })

  it('handles redacted thinking', () => {
    const { thinking, output } = extractThinkingTokens(
      assistantWith([
        { type: 'redacted_thinking', data: '[thinking hidden]' },
        { type: 'text', text: 'Answer here.' },
      ]),
    )

    expect(thinking).toBeGreaterThan(0)
    expect(output).toBeGreaterThan(0)
  })

  it('handles tool use', () => {
    const { output } = extractThinkingTokens(
      assistantWith([
        { type: 'tool_use', id: 'tool_1', name: 'bash', input: { cmd: 'echo test' } },
        { type: 'text', text: 'Ran command.' },
      ]),
    )

    expect(output).toBeGreaterThan(0)
  })
})
|
||||
84
src/utils/tokenAnalytics.test.ts
Normal file
84
src/utils/tokenAnalytics.test.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import { describe, expect, it, beforeEach } from 'bun:test'
|
||||
import { TokenUsageTracker } from './tokenAnalytics.js'
|
||||
|
||||
describe('TokenUsageTracker', () => {
  const SONNET_MODEL = 'claude-sonnet-4-5-20250514'

  let tracker: TokenUsageTracker

  // Each test starts from an empty tracker capped at 100 entries.
  beforeEach(() => {
    tracker = new TokenUsageTracker(100)
  })

  it('records token usage', () => {
    const usage = {
      input_tokens: 1000,
      output_tokens: 500,
      cache_read_input_tokens: 200,
      cache_creation_input_tokens: 100,
      model: SONNET_MODEL,
    }

    tracker.record(usage)

    expect(tracker.size).toBe(1)
  })

  it('calculates analytics', () => {
    tracker.record({ input_tokens: 1000, output_tokens: 500, model: SONNET_MODEL })
    tracker.record({ input_tokens: 2000, output_tokens: 300, model: SONNET_MODEL })

    const {
      totalRequests,
      totalInputTokens,
      totalOutputTokens,
      averageInputPerRequest,
      averageOutputPerRequest,
    } = tracker.getAnalytics()

    expect(totalRequests).toBe(2)
    expect(totalInputTokens).toBe(3000)
    expect(totalOutputTokens).toBe(800)
    expect(averageInputPerRequest).toBe(1500)
    expect(averageOutputPerRequest).toBe(400)
  })

  it('tracks cache hit rate', () => {
    // Non-zero cache reads must surface as a non-zero hit rate.
    tracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      cache_read_input_tokens: 500,
      model: SONNET_MODEL,
    })

    const { cacheHitRate } = tracker.getAnalytics()

    expect(cacheHitRate).toBeGreaterThan(0)
  })

  it('tracks most used model', () => {
    for (const model of ['sonnet', 'sonnet', 'opus']) {
      tracker.record({ input_tokens: 1000, output_tokens: 100, model })
    }

    expect(tracker.getAnalytics().mostUsedModel).toBe('sonnet')
  })

  it('respects max entries limit', () => {
    const smallTracker = new TokenUsageTracker(3)
    const models = ['a', 'b', 'c', 'd', 'e']

    // Five records into a tracker that may hold only three.
    models.forEach((model, index) => {
      const tokens = index + 1
      smallTracker.record({ input_tokens: tokens, output_tokens: tokens, model })
    })

    expect(smallTracker.size).toBe(3)
  })

  it('clears history', () => {
    tracker.record({ input_tokens: 1000, output_tokens: 100, model: 'test' })

    tracker.clear()

    expect(tracker.size).toBe(0)
  })
})
|
||||
211
src/utils/tokenAnalytics.ts
Normal file
211
src/utils/tokenAnalytics.ts
Normal file
@@ -0,0 +1,211 @@
|
||||
/**
|
||||
* Token Analytics - Historical token usage tracking and analysis
|
||||
*
|
||||
* Tracks token usage patterns over time for cost optimization
|
||||
* and capacity planning.
|
||||
*/
|
||||
|
||||
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
|
||||
/** One recorded API call, with token counters normalised to camelCase. */
export interface TokenUsageEntry {
  /** `Date.now()` (ms since epoch) at the moment the entry was recorded. */
  timestamp: number
  inputTokens: number
  outputTokens: number
  /** Copied from `cache_read_input_tokens`; 0 when that field was absent. */
  cacheReadTokens: number
  /** Copied from `cache_creation_input_tokens`; 0 when that field was absent. */
  cacheCreationTokens: number
  model: string
}

/** Aggregate view over every entry currently held by a tracker. */
export interface TokenAnalytics {
  totalRequests: number
  totalInputTokens: number
  totalOutputTokens: number
  totalCacheRead: number
  totalCacheCreation: number
  /** Mean input tokens per request, rounded to an integer. */
  averageInputPerRequest: number
  /** Mean output tokens per request, rounded to an integer. */
  averageOutputPerRequest: number
  /** Cache-read share of all prompt tokens, 0-100, rounded to an integer. */
  cacheHitRate: number
  /** Model with the most entries; `'unknown'` when there is no history. */
  mostUsedModel: string
  requestsLastHour: number
  requestsLastDay: number
}

/**
 * Historical Token Analytics Tracker
 *
 * Keeps a bounded, in-memory list of token-usage entries (oldest entries are
 * dropped first) and derives summary statistics from it.
 */
export class TokenUsageTracker {
  private history: TokenUsageEntry[] = []
  private readonly maxEntries: number

  /**
   * @param maxEntries Maximum number of entries retained. Negative values are
   *   treated as 0 and fractional values are rounded down.
   */
  constructor(maxEntries = 1000) {
    this.maxEntries = Math.max(0, Math.floor(maxEntries))
  }

  /**
   * Record a token usage event from an API response.
   *
   * The entry is timestamped with `Date.now()`; missing cache counters are
   * stored as 0. The oldest entries are evicted once the cap is exceeded.
   */
  record(usage: {
    input_tokens: number
    output_tokens: number
    cache_read_input_tokens?: number
    cache_creation_input_tokens?: number
    model: string
  }): void {
    this.history.push({
      timestamp: Date.now(),
      inputTokens: usage.input_tokens,
      outputTokens: usage.output_tokens,
      cacheReadTokens: usage.cache_read_input_tokens ?? 0,
      cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
      model: usage.model,
    })
    this.trimToCapacity()
  }

  /**
   * Get the analytics summary for all recorded usage.
   *
   * `cacheHitRate` is cache-read tokens divided by all prompt tokens
   * (`input + cache read + cache creation`). Output tokens are excluded
   * because they can never be served from the prompt cache.
   * NOTE(review): this assumes `input_tokens` does not already include the
   * cached tokens, as in the Anthropic usage object — confirm for other
   * providers feeding this tracker.
   *
   * @returns All-zero analytics with `mostUsedModel: 'unknown'` when empty.
   */
  getAnalytics(): TokenAnalytics {
    const totalRequests = this.history.length
    if (totalRequests === 0) {
      return {
        totalRequests: 0,
        totalInputTokens: 0,
        totalOutputTokens: 0,
        totalCacheRead: 0,
        totalCacheCreation: 0,
        averageInputPerRequest: 0,
        averageOutputPerRequest: 0,
        cacheHitRate: 0,
        mostUsedModel: 'unknown',
        requestsLastHour: 0,
        requestsLastDay: 0,
      }
    }

    const now = Date.now()
    const hourAgo = now - 60 * 60 * 1000
    const dayAgo = now - 24 * 60 * 60 * 1000

    let totalInput = 0
    let totalOutput = 0
    let totalCacheRead = 0
    let totalCacheCreation = 0
    let requestsLastHour = 0
    let requestsLastDay = 0
    const modelCounts = new Map<string, number>()

    for (const entry of this.history) {
      totalInput += entry.inputTokens
      totalOutput += entry.outputTokens
      totalCacheRead += entry.cacheReadTokens
      totalCacheCreation += entry.cacheCreationTokens
      modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
      if (entry.timestamp >= hourAgo) requestsLastHour++
      if (entry.timestamp >= dayAgo) requestsLastDay++
    }

    // Strict `>` keeps the first-recorded model on ties.
    let mostUsedModel = 'unknown'
    let maxCount = 0
    for (const [model, count] of modelCounts) {
      if (count > maxCount) {
        maxCount = count
        mostUsedModel = model
      }
    }

    const promptTokens = totalInput + totalCacheRead + totalCacheCreation
    const cacheHitRate =
      promptTokens > 0 ? (totalCacheRead / promptTokens) * 100 : 0

    return {
      totalRequests,
      totalInputTokens: totalInput,
      totalOutputTokens: totalOutput,
      totalCacheRead,
      totalCacheCreation,
      averageInputPerRequest: Math.round(totalInput / totalRequests),
      averageOutputPerRequest: Math.round(totalOutput / totalRequests),
      cacheHitRate: Math.round(cacheHitRate),
      mostUsedModel,
      requestsLastHour,
      requestsLastDay,
    }
  }

  /**
   * Get the entries recorded within the last `windowMs` milliseconds.
   */
  getRecent(windowMs: number): TokenUsageEntry[] {
    const cutoff = Date.now() - windowMs
    return this.history.filter(e => e.timestamp >= cutoff)
  }

  /**
   * Get the entries recorded for exactly the given model name.
   */
  getByModel(model: string): TokenUsageEntry[] {
    return this.history.filter(e => e.model === model)
  }

  /**
   * Calculate an approximate cost for the recorded usage.
   *
   * Each total is multiplied by its per-token rate and rounded to two
   * decimals. Cache-creation tokens are not costed.
   * NOTE(review): the default rates are applied per token; values of this
   * magnitude resemble per-1K-token list prices — confirm the intended unit.
   *
   * @param rates Per-token prices; defaults to the built-in approximations.
   */
  estimateCost(
    rates: { input: number; output: number; cacheRead: number } = {
      input: 0.00015,
      output: 0.0006,
      cacheRead: 0.000075,
    },
  ): { input: number; output: number; cache: number } {
    const { totalInputTokens, totalOutputTokens, totalCacheRead } =
      this.getAnalytics()
    const roundToCents = (amount: number): number =>
      Math.round(amount * 100) / 100

    return {
      input: roundToCents(totalInputTokens * rates.input),
      output: roundToCents(totalOutputTokens * rates.output),
      cache: roundToCents(totalCacheRead * rates.cacheRead),
    }
  }

  /**
   * Clear history.
   */
  clear(): void {
    this.history = []
  }

  /**
   * Get history size.
   */
  get size(): number {
    return this.history.length
  }

  /**
   * Export history as pretty-printed JSON.
   */
  export(): string {
    return JSON.stringify(this.history, null, 2)
  }

  /**
   * Replace the history with entries parsed from JSON.
   *
   * Input that is not valid JSON, or whose top-level value is not an array,
   * is ignored and the current history is kept. Array elements that do not
   * have the `TokenUsageEntry` shape are skipped so they cannot corrupt the
   * totals. Only the newest `maxEntries` entries are retained.
   */
  import(json: string): void {
    let parsed: unknown
    try {
      parsed = JSON.parse(json)
    } catch {
      // Invalid JSON: best-effort import, keep the existing history.
      return
    }
    if (!Array.isArray(parsed)) return

    this.history = parsed.filter(
      (candidate): candidate is TokenUsageEntry =>
        TokenUsageTracker.isEntry(candidate),
    )
    this.trimToCapacity()
  }

  /**
   * Drop the oldest entries until at most `maxEntries` remain.
   * Uses an explicit overflow count: `slice(-maxEntries)` keeps the whole
   * array when `maxEntries` is 0, because `-0` is treated as index 0.
   */
  private trimToCapacity(): void {
    const overflow = this.history.length - this.maxEntries
    if (overflow > 0) this.history.splice(0, overflow)
  }

  /** Runtime shape check for values coming from untrusted JSON. */
  private static isEntry(value: unknown): value is TokenUsageEntry {
    if (typeof value !== 'object' || value === null) return false
    const candidate = value as Record<string, unknown>
    const numericFields = [
      'timestamp',
      'inputTokens',
      'outputTokens',
      'cacheReadTokens',
      'cacheCreationTokens',
    ]
    return (
      typeof candidate.model === 'string' &&
      numericFields.every(field => Number.isFinite(candidate[field]))
    )
  }
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
import { roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
||||
import { roughTokenCountEstimation, roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
||||
import type { AssistantMessage, Message } from '../types/message.js'
|
||||
import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js'
|
||||
import { jsonStringify } from './slowOperations.js'
|
||||
@@ -198,6 +198,198 @@ export function getAssistantMessageContentLength(
|
||||
return contentLength
|
||||
}
|
||||
|
||||
/**
 * Estimate how many tokens of an assistant message are thinking versus
 * output.
 *
 * `thinking` and `redacted_thinking` blocks count as thinking; `text` and
 * `tool_use` (measured by its JSON-serialised input) count as output. Other
 * block types are ignored. Counts come from `roughTokenCountEstimation`.
 *
 * @returns The two estimates and their sum.
 */
export function extractThinkingTokens(
  message: AssistantMessage,
): { thinking: number; output: number; total: number } {
  let thinkingTokens = 0
  let outputTokens = 0

  for (const block of message.message.content) {
    switch (block.type) {
      case 'thinking':
        thinkingTokens += roughTokenCountEstimation(block.thinking)
        break
      case 'redacted_thinking':
        thinkingTokens += roughTokenCountEstimation(block.data)
        break
      case 'text':
        outputTokens += roughTokenCountEstimation(block.text)
        break
      case 'tool_use':
        outputTokens += roughTokenCountEstimation(jsonStringify(block.input))
        break
      default:
        break
    }
  }

  return {
    thinking: thinkingTokens,
    output: outputTokens,
    total: thinkingTokens + outputTokens,
  }
}
|
||||
|
||||
/**
 * Token usage history entry for tracking patterns over time.
 */
export interface TokenUsageEntry {
  /** `Date.now()` (ms since epoch) at the moment the entry was recorded. */
  timestamp: number
  inputTokens: number
  outputTokens: number
  /** Copied from `cache_read_input_tokens`; 0 when that field was absent. */
  cacheReadTokens: number
  /** Copied from `cache_creation_input_tokens`; 0 when that field was absent. */
  cacheCreationTokens: number
  model: string
}

/**
 * Token analytics summary from historical data.
 */
export interface TokenAnalytics {
  totalRequests: number
  totalInputTokens: number
  totalOutputTokens: number
  totalCacheRead: number
  totalCacheCreation: number
  /** Mean input tokens per request, rounded to an integer. */
  averageInputPerRequest: number
  /** Mean output tokens per request, rounded to an integer. */
  averageOutputPerRequest: number
  /** Cache-read share of all prompt tokens, 0-100, rounded to an integer. */
  cacheHitRate: number
  /** Model with the most entries; `'unknown'` when there is no history. */
  mostUsedModel: string
  requestsLastHour: number
  requestsLastDay: number
}

/**
 * Historical Token Analytics Tracker
 *
 * Keeps a bounded, in-memory list of token-usage entries (oldest entries are
 * dropped first) and derives summary statistics from it.
 */
export class TokenUsageTracker {
  private history: TokenUsageEntry[] = []
  private readonly maxEntries: number

  /**
   * @param maxEntries Maximum number of entries retained. Negative values are
   *   treated as 0 and fractional values are rounded down.
   */
  constructor(maxEntries = 1000) {
    this.maxEntries = Math.max(0, Math.floor(maxEntries))
  }

  /**
   * Record a token usage event from an API response.
   *
   * The entry is timestamped with `Date.now()`; missing cache counters are
   * stored as 0. The oldest entries are evicted once the cap is exceeded.
   */
  record(usage: {
    input_tokens: number
    output_tokens: number
    cache_read_input_tokens?: number
    cache_creation_input_tokens?: number
    model: string
  }): void {
    this.history.push({
      timestamp: Date.now(),
      inputTokens: usage.input_tokens,
      outputTokens: usage.output_tokens,
      cacheReadTokens: usage.cache_read_input_tokens ?? 0,
      cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
      model: usage.model,
    })

    // Evict by explicit overflow count: `slice(-maxEntries)` keeps the whole
    // array when `maxEntries` is 0, because `-0` is treated as index 0.
    const overflow = this.history.length - this.maxEntries
    if (overflow > 0) this.history.splice(0, overflow)
  }

  /**
   * Get the analytics summary for all recorded usage.
   *
   * `cacheHitRate` is cache-read tokens divided by all prompt tokens
   * (`input + cache read + cache creation`). Output tokens are excluded
   * because they can never be served from the prompt cache.
   * NOTE(review): this assumes `input_tokens` does not already include the
   * cached tokens, as in the Anthropic usage object — confirm for other
   * providers feeding this tracker.
   *
   * @returns All-zero analytics with `mostUsedModel: 'unknown'` when empty.
   */
  getAnalytics(): TokenAnalytics {
    const totalRequests = this.history.length
    if (totalRequests === 0) {
      return {
        totalRequests: 0,
        totalInputTokens: 0,
        totalOutputTokens: 0,
        totalCacheRead: 0,
        totalCacheCreation: 0,
        averageInputPerRequest: 0,
        averageOutputPerRequest: 0,
        cacheHitRate: 0,
        mostUsedModel: 'unknown',
        requestsLastHour: 0,
        requestsLastDay: 0,
      }
    }

    const now = Date.now()
    const hourAgo = now - 60 * 60 * 1000
    const dayAgo = now - 24 * 60 * 60 * 1000

    let totalInput = 0
    let totalOutput = 0
    let totalCacheRead = 0
    let totalCacheCreation = 0
    let requestsLastHour = 0
    let requestsLastDay = 0
    const modelCounts = new Map<string, number>()

    for (const entry of this.history) {
      totalInput += entry.inputTokens
      totalOutput += entry.outputTokens
      totalCacheRead += entry.cacheReadTokens
      totalCacheCreation += entry.cacheCreationTokens
      modelCounts.set(entry.model, (modelCounts.get(entry.model) ?? 0) + 1)
      if (entry.timestamp >= hourAgo) requestsLastHour++
      if (entry.timestamp >= dayAgo) requestsLastDay++
    }

    // Find most used model; strict `>` keeps the first-recorded one on ties.
    let mostUsedModel = 'unknown'
    let maxCount = 0
    for (const [model, count] of modelCounts) {
      if (count > maxCount) {
        maxCount = count
        mostUsedModel = model
      }
    }

    const promptTokens = totalInput + totalCacheRead + totalCacheCreation
    const cacheHitRate =
      promptTokens > 0 ? (totalCacheRead / promptTokens) * 100 : 0

    return {
      totalRequests,
      totalInputTokens: totalInput,
      totalOutputTokens: totalOutput,
      totalCacheRead,
      totalCacheCreation,
      averageInputPerRequest: Math.round(totalInput / totalRequests),
      averageOutputPerRequest: Math.round(totalOutput / totalRequests),
      cacheHitRate: Math.round(cacheHitRate),
      mostUsedModel,
      requestsLastHour,
      requestsLastDay,
    }
  }

  /**
   * Get the entries recorded within the last `windowMs` milliseconds.
   */
  getRecent(windowMs: number): TokenUsageEntry[] {
    const cutoff = Date.now() - windowMs
    return this.history.filter(e => e.timestamp >= cutoff)
  }

  /**
   * Clear history.
   */
  clear(): void {
    this.history = []
  }

  /**
   * Get history size.
   */
  get size(): number {
    return this.history.length
  }
}
|
||||
|
||||
/**
|
||||
* Get the current context window size in tokens.
|
||||
*
|
||||
|
||||
38
src/utils/urlRedaction.test.ts
Normal file
38
src/utils/urlRedaction.test.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import { redactUrlForDisplay } from './urlRedaction.ts'
|
||||
|
||||
describe('redactUrlForDisplay', () => {
  // Parsable URL: userinfo and the `api_key` value are masked, `foo` is kept.
  test('redacts credentials and sensitive query params for valid URLs', () => {
    const input = 'http://user:pass@localhost:11434/v1?api_key=secret&foo=bar'
    const expected =
      'http://redacted:redacted@localhost:11434/v1?api_key=redacted&foo=bar'

    expect(redactUrlForDisplay(input)).toBe(expected)
  })

  // A name that merely contains a sensitive token is masked as well.
  test('redacts token-like query parameter names', () => {
    const input =
      'https://example.com/v1?x_access_token=abc123&model=qwen2.5-coder'
    const expected =
      'https://example.com/v1?x_access_token=redacted&model=qwen2.5-coder'

    expect(redactUrlForDisplay(input)).toBe(expected)
  })

  // Scheme-less input cannot be parsed, so the regex fallback is exercised.
  test('falls back to regex redaction for malformed URLs', () => {
    const input = '//user:pass@localhost:11434?token=abc&mode=test'
    const expected = '//redacted@localhost:11434?token=redacted&mode=test'

    expect(redactUrlForDisplay(input)).toBe(expected)
  })

  test('keeps non-sensitive URLs unchanged', () => {
    const harmless = 'http://localhost:11434/v1?model=llama3.1:8b'

    expect(redactUrlForDisplay(harmless)).toBe(harmless)
  })
})
|
||||
48
src/utils/urlRedaction.ts
Normal file
48
src/utils/urlRedaction.ts
Normal file
@@ -0,0 +1,48 @@
|
||||
/**
 * Substrings that mark a query-parameter name as sensitive.
 *
 * Matching is case-insensitive and by containment, so `x_access_token` and
 * `API_KEY` both match. This deliberately errs on the side of over-redaction
 * (for example `monkey` contains `key`).
 */
const SENSITIVE_URL_QUERY_PARAM_TOKENS = [
  'api_key',
  'apikey',
  'key',
  'token',
  'access_token',
  'refresh_token',
  'signature',
  'sig',
  'secret',
  'password',
  'passwd',
  'pwd',
  'auth',
  'authorization',
]

/** Userinfo component (`//user@` or `//user:pass@`) inside an unparsed string. */
const RAW_USERINFO_PATTERN = /\/\/[^/@\s]+(?::[^/@\s]*)?@/g

/** One `name=value` pair introduced by `?` or `&` inside an unparsed string. */
const RAW_QUERY_PAIR_PATTERN = /([?&])([^=&#]+)=[^&#]*/g

/** True when `name` contains any sensitive token, ignoring case. */
function shouldRedactUrlQueryParam(name: string): boolean {
  const lower = name.toLowerCase()
  return SENSITIVE_URL_QUERY_PARAM_TOKENS.some(token => lower.includes(token))
}

/**
 * Mask credentials in a URL so it can be shown in logs or UI.
 *
 * For parsable URLs the username, the password and the value of every
 * sensitive query parameter are replaced with `redacted`. A URL with nothing
 * to redact is returned exactly as given, without normalisation.
 *
 * Strings that `URL` cannot parse are redacted with regular expressions: the
 * whole userinfo becomes `redacted`, and query parameters are masked using
 * the same name rule as the parsed path, so token-like names such as
 * `x_access_token` do not leak through the fallback.
 *
 * @param rawUrl URL (or URL-like string) that may contain secrets.
 * @returns The display-safe string.
 */
export function redactUrlForDisplay(rawUrl: string): string {
  let parsed: URL
  try {
    parsed = new URL(rawUrl)
  } catch {
    return rawUrl
      .replace(RAW_USERINFO_PATTERN, '//redacted@')
      .replace(
        RAW_QUERY_PAIR_PATTERN,
        (pair: string, separator: string, name: string) =>
          shouldRedactUrlQueryParam(name)
            ? `${separator}${name}=redacted`
            : pair,
      )
  }

  let redactedSomething = false

  if (parsed.username) {
    parsed.username = 'redacted'
    redactedSomething = true
  }
  if (parsed.password) {
    parsed.password = 'redacted'
    redactedSomething = true
  }

  // Collect the names first: `set` rewrites the live parameter list, so it
  // must not run while that list is being iterated.
  const sensitiveNames = new Set<string>()
  parsed.searchParams.forEach((_value, name) => {
    if (shouldRedactUrlQueryParam(name)) sensitiveNames.add(name)
  })
  for (const name of sensitiveNames) {
    parsed.searchParams.set(name, 'redacted')
    redactedSomething = true
  }

  return redactedSomething ? parsed.toString() : rawUrl
}
|
||||
Reference in New Issue
Block a user