Compare commits

...

11 Commits
v0.7.0 ... main

Author SHA1 Message Date
KRATOS
ee0d930093 fix(ripgrep): use @vscode/ripgrep package as the builtin source (#911) (#932)
The vendored-binary lookup at vendor/ripgrep/<arch>-<platform>/rg never
resolved in this fork — that directory does not ship — so users without
a system rg had no working fallback. Switch to the @vscode/ripgrep
package so Microsoft maintains the platform/arch matrix and the binary
is delivered via npm.

- src/utils/ripgrep.ts: replace hand-rolled vendor-path resolution with
  rgPath from @vscode/ripgrep. Lazy require so a missing package falls
  through to the system rg branch instead of throwing at import.
  Drop builtinExists from the config args; builtinCommand is now a
  string-or-null. The system override (USE_BUILTIN_RIPGREP=0), the
  Bun-compiled standalone embedded mode, the macOS codesign hook, and
  all retry/timeout/error logic are preserved untouched.
- scripts/build.ts: mark @vscode/ripgrep as external. The package
  resolves rgPath via __dirname at runtime, so bundling would freeze
  the build host's absolute path into dist/cli.mjs.
- src/utils/ripgrep.test.ts: update for the new config shape and add
  tests covering USE_BUILTIN_RIPGREP=0, embedded mode, last-resort
  fallback, and null builtin path.
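The lazy-require fallback described above can be sketched as follows. This is an illustrative sketch, not the fork's actual `ripgrep.ts` code; the function name is made up, and only the `rgPath` export comes from the real `@vscode/ripgrep` package.

```typescript
// Sketch of the lazy-require fallback (assumed helper name). A missing
// @vscode/ripgrep package yields null here instead of throwing at module
// import time, so callers can fall through to the system `rg` branch.
function resolveBuiltinRipgrep(): string | null {
  try {
    // Lazy require: only attempted when the builtin path is actually needed.
    const { rgPath } = require('@vscode/ripgrep') as { rgPath: string }
    return rgPath
  } catch {
    // Package not installed (or not resolvable): signal "no builtin binary".
    return null
  }
}
```

A caller would treat a `null` result as "use system `rg`", keeping the retry/timeout logic on a single path.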

Tested locally on Linux (Bun 1.3.13). macOS (codesign hook) and
Windows (rg.exe extension) need contributor verification.
2026-04-30 00:58:46 +08:00
ArkhAngelLifeJiggy
0ca4333537 feat: add streaming token counter (#797)
* feat: add streaming token counter

- Add StreamingTokenCounter for real-time token counting during generation
- Tracks output tokens as they arrive from stream
- Calculates tokens per second rate
- Add tests (4 passing)

PR 4A: Streaming Token Counter (Features 1.2, 1.7)

* refactor: move StreamingTokenCounter to separate file

- Extract StreamingTokenCounter from tokens.ts to streamingTokenCounter.ts
- Add getEstimatedRemainingTokens() method
- Update test import

* fix: word-boundary token counting for stable stream totals

- Accumulate raw content, count only at word boundaries
- Eliminates instability from arbitrary chunk boundaries
- Add finalize() to flush remaining content on stream end
- Add characterCount getter for raw content tracking
- Rename getEstimatedRemainingTokens -> getEstimatedGenerationTimeMs
- Add comprehensive tests

* fix: update streamingTokens test for word-boundary API

- Add finalize() call before checking output tokens
- Use characterCount for interim checks
- Add spaces to trigger word boundary counting

* fix: add estimateRemainingTokens/Time methods

- Add estimateRemainingTokens(target) method
- Add estimateRemainingTimeMs(target) method
- Addresses non-blocking review feedback: remaining tokens are now properly estimated

* fix: PR 797 - fix word boundary counting, consolidate tests

Blockers (Vasanthdev2004):
- recountAtWordBoundary now searches forward from lastCountedIndex+1
- Finds NEXT space after already-counted region, not before it
- Provides accurate live token counts during streaming, not just finalize()

Non-blocking (gnanam1990):
- Delete streamingTokens.test.ts, merge tests into streamingTokenCounter.test.ts
- Added interim-counting test to verify counting updates during streaming

* fix: PR 797 - fix word boundary advancement after space

Blocking:
- Fix recountAtWordBoundary to skip past space when searching for next boundary
- After counting at a space, indexOf(' ') returns 0 (the space itself)
- Now starts search from index 1 to find the NEXT word boundary
- Short chunks now properly trigger count advancement

Non-blocking:
- Add test verifying count increases after each word boundary
- Add test for space-skipping behavior
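The word-boundary scheme these commits converge on can be sketched as below. This is a minimal illustration of the technique, not the PR's actual `StreamingTokenCounter` (class and field names here are assumptions, and the ~4 chars/token ratio is a common heuristic, not taken from the PR).

```typescript
// Sketch: accumulate raw content and count tokens only at word boundaries,
// so arbitrary chunk splits never change the running total.
class WordBoundaryCounter {
  private buffer = ''
  private lastCountedIndex = 0
  private tokens = 0

  addChunk(chunk: string): void {
    this.buffer += chunk
    this.recountAtWordBoundary()
  }

  // Search forward from lastCountedIndex + 1 so the space counted last time
  // is skipped and the NEXT boundary is found (the advancement fix above).
  private recountAtWordBoundary(): void {
    let next = this.buffer.indexOf(' ', this.lastCountedIndex + 1)
    while (next !== -1) {
      this.tokens += Math.ceil((next - this.lastCountedIndex) / 4)
      this.lastCountedIndex = next
      next = this.buffer.indexOf(' ', this.lastCountedIndex + 1)
    }
  }

  // Flush whatever trails the last space when the stream ends.
  finalize(): number {
    const rest = this.buffer.length - this.lastCountedIndex
    if (rest > 0) this.tokens += Math.ceil(rest / 4)
    this.lastCountedIndex = this.buffer.length
    return this.tokens
  }

  get characterCount(): number {
    return this.buffer.length
  }
}
```

Because counting only advances at spaces, feeding `"ab cd ef"` as one chunk or as `"ab c"` + `"d ef"` yields the same final total.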
2026-04-29 16:17:00 +08:00
ArkhAngelLifeJiggy
92d297e50e feat: context preloading and hybrid context strategy (#860)
* feat: context preloading and hybrid context strategy

PR 2D - Section 2.7, 2.8:
- Add contextPreload.ts with pattern-based prediction
- Add hybridContextStrategy.ts with cache/fresh balancing
- Optimize for cost vs accuracy
- Add comprehensive tests (13 passing)

* feat: wire hybrid context strategy into API path

- Apply hybrid strategy after normalizeMessagesForAPI
- Feature-flag controlled (HYBRID_CONTEXT_STRATEGY)
- Optimizes cache/fresh balance for API requests

* fix: resolve PR 2D blocking issues

- Fix predictContextNeeds self-assign bug (matchedCategory = category)
- Add test for non-empty predictedNeed
- Preserve conversation tail in hybridStrategy (never drop last 3 messages)
- Add comment for hardcoded 200k cap in claude.ts

Fixes reviewer feedback from gnanam1990 and Vasanthdev2004

* fix: preserve tool_use/tool_result chains in hybridStrategy

- Increase MIN_TAIL to 5 (tool_use -> tool_result -> assistant -> user -> next)
- Add getMessageChain() to preserve paired messages
- Chains kept together in final selection

* fix: PR 860 - tool_use/tool_result pairing and safe token counting

Blocking:
- getMessageChain() now pairs by tool_use.id (block ID) not msg.message.id
- Find tool_use blocks by id, pair with tool_result having matching tool_use_id
- Fixes tool_result surviving while paired tool_use dropped

- Token counting now includes array content (tool_use, tool_result, thinking),
  not just string content, which prevents undercounting the prompt size

- Deduplicate messages by UUID when combining chains + split + tail
- Prevents duplicate messages in final request

Non-blocking:
- Add regression test for tool_use/tool_result pairing

* fix: PR 860 - account for actual structured payload size in token counting

Blocking:
- getMessageTokenCount now calculates actual token count for structured blocks
- tool_use: uses JSON.stringify(input).length / 4 + base
- tool_result: counts actual content (string or array of text blocks)
- thinking: counts actual thinking text length / 4
- is_error flag adds small overhead

Non-blocking:
- Add tests for large tool_use input and large thinking blocks
2026-04-29 15:49:46 +08:00
emsanakhchivan
91f93ce615 feat: SDK Foundation — Type Declarations, Errors, and Utilities (#866)
* feat(sdk): add SDK foundation — type declarations, errors, and utilities

Adds standalone SDK building blocks with no SDK source dependencies:
- sdk.d.ts: ambient type declarations for SDK bundle
- coreSchemas.ts + coreTypes.generated.ts: Zod schemas and generated types
- errors.ts: SDK-specific error classes
- validation.ts: input validation utilities
- messageFilters.ts: extracted message filter logic
- handlePromptSubmit.ts: imports from messageFilters
- 16 generated-types tests

* fix(sdk): narrow assertFunction type from broad Function to callable signature

Code review finding: assertFunction used `asserts value is Function` which
accepts any function-like value without narrowing. Changed to
`(...args: any[]) => any` for better type safety.

* fix(sdk): update sdk.d.ts header — manually maintained, not generated

Reviewer noted the header said "Generated from index.ts" but no generator
produces this file. Updated to "Manually maintained — keep in sync with
index.ts". Drift detection added in validate-externals.ts (PR 3).

* fix(sdk): align sdk.d.ts types with canonical coreTypes.generated.ts

Tighten SDK public type contract to resolve reviewer blockers:

- PermissionResult: unknown[] → precise 6-shape discriminated union
  (addRules/replaceRules/removeRules/setMode/addDirectories/removeDirectories)
- SDKSessionInfo: snake_case → camelCase (sessionId, lastModified, etc.)
- ForkSessionResult: session_id → sessionId
- SDKPermissionRequestMessage: uuid + session_id now required
- SDKPermissionTimeoutMessage: added uuid + session_id
- SessionMessage: parent_uuid → parentUuid
- SDKMessage/SDKUserMessage/SDKResultMessage: replaced loose inline
  definitions with re-exports from coreTypes.generated.ts
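The six-shape union named above might look roughly like the sketch below. Only the six discriminator names come from the commit message; the field shapes and the use of a `type` discriminator field are assumptions for illustration.

```typescript
// Sketch of a 6-shape discriminated union (field names are assumptions).
type PermissionResult =
  | { type: 'addRules'; rules: string[] }
  | { type: 'replaceRules'; rules: string[] }
  | { type: 'removeRules'; rules: string[] }
  | { type: 'setMode'; mode: string }
  | { type: 'addDirectories'; directories: string[] }
  | { type: 'removeDirectories'; directories: string[] }

// With a discriminated union (vs the old unknown[]), callers narrow each
// shape in a switch without casts.
function summarize(result: PermissionResult): string {
  switch (result.type) {
    case 'setMode':
      return `mode=${result.mode}`
    case 'addRules':
    case 'replaceRules':
    case 'removeRules':
      return `${result.type}:${result.rules.length}`
    case 'addDirectories':
    case 'removeDirectories':
      return `${result.type}:${result.directories.length}`
  }
}
```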

---------

Co-authored-by: Ali Alakbarli <ali.alakbarli@users.noreply.github.com>
2026-04-29 14:53:01 +08:00
KRATOS
5943c5c269 fix(input): strip leading ! when entering bash mode (#947)
The PromptInput onChange handler had two branches for entering bash
mode: a single-char path that just toggled the mode and a multi-char
paste path that also stripped the leading `!` from the buffer. The
single-char path returned without stripping, so typing a bare `!` into
empty input switched modes but left the literal `!` visible.

Consolidated both paths through a new pure helper `detectModeEntry`
that returns the new mode plus the stripped buffer value, so there is
no longer a branch where the mode character can leak into the buffer.

Fixes #662
2026-04-29 10:29:59 +08:00
Kevin Codex
c0b5535d86 docs: add Atomic Chat partner (#942)
Co-authored-by: OpenClaude <openclaude@gitlawb.com>
2026-04-28 23:35:25 +08:00
Vasanth T
d321c8fc6a fix: avoid legacy Windows PasswordVault reads by default (#941)
* fix: avoid legacy Windows PasswordVault reads by default

* fix: isolate model capability override cache

---------

Co-authored-by: OpenClaude Worker 3 <worker-3@openclaude.local>
2026-04-28 23:30:48 +08:00
KRATOS
8106880855 fix(typecheck): make bun run typecheck actionable on main (#473) (#938)
Issue #473 reported that `bun run typecheck` fails on main with ~4400
errors due to repo-foundation drift, masking branch-specific
regressions. Per kevincodex1's guidance ("lets narrow the typecheck
scope for now and then we expand step by step") this PR addresses the
foundational root causes and brings the error count down 60% so the
gate is actionable for branch reviews.

Changes:

- tsconfig.json: bump target to ES2023 + add lib ["ES2023", "DOM"]
  so Array.findLast / findLastIndex resolve (kills 41 TS2550 errors).
  Add `noEmit: true` for typecheck-only mode and
  `allowImportingTsExtensions: true` (kills 40 TS5097 errors). Set
  `noImplicitAny: false` because cleaning up TSX-component implicit
  any is explicitly out of scope per the issue.

- src/global.d.ts: ambient declaration for the build-time MACRO
  global injected by scripts/build.ts via Bun's `define` option
  (kills 9 TS2304 'Cannot find name MACRO' errors).

- src/types/{message,utils,tools}.ts: stubs for the highest-impact
  missing modules from the partial source snapshot (~21 importers
  for message alone). Document the snapshot caveat at the top of each
  stub and reference issue #473 so future readers know they're
  placeholders.

- src/entrypoints/sdk/controlTypes.ts and src/constants/querySource.ts:
  similar one-file stubs unblocking 18 + 19 importers respectively.

- src/entrypoints/agentSdkTypes.ts: append `any`-typed aliases for
  ~70 SDK names that callers expect on the public surface but that
  live in stubbed sub-files (PermissionMode, SDKCompactBoundaryMessage,
  HookEvent, ModelUsage, ModelInfo, etc. — exactly the list from
  auriti's bug-report enumeration).
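Condensed to just the options named above, the tsconfig.json changes amount to roughly the following (a sketch; the repo's actual tsconfig carries more fields):

```json
{
  "compilerOptions": {
    "target": "ES2023",
    "lib": ["ES2023", "DOM"],
    "noEmit": true,
    "allowImportingTsExtensions": true,
    "noImplicitAny": false
  }
}
```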

Verified locally on Linux:
- baseline `bunx tsc --noEmit` on stashed main: 4434 errors
- with PR applied:                              1782 errors (60% drop)
- `bun run build`:                              passes (v0.7.0)
- `bun test`:                                   1632 pass; the 4
   remaining failures (StartupScreen, thinking) reproduce on main
   and are unrelated.
- TS2550 (lib): 41 → 0
- TS5097 (.ts imports): 40 → 0
- TS2304 'MACRO': 9 → 0
- TS2307 missing modules: 587 → 325

Remaining errors are localized to specific stubbed modules and can
be addressed in smaller follow-up issues, matching the issue's
"Definition of done" criterion.
2026-04-28 17:44:26 +08:00
Kevin Codex
4c93a9f9f1 feat: add Opus 4.7 as default model and fix alias/thinking bugs (#928)
- Add CLAUDE_OPUS_4_7_CONFIG and register it in ALL_MODEL_CONFIGS
- Set Opus 4.7 as default for firstParty in getDefaultOpusModel() (3P stays on 4.6 until rollout)
- Fix sonnet[1m] → 404 bug: query.ts was passing raw alias to API without resolving via parseUserSpecifiedModel
- Add opus-4-7 to modelSupportsAdaptiveThinking so it uses { type: 'adaptive' } not { type: 'enabled' }
- Fix duplicate opus47 case and wrong opus46[1m] fallthrough in getPublicModelDisplayName switch
- Update user-facing display strings (picker labels, plan mode description) to reference Opus 4.7
- Add 3P fallback suggestion chain for opus-4-7 → opus-4-6 in validateModel

Co-authored-by: OpenClaude <openclaude@gitlawb.com>
2026-04-28 17:31:06 +08:00
viudes
6ea3eb6483 feat(api): deterministic request-body serialization via stableStringify (#882)
* feat(api): deterministic request-body serialization via stableStringify

Add `stableStringify` helper that emits JSON with object keys sorted
lexicographically at every depth (arrays preserved). Adopt it in the
OpenAI-compatible shim and the Codex Responses-API shim for the outgoing
request body.

WHY: OpenAI / Kimi / DeepSeek / Codex use implicit prefix caching keyed
on exact request bytes. Spurious insertion-order differences in
spread-merged body objects otherwise invalidate the cache on every turn.
Also a pre-requisite for Anthropic `cache_control` breakpoint hits.

Byte-equivalent to `JSON.stringify` when keys already happen to be in
lexical insertion order, so strictly additive across providers.

* fix(api): preserve circular-ref TypeError in stableStringify + cover GitHub fallback

Replace two-pass sortingReplacer approach with a single-pass deepSort that
tracks ancestor objects via WeakSet, throwing TypeError on cycles (same
contract as native JSON.stringify) and correctly handling DAGs via
try/finally cleanup. Switch the GitHub Copilot /responses fallback in
openaiShim.ts from JSON.stringify to stableStringify so that path is also
byte-stable for prefix caching.

Regression coverage added: top-level cycle, deep nested cycle, DAG safety.

* fix(api): align stableStringify with native JSON.stringify pre-processing

Replicate native JSON.stringify pre-processing inside deepSort so
serialization output matches native behavior beyond key ordering:

- invoke toJSON(key) when present (Date, URL, user classes); pass ''
  at top-level, property name for nested values, index string for
  array elements
- unbox Number/String/Boolean wrappers via valueOf() so new Boolean(false)
  doesn't get truthy-coerced
- run cycle detection on the post-toJSON value so a toJSON returning
  an ancestor still throws TypeError; DAGs continue to not throw
- drop properties whose toJSON returns undefined, matching native

Add focused stableStringify.test.ts (21 cases) asserting equality with
JSON.stringify across toJSON paths, wrapper unboxing, cycle/DAG handling,
and sortKeysDeep parity.
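The core of the approach can be sketched as below. This is a minimal illustration of the single-pass deepSort with WeakSet cycle tracking, not the PR's full implementation (the toJSON invocation and wrapper unboxing described above are elided).

```typescript
// Sketch: deterministic JSON with object keys sorted lexicographically at
// every depth; arrays preserve order. Cycles throw TypeError (same contract
// as native JSON.stringify); DAGs are fine via try/finally cleanup.
function stableStringify(value: unknown): string {
  const seen = new WeakSet<object>()
  const deepSort = (v: unknown): unknown => {
    if (v === null || typeof v !== 'object') return v
    if (seen.has(v)) throw new TypeError('Converting circular structure to JSON')
    seen.add(v)
    try {
      if (Array.isArray(v)) return v.map(deepSort)
      // Rebuild plain objects with keys in lexical order.
      return Object.fromEntries(
        Object.keys(v)
          .sort()
          .map((k) => [k, deepSort((v as Record<string, unknown>)[k])]),
      )
    } finally {
      // Remove on the way out so shared (non-cyclic) references don't throw.
      seen.delete(v)
    }
  }
  return JSON.stringify(deepSort(value))
}
```

Because the rebuilt objects carry sorted insertion order, two bodies that differ only in spread-merge key order serialize to identical bytes, which is exactly what byte-keyed prefix caches need.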
2026-04-27 23:33:15 +08:00
vrdons
f699c1f2fc fix routing path (#923) 2026-04-27 20:05:17 +08:00
50 changed files with 5572 additions and 103 deletions

View File

@@ -25,12 +25,18 @@ OpenClaude is also mirrored to GitLawb:
<a href="https://bankr.bot">
<img src="https://bankr.bot/favicon.svg" alt="Bankr.bot logo" width="96">
</a>
&nbsp;&nbsp;&nbsp;&nbsp;
<a href="https://atomic.chat/">
<img src="docs/assets/atomic-chat-logo.png" alt="Atomic Chat logo" width="96">
</a>
</p>
<p align="center">
<a href="https://gitlawb.com"><strong>GitLawb</strong></a>
&nbsp;&nbsp;&nbsp;&nbsp;
<a href="https://bankr.bot"><strong>Bankr.bot</strong></a>
&nbsp;&nbsp;&nbsp;&nbsp;
<a href="https://atomic.chat/"><strong>Atomic Chat</strong></a>
</p>
## Star History
@@ -170,7 +176,7 @@ For best results, use models with strong tool/function calling support.
OpenClaude can route different agents to different models through settings-based routing. This is useful for cost optimization or splitting work by model strength.
Add to `~/.claude/settings.json`:
Add to `~/.openclaude.json`:
```json
{

View File

@@ -28,6 +28,7 @@
"@opentelemetry/sdk-trace-base": "2.6.1",
"@opentelemetry/sdk-trace-node": "2.6.1",
"@opentelemetry/semantic-conventions": "1.40.0",
"@vscode/ripgrep": "^1.17.1",
"ajv": "8.18.0",
"auto-bind": "5.0.1",
"axios": "1.15.0",
@@ -461,6 +462,8 @@
"@types/react": ["@types/react@19.2.14", "", { "dependencies": { "csstype": "^3.2.2" } }, "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w=="],
"@vscode/ripgrep": ["@vscode/ripgrep@1.17.1", "", { "dependencies": { "https-proxy-agent": "^7.0.2", "proxy-from-env": "^1.1.0", "yauzl": "^2.9.2" } }, "sha512-xTs7DGyAO3IsJYOCTBP8LnTvPiYVKEuyv8s0xyJDBXfs8rhBfqnZPvb6xDT+RnwWzcXqW27xLS/aGrkjX7lNWw=="],
"accepts": ["accepts@2.0.0", "", { "dependencies": { "mime-types": "^3.0.0", "negotiator": "^1.0.0" } }, "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng=="],
"agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="],
@@ -491,6 +494,8 @@
"bowser": ["bowser@2.14.1", "", {}, "sha512-tzPjzCxygAKWFOJP011oxFHs57HzIhOEracIgAePE4pqB3LikALKnSzUyU4MGs9/iCEUuHlAJTjTc5M+u7YEGg=="],
"buffer-crc32": ["buffer-crc32@0.2.13", "", {}, "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ=="],
"buffer-equal-constant-time": ["buffer-equal-constant-time@1.0.1", "", {}, "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA=="],
"bun-types": ["bun-types@1.3.11", "", { "dependencies": { "@types/node": "*" } }, "sha512-1KGPpoxQWl9f6wcZh57LvrPIInQMn2TQ7jsgxqpRzg+l0QPOFvJVH7HmvHo/AiPgwXy+/Thf6Ov3EdVn1vOabg=="],
@@ -609,6 +614,8 @@
"fast-xml-parser": ["fast-xml-parser@5.5.8", "", { "dependencies": { "fast-xml-builder": "^1.1.4", "path-expression-matcher": "^1.2.0", "strnum": "^2.2.0" }, "bin": { "fxparser": "src/cli/cli.js" } }, "sha512-Z7Fh2nVQSb2d+poDViM063ix2ZGt9jmY1nWhPfHBOK2Hgnb/OW3P4Et3P/81SEej0J7QbWtJqxO05h8QYfK7LQ=="],
"fd-slicer": ["fd-slicer@1.1.0", "", { "dependencies": { "pend": "~1.2.0" } }, "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g=="],
"fflate": ["fflate@0.8.2", "", {}, "sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A=="],
"figures": ["figures@6.1.0", "", { "dependencies": { "is-unicode-supported": "^2.0.0" } }, "sha512-d+l3qxjSesT4V7v2fh+QnmFnUWv9lSpjarhShNTgBOfA0ttejbQUAlHLitbjkoRiDulW0OPoQPYIGhIC8ohejg=="],
@@ -787,6 +794,8 @@
"path-to-regexp": ["path-to-regexp@8.4.1", "", {}, "sha512-fvU78fIjZ+SBM9YwCknCvKOUKkLVqtWDVctl0s7xIqfmfb38t2TT4ZU2gHm+Z8xGwgW+QWEU3oQSAzIbo89Ggw=="],
"pend": ["pend@1.2.0", "", {}, "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg=="],
"picomatch": ["picomatch@4.0.4", "", {}, "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A=="],
"pkce-challenge": ["pkce-challenge@5.0.1", "", {}, "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ=="],
@@ -801,7 +810,7 @@
"proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="],
"proxy-from-env": ["proxy-from-env@2.1.0", "", {}, "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA=="],
"proxy-from-env": ["proxy-from-env@1.1.0", "", {}, "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="],
"qrcode": ["qrcode@1.5.4", "", { "dependencies": { "dijkstrajs": "^1.0.1", "pngjs": "^5.0.0", "yargs": "^15.3.1" }, "bin": { "qrcode": "bin/qrcode" } }, "sha512-1ca71Zgiu6ORjHqFBDpnSMTR2ReToX4l1Au1VFLyVeBTFavzQnv5JxMFr3ukHVKpSrSA2MCk0lNJSykjUfz7Zg=="],
@@ -953,6 +962,8 @@
"yargs-parser": ["yargs-parser@21.1.1", "", {}, "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw=="],
"yauzl": ["yauzl@2.10.0", "", { "dependencies": { "buffer-crc32": "~0.2.3", "fd-slicer": "~1.1.0" } }, "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g=="],
"yoctocolors": ["yoctocolors@2.1.2", "", {}, "sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug=="],
"zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="],
@@ -1369,6 +1380,8 @@
"@smithy/uuid/tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
"axios/proxy-from-env": ["proxy-from-env@2.1.0", "", {}, "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA=="],
"cli-highlight/chalk": ["chalk@4.1.2", "", { "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" } }, "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA=="],
"cli-highlight/yargs": ["yargs@16.2.0", "", { "dependencies": { "cliui": "^7.0.2", "escalade": "^3.1.1", "get-caller-file": "^2.0.5", "require-directory": "^2.1.1", "string-width": "^4.2.0", "y18n": "^5.0.5", "yargs-parser": "^20.2.2" } }, "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw=="],
@@ -1429,6 +1442,8 @@
"@aws-sdk/nested-clients/@smithy/util-base64/@smithy/util-buffer-from": ["@smithy/util-buffer-from@4.2.2", "", { "dependencies": { "@smithy/is-array-buffer": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-FDXD7cvUoFWwN6vtQfEta540Y/YBe5JneK3SoZg9bThSoOAC/eGeYEua6RkBgKjGa/sz6Y+DuBZj3+YEY21y4Q=="],
"@mendable/firecrawl-js/axios/proxy-from-env": ["proxy-from-env@2.1.0", "", {}, "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA=="],
"@opentelemetry/exporter-trace-otlp-grpc/@opentelemetry/core/@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.28.0", "", {}, "sha512-lp4qAiMTD4sNWW4DbKLBkfiMZ4jbAboJIGOQr5DvciMRI494OapieI9qiODpOt0XBr1LjIDy1xAGAnVs5supTA=="],
"@opentelemetry/exporter-trace-otlp-grpc/@opentelemetry/otlp-transformer/@opentelemetry/api-logs": ["@opentelemetry/api-logs@0.57.2", "", { "dependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-uIX52NnTM0iBh84MShlpouI7UKqkZ7MrUszTmaypHBu4r7NofznSnQRfJ+uUeDtQDj6w8eFGg5KBLDAwAPz1+A=="],
@@ -1509,6 +1524,8 @@
"cliui/wrap-ansi/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="],
"firecrawl/axios/proxy-from-env": ["proxy-from-env@2.1.0", "", {}, "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA=="],
"form-data/mime-types/mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="],
"qrcode/yargs/cliui": ["cliui@6.0.0", "", { "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.0", "wrap-ansi": "^6.2.0" } }, "sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ=="],

Binary file not shown.


View File

@@ -74,6 +74,7 @@
"@opentelemetry/sdk-trace-base": "2.6.1",
"@opentelemetry/sdk-trace-node": "2.6.1",
"@opentelemetry/semantic-conventions": "1.40.0",
"@vscode/ripgrep": "^1.17.1",
"ajv": "8.18.0",
"auto-bind": "5.0.1",
"axios": "1.15.0",

View File

@@ -472,6 +472,11 @@ ${exports}
'@aws-sdk/credential-providers',
'@azure/identity',
'google-auth-library',
// @vscode/ripgrep ships a platform-specific binary alongside its
// index.js and resolves the path via __dirname at runtime. Bundling
// would freeze the build host's absolute path into dist/cli.mjs, so we
// keep it external and rely on the npm package being installed.
'@vscode/ripgrep',
],
})

View File

@@ -111,7 +111,7 @@ import { BackgroundTasksDialog } from '../tasks/BackgroundTasksDialog.js';
import { shouldHideTasksFooter } from '../tasks/taskStatusUtils.js';
import { TeamsDialog } from '../teams/TeamsDialog.js';
import VimTextInput from '../VimTextInput.js';
import { getModeFromInput, getValueFromInput } from './inputModes.js';
import { detectModeEntry, getModeFromInput, getValueFromInput } from './inputModes.js';
import { FOOTER_TEMPORARY_STATUS_TIMEOUT, Notifications } from './Notifications.js';
import PromptInputFooter from './PromptInputFooter.js';
import type { SuggestionItem } from './PromptInputFooterSuggestions.js';
@@ -878,24 +878,22 @@ function PromptInput({
abortPromptSuggestion();
abortSpeculation(setAppState);
// Check if this is a single character insertion at the start
const isSingleCharInsertion = value.length === input.length + 1;
const insertedAtStart = cursorOffset === 0;
const mode = getModeFromInput(value);
if (insertedAtStart && mode !== 'prompt') {
if (isSingleCharInsertion) {
onModeChange(mode);
return;
}
// Multi-char insertion into empty input (e.g. tab-accepting "! gcloud auth login")
if (input.length === 0) {
onModeChange(mode);
const valueWithoutMode = getValueFromInput(value).replaceAll('\t', ' ');
pushToBuffer(input, cursorOffset, pastedContents);
trackAndSetInput(valueWithoutMode);
setCursorOffset(valueWithoutMode.length);
return;
}
// Strip the mode character from the buffer when entering bash mode — the
// mode itself is shown via the prompt prefix in the UI. Without this,
// typing `!` into empty input would enter bash mode but leave the literal
// `!` in the buffer (issue #662).
const modeEntry = detectModeEntry({
value,
prevInputLength: input.length,
cursorOffset,
});
if (modeEntry) {
onModeChange(modeEntry.mode);
const cleaned = modeEntry.strippedValue.replaceAll('\t', ' ');
pushToBuffer(input, cursorOffset, pastedContents);
trackAndSetInput(cleaned);
setCursorOffset(cleaned.length);
return;
}
const processedValue = value.replaceAll('\t', ' ');

View File

@@ -0,0 +1,104 @@
import { describe, expect, it } from 'bun:test'
import {
detectModeEntry,
getModeFromInput,
getValueFromInput,
isInputModeCharacter,
prependModeCharacterToInput,
} from './inputModes.js'
describe('inputModes', () => {
describe('getModeFromInput', () => {
it('returns bash mode for input starting with !', () => {
expect(getModeFromInput('!')).toBe('bash')
expect(getModeFromInput('!ls')).toBe('bash')
})
it('returns prompt mode for non-bash input', () => {
expect(getModeFromInput('')).toBe('prompt')
expect(getModeFromInput('hello')).toBe('prompt')
expect(getModeFromInput(' !')).toBe('prompt')
})
})
describe('getValueFromInput', () => {
it('strips the leading ! when entering bash mode', () => {
expect(getValueFromInput('!')).toBe('')
expect(getValueFromInput('!ls -la')).toBe('ls -la')
})
it('returns input unchanged in prompt mode', () => {
expect(getValueFromInput('')).toBe('')
expect(getValueFromInput('hello')).toBe('hello')
})
})
describe('isInputModeCharacter', () => {
it('returns true only for the bare ! character', () => {
expect(isInputModeCharacter('!')).toBe(true)
expect(isInputModeCharacter('!ls')).toBe(false)
expect(isInputModeCharacter('')).toBe(false)
})
})
describe('prependModeCharacterToInput', () => {
it('prepends ! when mode is bash', () => {
expect(prependModeCharacterToInput('ls', 'bash')).toBe('!ls')
expect(prependModeCharacterToInput('', 'bash')).toBe('!')
})
it('returns input unchanged in prompt mode', () => {
expect(prependModeCharacterToInput('hello', 'prompt')).toBe('hello')
})
})
describe('detectModeEntry', () => {
// Regression for #662 — typing `!` into empty input must switch to bash
// mode AND yield an empty stripped buffer. Before the fix the single-char
// path returned without stripping, leaving `!` visible in the buffer.
it('strips the mode character when typing ! into empty input', () => {
expect(
detectModeEntry({ value: '!', prevInputLength: 0, cursorOffset: 0 }),
).toEqual({ mode: 'bash', strippedValue: '' })
})
it('strips the mode character when pasting !cmd into empty input', () => {
expect(
detectModeEntry({ value: '!ls -la', prevInputLength: 0, cursorOffset: 0 }),
).toEqual({ mode: 'bash', strippedValue: 'ls -la' })
})
it('returns null when the cursor is not at the start', () => {
expect(
detectModeEntry({ value: '!', prevInputLength: 0, cursorOffset: 1 }),
).toBeNull()
})
it('returns null when the value does not start with !', () => {
expect(
detectModeEntry({ value: 'hello', prevInputLength: 0, cursorOffset: 0 }),
).toBeNull()
})
it('returns null when typing ! after existing text', () => {
// value="ab!" with prevInputLength=2 is a single-char insertion but does
// not start with ! — getModeFromInput returns 'prompt'.
expect(
detectModeEntry({ value: 'ab!', prevInputLength: 2, cursorOffset: 0 }),
).toBeNull()
})
it('returns null when prepending ! to non-empty existing text', () => {
// Single-char insertion at start that produces "!ab" from "ab" — value
// length is 3, prevInputLength is 2, so isSingleCharInsertion is true
// and isMultiCharIntoEmpty is false. We accept the mode change here so
// that typing ! at the start of existing text still toggles mode.
const result = detectModeEntry({
value: '!ab',
prevInputLength: 2,
cursorOffset: 0,
})
expect(result).toEqual({ mode: 'bash', strippedValue: 'ab' })
})
})
})

View File

@@ -31,3 +31,30 @@ export function getValueFromInput(input: string): string {
export function isInputModeCharacter(input: string): boolean {
return input === '!'
}
export type ModeEntryDecision = {
mode: HistoryMode
strippedValue: string
}
/**
* Decide whether an onChange `value` should switch the input mode (e.g.
* `prompt` → `bash`) and what the stripped buffer value should be.
*
* Returns null when no mode change applies. Returns a decision otherwise so
* callers run a single update path — no separate single-char vs multi-char
* branches that can drift apart.
*/
export function detectModeEntry(args: {
value: string
prevInputLength: number
cursorOffset: number
}): ModeEntryDecision | null {
if (args.cursorOffset !== 0) return null
const mode = getModeFromInput(args.value)
if (mode === 'prompt') return null
const isSingleCharInsertion = args.value.length === args.prevInputLength + 1
const isMultiCharIntoEmpty = args.prevInputLength === 0
if (!isSingleCharInsertion && !isMultiCharIntoEmpty) return null
return { mode, strippedValue: getValueFromInput(args.value) }
}

View File

@@ -0,0 +1,7 @@
/**
* Stub — query source enum not included in source snapshot. See
* src/types/message.ts for the same scoping caveat (issue #473).
*/
/* eslint-disable @typescript-eslint/no-explicit-any */
export type QuerySource = any

View File

@@ -442,7 +442,84 @@ export async function connectRemoteControl(
throw new Error('not implemented')
}
// add exit reason types for removing the error within gracefulShutdown file
export type ExitReason = {
}
}
// ============================================================================
// Stub re-exports — types not included in source snapshot.
//
// The upstream Anthropic SDK defines these in sub-files (sdk/coreTypes,
// sdk/runtimeTypes, sdk/controlTypes, sdk/toolTypes) that are stubbed
// in this open repo. Until the real definitions are restored, alias the
// names to `any` so callers can resolve their imports and `tsc` becomes
// actionable. See issue #473 for the typecheck-foundation effort.
// ============================================================================
/* eslint-disable @typescript-eslint/no-explicit-any */
export type AnyZodRawShape = any
export type ApiKeySource = any
export type AsyncHookJSONOutput = any
export type ConfigChangeHookInput = any
export type CwdChangedHookInput = any
export type ElicitationHookInput = any
export type ElicitationResultHookInput = any
export type FileChangedHookInput = any
export type ForkSessionOptions = any
export type ForkSessionResult = any
export type GetSessionInfoOptions = any
export type GetSessionMessagesOptions = any
export type HookEvent = any
export type HookInput = any
export type HookJSONOutput = any
export type InferShape<_T> = any
export type InstructionsLoadedHookInput = any
export type InternalOptions = any
export type InternalQuery = any
export type ListSessionsOptions = any
export type McpSdkServerConfigWithInstance = any
export type McpServerConfigForProcessTransport = any
export type McpServerStatus = any
export type ModelInfo = any
export type ModelUsage = any
export type NotificationHookInput = any
export type Options = any
export type PermissionDeniedHookInput = any
export type PermissionMode = any
export type PermissionRequestHookInput = any
export type PermissionResult = any
export type PermissionUpdate = any
export type PostCompactHookInput = any
export type PostToolUseFailureHookInput = any
export type PostToolUseHookInput = any
export type PreCompactHookInput = any
export type PreToolUseHookInput = any
export type Query = any
export type RewindFilesResult = any
export type SDKAssistantMessage = any
export type SDKAssistantMessageError = any
export type SDKCompactBoundaryMessage = any
export type SdkMcpToolDefinition = any
export type SDKPartialAssistantMessage = any
export type SDKPermissionDenial = any
export type SDKRateLimitInfo = any
export type SDKStatus = any
export type SDKStatusMessage = any
export type SDKSystemMessage = any
export type SDKToolProgressMessage = any
export type SDKUserMessageReplay = any
export type SessionEndHookInput = any
export type SessionMessage = any
export type SessionMutationOptions = any
export type SessionStartHookInput = any
export type SetupHookInput = any
export type StopFailureHookInput = any
export type StopHookInput = any
export type SubagentStartHookInput = any
export type SubagentStopHookInput = any
export type SyncHookJSONOutput = any
export type TaskCompletedHookInput = any
export type TaskCreatedHookInput = any
export type TeammateIdleHookInput = any
export type UserPromptSubmitHookInput = any

src/entrypoints/sdk.d.ts (vendored, new file, 518 lines)

@@ -0,0 +1,518 @@
// Type declarations for @gitlawb/openclaude SDK
// Manually maintained — keep in sync with src/entrypoints/sdk/index.ts
// Drift is caught by validate-externals.ts (runs in CI)
// ============================================================================
// Error
// ============================================================================
export class AbortError extends Error {
override readonly name: 'AbortError'
}
export class ClaudeError extends Error {
constructor(message: string)
}
export class SDKError extends ClaudeError {
constructor(message: string)
}
export class SDKAuthenticationError extends SDKError {
constructor(message?: string)
}
export class SDKBillingError extends SDKError {
constructor(message?: string)
}
export class SDKRateLimitError extends SDKError {
constructor(
message?: string,
readonly resetsAt?: number,
readonly rateLimitType?: string,
)
}
export class SDKInvalidRequestError extends SDKError {
constructor(message?: string)
}
export class SDKServerError extends SDKError {
constructor(message?: string)
}
export class SDKMaxOutputTokensError extends SDKError {
constructor(message?: string)
}
export type SDKAssistantMessageError =
| 'authentication_failed'
| 'billing_error'
| 'rate_limit'
| 'invalid_request'
| 'server_error'
| 'unknown'
| 'max_output_tokens'
export function sdkErrorFromType(
errorType: SDKAssistantMessageError,
message?: string,
): SDKError | ClaudeError
// ============================================================================
// Types
// ============================================================================
export type ApiKeySource = 'user' | 'project' | 'org' | 'temporary' | 'oauth' | 'none'
export type RewindFilesResult = {
canRewind: boolean
error?: string
filesChanged?: string[]
insertions?: number
deletions?: number
}
export type McpServerStatus = {
name: string
status: 'connected' | 'failed' | 'needs-auth' | 'pending' | 'disabled'
serverInfo?: { name: string; version: string }
error?: string
scope?: string
tools?: {
name: string
description?: string
annotations?: {
readOnly?: boolean
destructive?: boolean
openWorld?: boolean
}
}[]
}
export type PermissionResult = ({
behavior: 'allow'
updatedInput?: Record<string, unknown>
// NOTE: outer parens are required; with a bare union the [] would attach to the last member only.
updatedPermissions?: (({
type: 'addRules'
rules: { toolName: string; ruleContent?: string }[]
behavior: 'allow' | 'deny' | 'ask'
destination: 'userSettings' | 'projectSettings' | 'localSettings' | 'session' | 'cliArg'
}) | ({
type: 'replaceRules'
rules: { toolName: string; ruleContent?: string }[]
behavior: 'allow' | 'deny' | 'ask'
destination: 'userSettings' | 'projectSettings' | 'localSettings' | 'session' | 'cliArg'
}) | ({
type: 'removeRules'
rules: { toolName: string; ruleContent?: string }[]
behavior: 'allow' | 'deny' | 'ask'
destination: 'userSettings' | 'projectSettings' | 'localSettings' | 'session' | 'cliArg'
}) | ({
type: 'setMode'
mode: 'default' | 'acceptEdits' | 'bypassPermissions' | 'plan' | 'dontAsk'
destination: 'userSettings' | 'projectSettings' | 'localSettings' | 'session' | 'cliArg'
}) | ({
type: 'addDirectories'
directories: string[]
destination: 'userSettings' | 'projectSettings' | 'localSettings' | 'session' | 'cliArg'
}) | ({
type: 'removeDirectories'
directories: string[]
destination: 'userSettings' | 'projectSettings' | 'localSettings' | 'session' | 'cliArg'
}))[]
toolUseID?: string
decisionClassification?: 'user_temporary' | 'user_permanent' | 'user_reject'
}) | ({
behavior: 'deny'
message: string
interrupt?: boolean
toolUseID?: string
decisionClassification?: 'user_temporary' | 'user_permanent' | 'user_reject'
})
export type SDKSessionInfo = {
sessionId: string
summary: string
lastModified: number
fileSize?: number
customTitle?: string
firstPrompt?: string
gitBranch?: string
cwd?: string
tag?: string
createdAt?: number
}
export type ListSessionsOptions = {
dir?: string
limit?: number
offset?: number
includeWorktrees?: boolean
}
export type GetSessionInfoOptions = {
dir?: string
}
export type GetSessionMessagesOptions = {
dir?: string
limit?: number
offset?: number
includeSystemMessages?: boolean
}
export type SessionMutationOptions = {
dir?: string
}
export type ForkSessionOptions = {
dir?: string
upToMessageId?: string
title?: string
}
export type ForkSessionResult = {
sessionId: string
}
export type SessionMessage = {
role: 'user' | 'assistant' | 'system'
content: unknown
timestamp?: string
uuid?: string
parentUuid?: string | null
[key: string]: unknown
}
// Re-export precise SDK message types from generated types
// These use camelCase field names and discriminated unions for full IntelliSense
export type { SDKMessage, SDKUserMessage, SDKResultMessage } from './sdk/coreTypes.generated.js'
// ============================================================================
// Query types
// ============================================================================
export type QueryPermissionMode =
| 'default'
| 'plan'
| 'auto-accept'
| 'bypass-permissions'
| 'bypassPermissions'
| 'acceptEdits'
export type QueryOptions = {
cwd: string
additionalDirectories?: string[]
model?: string
sessionId?: string
/** Fork the session before resuming (requires sessionId). */
fork?: boolean
/** Alias for fork. When true, resumed session forks to a new session ID. */
forkSession?: boolean
/** Resume the most recent session for this cwd (no sessionId needed). */
continue?: boolean
resume?: string
/** When resuming, resume messages up to and including this message UUID. */
resumeSessionAt?: string
permissionMode?: QueryPermissionMode
abortController?: AbortController
executable?: string
allowDangerouslySkipPermissions?: boolean
disallowedTools?: string[]
hooks?: Record<string, unknown[]>
mcpServers?: Record<string, unknown>
settings?: {
env?: Record<string, string>
attribution?: { commit: string; pr: string }
}
/** Environment variables to apply during query execution. Overrides process.env. Takes precedence over settings.env. */
env?: Record<string, string | undefined>
/**
* Callback invoked before each tool use. Return `{ behavior: 'allow' }` to
* permit the call or `{ behavior: 'deny', message?: string }` to reject it.
*
* **Secure-by-default**: If neither `canUseTool` nor `onPermissionRequest`
* is provided, ALL tool uses are denied. You MUST provide at least one of
* these callbacks to allow tool execution.
*/
canUseTool?: (
name: string,
input: unknown,
options?: { toolUseID?: string },
) => Promise<{ behavior: 'allow' | 'deny'; message?: string; updatedInput?: unknown }>
/**
* Callback invoked when a tool needs permission approval. The host receives
* the request immediately and can resolve it by calling
* `query.respondToPermission(toolUseId, decision)` before the timeout.
* If omitted, tools that require permission fall through to the default
* permission logic immediately (no timeout).
*/
onPermissionRequest?: (message: SDKPermissionRequestMessage) => void
systemPrompt?:
| string
| { type: 'preset'; preset: string; append?: string }
| { type: 'custom'; content: string }
/** Agent definitions to register with the query engine. */
agents?: Record<string, {
description: string
prompt: string
tools?: string[]
disallowedTools?: string[]
model?: string
maxTurns?: number
}>
settingSources?: string[]
/** When true, yields stream_event messages for token-by-token streaming. */
includePartialMessages?: boolean
/** @internal Timeout in ms for permission request resolution. Default 30000. */
_permissionTimeoutMs?: number
stderr?: (data: string) => void
}
export interface Query {
readonly sessionId: string
[Symbol.asyncIterator](): AsyncIterator<SDKMessage>
setModel(model: string): Promise<void>
setPermissionMode(mode: QueryPermissionMode): Promise<void>
close(): void
interrupt(): void
respondToPermission(toolUseId: string, decision: PermissionResult): void
/** Check if file rewind is possible. */
rewindFiles(): RewindFilesResult
/** Actually perform the file rewind. Returns files changed and diff stats. */
rewindFilesAsync(): Promise<RewindFilesResult>
supportedCommands(): string[]
supportedModels(): string[]
supportedAgents(): string[]
mcpServerStatus(): McpServerStatus[]
accountInfo(): Promise<{ apiKeySource: ApiKeySource; [key: string]: unknown }>
setMaxThinkingTokens(tokens: number): void
}
/**
* Permission request message emitted when a tool needs permission approval.
* Hosts can respond via respondToPermission() using the request_id.
*/
export type SDKPermissionRequestMessage = {
type: 'permission_request'
request_id: string
tool_name: string
tool_use_id: string
input: Record<string, unknown>
uuid: string
session_id: string
}
export type SDKPermissionTimeoutMessage = {
type: 'permission_timeout'
tool_name: string
tool_use_id: string
timed_out_after_ms: number
uuid: string
session_id: string
}
// ============================================================================
// V2 API types
// ============================================================================
export type SDKSessionOptions = {
cwd: string
model?: string
permissionMode?: QueryPermissionMode
abortController?: AbortController
/**
* Callback invoked before each tool use. Return `{ behavior: 'allow' }` to
* permit the call or `{ behavior: 'deny', message?: string }` to reject it.
*
* **Secure-by-default**: If neither `canUseTool` nor `onPermissionRequest`
* is provided, ALL tool uses are denied. You MUST provide at least one of
* these callbacks to allow tool execution.
*/
canUseTool?: (
name: string,
input: unknown,
options?: { toolUseID?: string },
) => Promise<{ behavior: 'allow' | 'deny'; message?: string; updatedInput?: unknown }>
/** MCP server configurations for this session. */
mcpServers?: Record<string, unknown>
/**
* Callback invoked when a tool needs permission approval. The host receives
* the request immediately and can resolve it via respondToPermission().
*/
onPermissionRequest?: (message: SDKPermissionRequestMessage) => void
}
export interface SDKSession {
sessionId: string
sendMessage(content: string): AsyncIterable<SDKMessage>
getMessages(): SDKMessage[]
interrupt(): void
/** Respond to a pending permission prompt. */
respondToPermission(toolUseId: string, decision: PermissionResult): void
}
// ============================================================================
// MCP tool types
// ============================================================================
export interface SdkMcpToolDefinition<Schema = any> {
name: string
description: string
inputSchema: Schema
handler: (args: any, extra: unknown) => Promise<any>
annotations?: any
searchHint?: string
alwaysLoad?: boolean
}
// ============================================================================
// Session functions
// ============================================================================
export function listSessions(
options?: ListSessionsOptions,
): Promise<SDKSessionInfo[]>
export function getSessionInfo(
sessionId: string,
options?: GetSessionInfoOptions,
): Promise<SDKSessionInfo | undefined>
export function getSessionMessages(
sessionId: string,
options?: GetSessionMessagesOptions,
): Promise<SessionMessage[]>
export function renameSession(
sessionId: string,
title: string,
options?: SessionMutationOptions,
): Promise<void>
export function tagSession(
sessionId: string,
tag: string | null,
options?: SessionMutationOptions,
): Promise<void>
export function forkSession(
sessionId: string,
options?: ForkSessionOptions,
): Promise<ForkSessionResult>
export function deleteSession(
sessionId: string,
options?: SessionMutationOptions,
): Promise<void>
// ============================================================================
// Query functions
// ============================================================================
export function query(params: {
prompt: string | AsyncIterable<SDKUserMessage>
options?: QueryOptions
}): Query
export function queryAsync(params: {
prompt: string | AsyncIterable<SDKUserMessage>
options?: QueryOptions
}): Promise<Query>
// ============================================================================
// V2 API functions
// ============================================================================
export function unstable_v2_createSession(options: SDKSessionOptions): SDKSession
export function unstable_v2_resumeSession(
sessionId: string,
options: SDKSessionOptions,
): Promise<SDKSession>
export function unstable_v2_prompt(
message: string,
options: SDKSessionOptions,
): Promise<SDKResultMessage>
// ============================================================================
// MCP tool functions
// ============================================================================
export function tool<Schema = any>(
name: string,
description: string,
inputSchema: Schema,
handler: (args: any, extra: unknown) => Promise<any>,
extras?: {
annotations?: any
searchHint?: string
alwaysLoad?: boolean
},
): SdkMcpToolDefinition<Schema>
/**
* MCP server transport configuration types.
* Matches McpServerConfigForProcessTransport from coreTypes.generated.ts.
*/
export type SdkMcpStdioConfig = {
type?: "stdio"
command: string
args?: string[]
env?: Record<string, string>
}
export type SdkMcpSSEConfig = {
type: "sse"
url: string
headers?: Record<string, string>
}
export type SdkMcpHttpConfig = {
type: "http"
url: string
headers?: Record<string, string>
}
export type SdkMcpSdkConfig = {
type: "sdk"
name: string
}
export type SdkMcpServerConfig = SdkMcpStdioConfig | SdkMcpSSEConfig | SdkMcpHttpConfig | SdkMcpSdkConfig
/**
* Scoped MCP server config with session scope.
* Returned by createSdkMcpServer() for use with mcpServers option.
*/
export type SdkScopedMcpServerConfig = SdkMcpServerConfig & {
scope: "session"
}
/**
* Wraps an MCP server configuration for use with the SDK.
* Adds the 'session' scope marker so the SDK knows this server
* should be connected per-session (not globally).
*
* @param config - MCP server config (stdio, sse, http, or sdk type)
* @returns Scoped config with scope: 'session' added
*
* @example
* ```typescript
* const server = createSdkMcpServer({
* type: 'stdio',
* command: 'npx',
* args: ['-y', '@modelcontextprotocol/server-filesystem', '/tmp'],
* })
* const session = unstable_v2_createSession({
* cwd: '/my/project',
* mcpServers: { 'fs': server },
* })
* ```
*/
export function createSdkMcpServer(config: SdkMcpServerConfig): SdkScopedMcpServerConfig
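The permission flow declared above (`onPermissionRequest` plus `Query.respondToPermission`) can be sketched as a self-contained broker. Everything below is illustrative, not the SDK's internals; only the request/respond-by-id-with-timeout shape is taken from the doc comments.

```typescript
// Illustrative stand-in for the permission request/respond protocol above.
type PermissionDecision =
  | { behavior: 'allow' }
  | { behavior: 'deny'; message: string }

class PermissionBroker {
  private pending = new Map<string, (d: PermissionDecision) => void>()

  // Engine side: emit a request and wait for the host, denying on timeout
  // (mirrors the _permissionTimeoutMs default described above).
  request(toolUseId: string, timeoutMs: number): Promise<PermissionDecision> {
    return new Promise(resolve => {
      const timer = setTimeout(() => {
        this.pending.delete(toolUseId)
        resolve({ behavior: 'deny', message: 'permission request timed out' })
      }, timeoutMs)
      this.pending.set(toolUseId, decision => {
        clearTimeout(timer)
        this.pending.delete(toolUseId)
        resolve(decision)
      })
    })
  }

  // Host side: the analogue of query.respondToPermission(toolUseId, decision).
  respond(toolUseId: string, decision: PermissionDecision): void {
    this.pending.get(toolUseId)?.(decision)
  }
}
```

A host that answers before the timeout wins; a silent host falls through to deny, which lines up with the secure-by-default note in `QueryOptions`.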


@@ -0,0 +1,10 @@
/**
* Stub — control protocol types not included in source snapshot. See
* src/types/message.ts for the same scoping caveat (issue #473).
*/
/* eslint-disable @typescript-eslint/no-explicit-any */
export type SDKControlRequest = any
export type SDKControlResponse = any
export type SDKControlPermissionRequest = any
export type StdoutMessage = any


@@ -55,7 +55,7 @@ export const OutputFormatSchema = lazySchema(() =>
// ============================================================================
export const ApiKeySourceSchema = lazySchema(() =>
z.enum(['user', 'project', 'org', 'temporary', 'oauth']),
z.enum(['user', 'project', 'org', 'temporary', 'oauth', 'none']),
)
export const ConfigScopeSchema = lazySchema(() =>
@@ -1851,6 +1851,18 @@ export const SDKSessionInfoSchema = lazySchema(() =>
.describe('Session metadata returned by listSessions and getSessionInfo.'),
)
export const SDKPermissionRequestMessageSchema = lazySchema(() =>
z.object({
type: z.literal('permission_request'),
request_id: z.string().describe('Unique request ID for this permission prompt'),
tool_name: z.string().describe('Name of the tool requesting permission'),
tool_use_id: z.string().describe('Tool use ID for matching with respondToPermission'),
input: z.record(z.string(), z.unknown()).describe('Tool input parameters'),
uuid: UUIDPlaceholder(),
session_id: z.string(),
}),
)
export const SDKMessageSchema = lazySchema(() =>
z.union([
SDKAssistantMessageSchema(),
@@ -1877,6 +1889,7 @@ export const SDKMessageSchema = lazySchema(() =>
SDKRateLimitEventSchema(),
SDKElicitationCompleteMessageSchema(),
SDKPromptSuggestionMessageSchema(),
SDKPermissionRequestMessageSchema(),
]),
)

File diff suppressed because it is too large

src/global.d.ts (vendored, new file, 16 lines)

@@ -0,0 +1,16 @@
/**
* Build-time globals replaced by the bundler at build time.
*
* `scripts/build.ts` substitutes these via Bun's `define` option, so at
* runtime the references are inlined as string literals. This declaration
* exists only to make `tsc --noEmit` aware of them — without it, every
* `MACRO.*` access fires TS2304 "Cannot find name 'MACRO'".
*/
declare const MACRO: {
VERSION: string
DISPLAY_VERSION: string
BUILD_TIME: string
ISSUES_EXPLAINER: string
PACKAGE_URL: string
NATIVE_PACKAGE_URL: string | undefined
}
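For context, a sketch of the build side that makes these globals real. The entrypoint path and values below are assumptions; only the `define` substitution mechanism is taken from the comment above, so treat this as a fragment, not the repo's actual scripts/build.ts.

```typescript
// Sketch only: the real scripts/build.ts is not part of this diff.
await Bun.build({
  entrypoints: ['src/entrypoints/cli.tsx'], // assumed path
  outdir: 'dist',
  define: {
    // Each MACRO.* reference is replaced with a string literal at build
    // time, which is why src/global.d.ts only needs to satisfy tsc and
    // nothing has to exist at runtime.
    'MACRO.VERSION': JSON.stringify('0.7.0'),
    'MACRO.BUILD_TIME': JSON.stringify(new Date().toISOString()),
  },
})
```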


@@ -79,6 +79,7 @@ import { headlessProfilerCheckpoint } from './utils/headlessProfiler.js'
import {
getDefaultMainLoopModelSetting,
getRuntimeMainLoopModel,
parseUserSpecifiedModel,
renderModelName,
} from './utils/model/model.js'
import {
@@ -624,7 +625,7 @@ async function* queryLoop(
getDefaultMainLoopModelSetting()
let currentModel = getRuntimeMainLoopModel({
permissionMode,
mainLoopModel: appStateMainLoopModel,
mainLoopModel: parseUserSpecifiedModel(appStateMainLoopModel),
exceeds200kTokens:
permissionMode === 'plan' &&
doesMostRecentAssistantMessageExceed200k(messagesForQuery),


@@ -1283,6 +1283,21 @@ async function* queryModel(
let messagesForAPI = normalizeMessagesForAPI(messages, filteredTools)
queryCheckpoint('query_message_normalization_end')
// Apply hybrid context strategy for optimal cache/fresh balance
if (feature('HYBRID_CONTEXT_STRATEGY')) {
const { applyHybridStrategy } = await import('../../utils/hybridContextStrategy.js')
// Cap at 200k to avoid edge case with very large context windows
const strategyResult = applyHybridStrategy(messagesForAPI, {
cacheWeight: 0.4,
freshWeight: 0.6,
maxTotalTokens: Math.min(
getContextWindowForModel(model, getSdkBetas()) - COMPACT_MAX_OUTPUT_TOKENS,
200000
),
})
messagesForAPI = strategyResult.selectedMessages
}
// Model-specific post-processing: strip tool-search-specific fields if the
// selected model doesn't support tool search.
//


@@ -2,6 +2,7 @@ import { APIError } from '@anthropic-ai/sdk'
import { buildAnthropicUsageFromRawUsage } from './cacheMetrics.js'
import { compressToolHistory } from './compressToolHistory.js'
import { fetchWithProxyRetry } from './fetchWithProxyRetry.js'
import { stableStringify } from '../../utils/stableStringify.js'
import type {
ResolvedCodexCredentials,
ResolvedProviderRequest,
@@ -559,7 +560,9 @@ export async function performCodexRequest(options: {
{
method: 'POST',
headers,
body: JSON.stringify(body),
// WHY: byte-identity required for implicit prefix caching on
// OpenAI Responses API. See src/utils/stableStringify.ts.
body: stableStringify(body),
signal: options.signal,
},
)
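The WHY comment leans on src/utils/stableStringify.ts, which is not shown in this diff. A minimal sketch of such a key-sorting stringify for plain JSON data (the real implementation may differ):

```typescript
// Sorting keys at every depth makes the serialized bytes independent of
// object insertion order, which is what provider prefix caches hash.
function sortKeys(value: unknown): unknown {
  if (Array.isArray(value)) return value.map(sortKeys)
  if (value !== null && typeof value === 'object') {
    const record = value as Record<string, unknown>
    return Object.fromEntries(
      Object.keys(record)
        .sort()
        .map(key => [key, sortKeys(record[key])]),
    )
  }
  return value
}

function stableStringify(value: unknown): string {
  return JSON.stringify(sortKeys(value))
}
```

Two rebuilds of the same body then serialize to identical bytes even when spread-merge or conditional assignment changed the key order.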


@@ -74,7 +74,12 @@ import {
hasToolFieldMapping,
} from './toolArgumentNormalization.js'
import { logApiCallStart, logApiCallEnd } from '../../utils/requestLogging.js'
import { createStreamState, processStreamChunk, getStreamStats } from '../../utils/streamingOptimizer.js'
import {
createStreamState,
processStreamChunk,
getStreamStats,
} from '../../utils/streamingOptimizer.js'
import { stableStringify } from '../../utils/stableStringify.js'
type SecretValueSource = Partial<{
OPENAI_API_KEY: string
@@ -1852,12 +1857,17 @@ class OpenAIShimMessages {
return false
}
let serializedBody = JSON.stringify(
// WHY: byte-identity required for implicit prefix caching in
// OpenAI/Kimi/DeepSeek. stableStringify sorts object keys at every
// depth so spurious insertion-order differences across rebuilds of
// `body` (spread-merge, conditional assignments above) don't bust
// the provider's prefix hash.
let serializedBody = stableStringify(
request.transport === 'responses' ? buildResponsesBody() : body,
)
const refreshSerializedBody = (): void => {
serializedBody = JSON.stringify(
serializedBody = stableStringify(
request.transport === 'responses' ? buildResponsesBody() : body,
)
}
@@ -2036,7 +2046,7 @@ class OpenAIShimMessages {
responsesResponse = await fetchWithProxyRetry(responsesUrl, {
method: 'POST',
headers,
body: JSON.stringify(responsesBody),
body: stableStringify(responsesBody),
signal: options?.signal,
})
} catch (error) {

src/types/message.ts (new file, 25 lines)

@@ -0,0 +1,25 @@
/**
* Stub — message type definitions not included in source snapshot.
*
* The upstream Anthropic source defines a rich Message discriminated union
* with structured Content blocks, role tags, tool_use payloads, and so on.
* That file is not mirrored to this open snapshot. This stub exists so
* `tsc --noEmit` can resolve `import { Message, ... } from 'src/types/message'`
* across the ~21 callers without fixing every transitive type the call
* sites use.
*
* Once the real definitions are restored upstream-side or reconstructed
* from runtime usage, replace these `any` aliases with proper types and
* delete this comment. See issue #473 for the typecheck-foundation effort.
*/
/* eslint-disable @typescript-eslint/no-explicit-any */
export type Message = any
export type AssistantMessage = any
export type UserMessage = any
export type SystemMessage = any
export type SystemAPIErrorMessage = any
export type AttachmentMessage = any
export type ProgressMessage = any
export type HookResultMessage = any
export type NormalizedUserMessage = any

src/types/tools.ts (new file, 7 lines)

@@ -0,0 +1,7 @@
/**
* Stub — tool type definitions not included in source snapshot. See
* src/types/message.ts for the same scoping caveat (issue #473).
*/
/* eslint-disable @typescript-eslint/no-explicit-any */
export type ShellProgress = any

src/types/utils.ts (new file, 15 lines)

@@ -0,0 +1,15 @@
/**
* Stub — utility type definitions not included in source snapshot. See
* src/types/message.ts for the same scoping caveat (issue #473).
*/
/* eslint-disable @typescript-eslint/no-explicit-any */
export type DeepImmutable<T> = T extends any[]
? readonly DeepImmutable<T[number]>[]
: T extends object
? { readonly [K in keyof T]: DeepImmutable<T[K]> }
: T
export type Permutations<T extends string, U extends string = T> = T extends T
? T | `${T}${Permutations<Exclude<U, T>>}`
: never


@@ -0,0 +1,104 @@
import { describe, expect, it } from 'bun:test'
import {
analyzeConversationPatterns,
predictContextNeeds,
preloadContext,
createPreloadStrategy,
} from './contextPreload.js'
function createMessage(role: string, content: string, createdAt: number = Date.now()): any {
return {
message: { role, content, id: 'test', type: 'message', created_at: createdAt },
sender: role,
}
}
describe('contextPreload', () => {
describe('analyzeConversationPatterns', () => {
it('extracts patterns from messages', () => {
const messages = [
createMessage('user', 'Fix the error in my code', 1000),
createMessage('assistant', 'I found the bug', 2000),
]
const patterns = analyzeConversationPatterns(messages)
expect(patterns.length).toBeGreaterThanOrEqual(0)
})
it('detects debug patterns', () => {
const messages = [
createMessage('user', 'Debug this error please', 1000),
createMessage('assistant', 'Found it', 2000),
]
const patterns = analyzeConversationPatterns(messages)
expect(patterns.some(p => p.userQuery === 'debug')).toBe(true)
})
it('detects code patterns', () => {
const messages = [
createMessage('user', 'Write a function for me', 1000),
createMessage('assistant', 'Here is the code', 2000),
]
const patterns = analyzeConversationPatterns(messages)
expect(patterns.some(p => p.userQuery === 'code')).toBe(true)
})
})
describe('predictContextNeeds', () => {
it('predicts context needs based on query', () => {
const patterns = [{ userQuery: 'debug', neededContext: ['error_history'], frequency: 1 }]
const prediction = predictContextNeeds('Fix the bug', patterns, {
maxPreloadTokens: 10000,
confidenceThreshold: 0.3,
})
expect(prediction.confidence).toBeGreaterThan(0)
expect(prediction.predictedNeed.length).toBeGreaterThan(0)
})
it('returns non-empty predictedNeed when pattern matches', () => {
const patterns = [
{ userQuery: 'debug', neededContext: ['error_history', 'stack_trace'], frequency: 2 },
]
const prediction = predictContextNeeds('debug this error', patterns, {
maxPreloadTokens: 10000,
confidenceThreshold: 0.1,
})
expect(prediction.predictedNeed).toContain('error_history')
})
})
describe('preloadContext', () => {
it('preloads relevant context', () => {
const messages = [
createMessage('system', 'System prompt'),
createMessage('user', 'Debug error'),
createMessage('assistant', 'Fixed'),
]
const prediction = { predictedNeed: ['error'], confidence: 0.8, suggestedMessages: [] }
const result = preloadContext(messages, prediction, { maxPreloadTokens: 5000 })
expect(result.length).toBeGreaterThan(0)
})
})
describe('createPreloadStrategy', () => {
it('creates strategy with all methods', () => {
const strategy = createPreloadStrategy({ maxPreloadTokens: 10000 })
expect(strategy.analyze).toBeDefined()
expect(strategy.predict).toBeDefined()
expect(strategy.preload).toBeDefined()
})
})
})

src/utils/contextPreload.ts (new file, 145 lines)

@@ -0,0 +1,145 @@
/**
* Context Pre-loading - Production Grade
*
* Proactively loads relevant context before it's needed.
* Prediction based on conversation patterns.
*/
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
import type { Message } from '../types/message.js'
export interface PreloadConfig {
maxPreloadTokens: number
predictionWindow?: number
confidenceThreshold?: number
}
export interface PreloadPrediction {
predictedNeed: string[]
confidence: number
suggestedMessages: Message[]
}
export interface ConversationPattern {
userQuery: string
neededContext: string[]
frequency: number
}
const PATTERN_KEYWORDS: Record<string, string[]> = {
'code': ['code', 'function', 'implement', 'write'],
'debug': ['error', 'bug', 'fix', 'issue', 'debug'],
'refactor': ['refactor', 'improve', 'clean', 'optimize'],
'test': ['test', 'spec', 'coverage', 'verify'],
'explain': ['explain', 'what', 'how', 'why', 'describe'],
'search': ['find', 'search', 'look', 'grep', 'glob'],
}
export function analyzeConversationPatterns(messages: Message[]): ConversationPattern[] {
const patterns: ConversationPattern[] = []
const recentMessages = messages.slice(-10)
for (let i = 0; i < recentMessages.length - 1; i++) {
const userMsg = recentMessages[i]
const assistantMsg = recentMessages[i + 1]
// Only score genuine user -> assistant turns; skip other adjacent pairs.
if (userMsg.sender !== 'user' || assistantMsg.sender !== 'assistant') continue
const userContent = typeof userMsg.message?.content === 'string' ? userMsg.message.content : ''
const assistantContent = typeof assistantMsg.message?.content === 'string' ? assistantMsg.message.content : ''
for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) {
if (keywords.some(k => userContent.toLowerCase().includes(k))) {
patterns.push({
userQuery: category,
neededContext: extractContextNeeds(assistantContent),
frequency: 1,
})
}
}
}
return patterns
}
function extractContextNeeds(content: string): string[] {
const needs: string[] = []
if (content.includes('file')) needs.push('file_context')
if (content.includes('function')) needs.push('function_defs')
if (content.includes('error')) needs.push('error_history')
if (content.includes('test')) needs.push('test_files')
return needs
}
export function predictContextNeeds(
currentQuery: string,
patterns: ConversationPattern[],
config: PreloadConfig,
): PreloadPrediction {
const threshold = config.confidenceThreshold ?? 0.5
let matchedCategory = ''
let highestConfidence = 0
for (const [category, keywords] of Object.entries(PATTERN_KEYWORDS)) {
const matches = keywords.filter(k => currentQuery.toLowerCase().includes(k)).length
const confidence = matches / keywords.length
if (confidence > highestConfidence && confidence >= threshold) {
highestConfidence = confidence
matchedCategory = category
}
}
const relevantPatterns = patterns.filter(p => p.userQuery === matchedCategory)
const allNeeds = relevantPatterns.flatMap(p => p.neededContext)
return {
predictedNeed: [...new Set(allNeeds)],
confidence: highestConfidence,
suggestedMessages: [],
}
}
export function preloadContext(
availableContext: Message[],
prediction: PreloadPrediction,
config: PreloadConfig,
): Message[] {
const targetTokens = config.maxPreloadTokens ?? 30000
const selected: Message[] = []
let usedTokens = 0
const priorityTypes = prediction.predictedNeed
const sorted = [...availableContext].sort((a, b) => {
const aContent = typeof a.message?.content === 'string' ? a.message.content : ''
const bContent = typeof b.message?.content === 'string' ? b.message.content : ''
const aPriority = priorityTypes.some(t => aContent.includes(t)) ? 1 : 0
const bPriority = priorityTypes.some(t => bContent.includes(t)) ? 1 : 0
if (bPriority !== aPriority) return bPriority - aPriority
return (b.message?.created_at ?? 0) - (a.message?.created_at ?? 0)
})
for (const msg of sorted) {
const tokens = roughTokenCountEstimation(
typeof msg.message?.content === 'string' ? msg.message.content : ''
)
if (usedTokens + tokens > targetTokens) break
selected.push(msg)
usedTokens += tokens
}
return selected
}
export function createPreloadStrategy(config: PreloadConfig) {
return {
analyze: analyzeConversationPatterns,
predict: (query: string, patterns: ConversationPattern[]) =>
predictContextNeeds(query, patterns, config),
preload: (context: Message[], prediction: PreloadPrediction) =>
preloadContext(context, prediction, config),
}
}
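The confidence score in predictContextNeeds is plain keyword overlap: matched keywords divided by the bucket size. Reduced to just the 'debug' bucket (this is a standalone reduction for illustration, not the exported function):

```typescript
// Reduced version of the keyword-overlap confidence used above.
const DEBUG_KEYWORDS = ['error', 'bug', 'fix', 'issue', 'debug']

function debugConfidence(query: string): number {
  const q = query.toLowerCase()
  return DEBUG_KEYWORDS.filter(k => q.includes(k)).length / DEBUG_KEYWORDS.length
}
```

Worked through: "Fix this bug please" matches 'fix' and 'bug', scoring 2/5 = 0.4, which sits below the default confidenceThreshold of 0.5; callers that want such queries matched must lower the threshold, as the tests above do.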


@@ -201,6 +201,95 @@ export type AxiosErrorKind =
| 'http' // other axios error (may have status)
| 'other' // not an axios error
// ============================================================================
// SDK-specific error classes
// ============================================================================
/**
* Base class for all SDK errors. Extends ClaudeError so that existing
* `catch (e) { if (e instanceof ClaudeError) … }` checks still work,
* while giving SDK consumers a more specific base to match against.
*/
export class SDKError extends ClaudeError {
constructor(message: string) {
super(message)
this.name = 'SDKError'
}
}
export class SDKAuthenticationError extends SDKError {
constructor(message?: string) {
super(message ?? 'Authentication failed')
this.name = 'SDKAuthenticationError'
}
}
export class SDKBillingError extends SDKError {
constructor(message?: string) {
super(message ?? 'Billing error - check subscription')
this.name = 'SDKBillingError'
}
}
export class SDKRateLimitError extends SDKError {
constructor(
message?: string,
public readonly resetsAt?: number,
public readonly rateLimitType?: string,
) {
super(message ?? 'Rate limit exceeded')
this.name = 'SDKRateLimitError'
}
}
export class SDKInvalidRequestError extends SDKError {
constructor(message?: string) {
super(message ?? 'Invalid request')
this.name = 'SDKInvalidRequestError'
}
}
export class SDKServerError extends SDKError {
constructor(message?: string) {
super(message ?? 'Server error')
this.name = 'SDKServerError'
}
}
export class SDKMaxOutputTokensError extends SDKError {
constructor(message?: string) {
super(message ?? 'Max output tokens reached')
this.name = 'SDKMaxOutputTokensError'
}
}
export type SDKAssistantMessageError =
| 'authentication_failed'
| 'billing_error'
| 'rate_limit'
| 'invalid_request'
| 'server_error'
| 'unknown'
| 'max_output_tokens'
/**
* Convert an SDKAssistantMessageError type string to the proper Error class.
*/
export function sdkErrorFromType(
errorType: SDKAssistantMessageError,
message?: string,
): SDKError | ClaudeError {
switch (errorType) {
case 'authentication_failed': return new SDKAuthenticationError(message)
case 'billing_error': return new SDKBillingError(message)
case 'rate_limit': return new SDKRateLimitError(message)
case 'invalid_request': return new SDKInvalidRequestError(message)
case 'server_error': return new SDKServerError(message)
case 'max_output_tokens': return new SDKMaxOutputTokensError(message)
default: return new ClaudeError(message ?? 'Unknown error')
}
}
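The point of routing every discriminant through subclasses of a common base is that both broad and narrow `instanceof` checks keep working. A self-contained sketch of the same pattern (class names mirror the ones above, but this is an illustrative re-implementation, not the module's actual code):

```typescript
class BaseError extends Error {}
class SDKError extends BaseError {
  constructor(message: string) {
    super(message)
    this.name = 'SDKError'
  }
}
class SDKRateLimitError extends SDKError {
  // Parameter property keeps resetsAt on the instance for callers to inspect.
  constructor(message = 'Rate limit exceeded', readonly resetsAt?: number) {
    super(message)
    this.name = 'SDKRateLimitError'
  }
}

type ErrorType = 'rate_limit' | 'unknown'

function errorFromType(t: ErrorType, message?: string): BaseError {
  switch (t) {
    case 'rate_limit': return new SDKRateLimitError(message)
    default: return new BaseError(message ?? 'Unknown error')
  }
}

const err = errorFromType('rate_limit')
const isSdk = err instanceof SDKError
const isRate = err instanceof SDKRateLimitError
```

Passing `undefined` as the message falls through to the subclass default, which is why the real constructors take `message?` and apply `??`.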
/**
* Classify a caught error from an axios request into one of a few buckets.
* Replaces the ~20-line isAxiosError → 401/403 → ECONNABORTED → ECONNREFUSED


@@ -2,7 +2,7 @@ import type { UUID } from 'crypto'
import { logEvent } from 'src/services/analytics/index.js'
import type { AnalyticsMetadata } from 'src/services/analytics/metadata.js'
import { type Command, getCommandName, isCommandEnabled } from '../commands.js'
import { selectableUserMessagesFilter } from '../components/MessageSelector.js'
import { selectableUserMessagesFilter } from './messageFilters.js'
import type { SpinnerMode } from '../components/Spinner/types.js'
import type { QuerySource } from '../constants/querySource.js'
import { expandPastedTextRefs, parseReferences } from '../history.js'


@@ -0,0 +1,230 @@
import { describe, expect, it } from 'bun:test'
import {
splitContext,
applyHybridStrategy,
optimizeForCost,
optimizeForAccuracy,
getHybridStats,
} from './hybridContextStrategy.js'
function createMessage(role: string, content: string, createdAt: number = Date.now()): any {
return {
message: { role, content, id: 'test', type: 'message', created_at: createdAt },
sender: role,
}
}
describe('hybridContextStrategy', () => {
describe('splitContext', () => {
it('splits context into cached and fresh', () => {
const messages = [
createMessage('system', 'System prompt', Date.now() - 86400000),
createMessage('user', 'Hello'),
createMessage('assistant', 'Hi there'),
]
const split = splitContext(messages, {
cacheWeight: 0.4,
freshWeight: 0.6,
maxTotalTokens: 10000,
})
expect(split.cachedTokens).toBeGreaterThanOrEqual(0)
expect(split.freshTokens).toBeGreaterThanOrEqual(0)
expect(split.totalTokens).toBeGreaterThan(0)
})
it('respects weight configuration', () => {
const messages = [
createMessage('system', 'Old system', Date.now() - 86400000),
createMessage('user', 'Recent message', Date.now()),
]
const split = splitContext(messages, {
cacheWeight: 0.5,
freshWeight: 0.5,
maxTotalTokens: 10000,
})
expect(split.cached).toBeDefined()
expect(split.fresh).toBeDefined()
})
})
describe('applyHybridStrategy', () => {
it('applies strategy and returns messages', () => {
const messages = [
createMessage('user', 'Message 1'),
createMessage('assistant', 'Response 1'),
]
const result = applyHybridStrategy(messages, {
cacheWeight: 0.5,
freshWeight: 0.5,
maxTotalTokens: 10000,
})
expect(result.selectedMessages.length).toBeGreaterThan(0)
expect(['cache_heavy', 'fresh_heavy', 'balanced']).toContain(result.strategy)
})
it('calculates estimated cost', () => {
const messages = [
createMessage('user', 'Test message'),
]
const result = applyHybridStrategy(messages, {
cacheWeight: 0.5,
freshWeight: 0.5,
maxTotalTokens: 10000,
})
expect(result.estimatedCost).toBeGreaterThanOrEqual(0)
})
})
describe('optimizeForCost', () => {
it('returns messages within budget', () => {
const messages = [
createMessage('user', 'Message 1'),
createMessage('assistant', 'Response 1'),
]
const result = optimizeForCost(messages, 0.001)
expect(result.length).toBeGreaterThanOrEqual(0)
})
})
describe('optimizeForAccuracy', () => {
it('optimizes for accuracy with token limit', () => {
const messages = [
createMessage('user', 'Message 1'),
createMessage('assistant', 'Response 1'),
]
const result = optimizeForAccuracy(messages, 5000)
expect(result.length).toBeGreaterThan(0)
})
})
describe('getHybridStats', () => {
it('returns statistics', () => {
const messages = [
createMessage('system', 'System', Date.now() - 86400000),
createMessage('user', 'Hello'),
]
const split = splitContext(messages, { cacheWeight: 0.5, freshWeight: 0.5, maxTotalTokens: 10000 })
const stats = getHybridStats(split)
expect(stats.cacheRatio).toBeGreaterThanOrEqual(0)
expect(stats.freshRatio).toBeGreaterThanOrEqual(0)
expect(stats.totalTokens).toBeGreaterThan(0)
})
})
describe('tool_use/tool_result pairing', () => {
it('preserves tool_use and tool_result together', () => {
const toolUseId = 'tool-use-123'
const messages = [
{
type: 'assistant',
uuid: 'uuid-1',
message: {
role: 'assistant',
content: [{ type: 'tool_use', id: toolUseId, name: 'Read' }],
id: 'msg-1',
created_at: 1000,
},
},
{
type: 'user',
uuid: 'uuid-2',
message: {
role: 'user',
content: [{ type: 'tool_result', tool_use_id: toolUseId, content: 'file content' }],
id: 'msg-2',
created_at: 2000,
},
},
{
type: 'assistant',
uuid: 'uuid-3',
message: {
role: 'assistant',
content: 'Response after tool',
id: 'msg-3',
created_at: 3000,
},
},
] as any[]
const result = applyHybridStrategy(messages, {
cacheWeight: 0.5,
freshWeight: 0.5,
maxTotalTokens: 10000,
})
const hasToolUse = result.selectedMessages.some(
m => Array.isArray(m.message?.content) && m.message.content.some((b: any) => b.type === 'tool_use')
)
const hasToolResult = result.selectedMessages.some(
m => Array.isArray(m.message?.content) && m.message.content.some((b: any) => b.type === 'tool_result')
)
expect(hasToolUse).toBe(true)
expect(hasToolResult).toBe(true)
})
it('accounts for large tool_use input in token counting', () => {
const largeInput = 'x'.repeat(5000)
const messages = [
{
type: 'assistant',
message: {
role: 'assistant',
content: [
{ type: 'tool_use', id: 'tu1', name: 'Edit', input: { path: 'test.js', content: largeInput } },
],
created_at: 1000,
},
},
] as any[]
const result = applyHybridStrategy(messages, {
cacheWeight: 0.5,
freshWeight: 0.5,
maxTotalTokens: 20000,
})
expect(result.totalTokens).toBeGreaterThan(1000)
})
it('accounts for large thinking blocks in token counting', () => {
const longThinking = 'Thinking '.repeat(1000)
const messages = [
{
type: 'assistant',
message: {
role: 'assistant',
content: [
{ type: 'thinking', thinking: longThinking },
{ type: 'text', text: 'Final response' },
],
created_at: 1000,
},
},
] as any[]
const result = applyHybridStrategy(messages, {
cacheWeight: 0.5,
freshWeight: 0.5,
maxTotalTokens: 20000,
})
expect(result.totalTokens).toBeGreaterThan(500)
})
})
})


@@ -0,0 +1,306 @@
/**
* Hybrid Context Strategy - Production Grade
*
* Combines cached and fresh context tokens under a weighted budget,
* trading prompt-cache reuse (cost) against context recency (accuracy).
*/
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
import type { Message } from '../types/message.js'
export interface HybridConfig {
cacheWeight: number
freshWeight: number
maxTotalTokens: number
costThreshold?: number
}
export interface ContextSplit {
cached: Message[]
fresh: Message[]
cachedTokens: number
freshTokens: number
totalTokens: number
}
export interface HybridStrategyResult {
selectedMessages: Message[]
totalTokens: number
strategy: 'cache_heavy' | 'fresh_heavy' | 'balanced'
estimatedCost: number
}
const DEFAULT_CONFIG: Required<HybridConfig> = {
cacheWeight: 0.4,
freshWeight: 0.6,
maxTotalTokens: 100000,
costThreshold: 0.01,
}
// Keep enough for: tool_use -> tool_result -> assistant -> user -> next
const MIN_TAILMessages = 5
function getMessageChain(
messages: Message[],
): { chains: Message[][]; orphans: Message[] } {
const toolUseIds = new Set<string>()
const toolUseMessages = new Map<string, Message[]>()
const allMessagesByUuid = new Map<string, Message[]>()
for (const msg of messages) {
const uuid = msg.uuid ?? ''
if (uuid) {
const existing = allMessagesByUuid.get(uuid) ?? []
existing.push(msg)
allMessagesByUuid.set(uuid, existing)
}
const content = msg.message?.content
if (Array.isArray(content)) {
for (const block of content) {
if (block?.type === 'tool_use' && block?.id) {
toolUseIds.add(block.id)
const existing = toolUseMessages.get(block.id) ?? []
existing.push(msg)
toolUseMessages.set(block.id, existing)
}
}
}
}
const chains: Message[][] = []
const orphans: Message[] = []
for (const [toolUseId, msgs] of toolUseMessages) {
const chainMessages: Message[] = [...msgs]
for (const msg of messages) {
const content = msg.message?.content
if (Array.isArray(content)) {
for (const block of content) {
if (block?.type === 'tool_result' && block?.tool_use_id === toolUseId) {
chainMessages.push(msg)
}
}
}
}
chains.push(chainMessages)
}
const chainMessageUuids = new Set<string>()
for (const chain of chains) {
for (const msg of chain) {
if (msg.uuid) chainMessageUuids.add(msg.uuid)
}
}
for (const [uuid, msgs] of allMessagesByUuid) {
if (!chainMessageUuids.has(uuid)) {
orphans.push(...msgs)
}
}
return { chains, orphans }
}
function getCacheAge(message: Message): number {
const created = message.message?.created_at ?? 0
// No timestamp: treat as very old (1000 hours) so it is never scored as fresh.
if (created === 0) return 1000
return (Date.now() - created) / (1000 * 60 * 60)
}
function getMessageTokenCount(message: Message): number {
const content = message.message?.content
if (typeof content === 'string') {
return roughTokenCountEstimation(content)
}
if (Array.isArray(content)) {
let tokens = 0
for (const block of content) {
if (typeof block !== 'object' || block === null) continue
const b = block as Record<string, unknown>
if (b.type === 'text' && typeof b.text === 'string') {
tokens += roughTokenCountEstimation(b.text)
} else if (b.type === 'tool_use') {
const inputSize = JSON.stringify(b.input ?? {}).length
tokens += Math.ceil(inputSize / 4) + 20
} else if (b.type === 'tool_result') {
if (typeof b.content === 'string') {
tokens += roughTokenCountEstimation(b.content)
} else if (Array.isArray(b.content)) {
for (const rc of b.content) {
if (typeof rc === 'object' && rc !== null && 'text' in rc) {
tokens += roughTokenCountEstimation((rc as { text: string }).text)
}
}
} else {
tokens += 50
}
if (b.is_error === true) tokens += 10
} else if (b.type === 'thinking' && typeof b.thinking === 'string') {
tokens += roughTokenCountEstimation(b.thinking)
}
}
return tokens
}
return 0
}
function calculateCacheValue(message: Message): number {
const content = typeof message.message?.content === 'string' ? message.message.content : ''
const age = getCacheAge(message)
let value = 0.5
if (content.includes('error') || content.includes('fail')) value += 0.3
if (content.includes('function') || content.includes('class')) value += 0.2
if (content.includes('important') || content.includes('key')) value += 0.15
if (age < 1) value += 0.2
else if (age < 6) value += 0.1
else value -= 0.2
if (message.message?.role === 'system') value += 0.1
return Math.max(0, Math.min(1, value))
}
export function splitContext(
messages: Message[],
config: HybridConfig,
): ContextSplit {
const cfg = { ...DEFAULT_CONFIG, ...config }
const sorted = [...messages].sort((a, b) => {
const aValue = calculateCacheValue(a)
const bValue = calculateCacheValue(b)
return bValue - aValue
})
const cached: Message[] = []
const fresh: Message[] = []
let cachedTokens = 0
let freshTokens = 0
const cacheTarget = Math.floor(cfg.maxTotalTokens * cfg.cacheWeight)
const freshTarget = Math.floor(cfg.maxTotalTokens * cfg.freshWeight)
for (const msg of sorted) {
const tokens = getMessageTokenCount(msg)
const age = getCacheAge(msg)
if (age > 24 && cachedTokens < cacheTarget) {
if (cachedTokens + tokens <= cacheTarget) {
cached.push(msg)
cachedTokens += tokens
continue
}
}
if (freshTokens + tokens <= freshTarget) {
fresh.push(msg)
freshTokens += tokens
}
}
return {
cached,
fresh,
cachedTokens,
freshTokens,
totalTokens: cachedTokens + freshTokens,
}
}
export function applyHybridStrategy(
messages: Message[],
config: HybridConfig,
): HybridStrategyResult {
const cfg = { ...DEFAULT_CONFIG, ...config }
// Preserve message chains (tool_use/tool_result pairs)
const { chains, orphans } = getMessageChain(messages)
// Always preserve the conversation tail (last N messages)
const tailMessages = messages.slice(-MIN_TAILMessages)
const coreMessages = messages.slice(0, -MIN_TAILMessages)
const split = splitContext(coreMessages, cfg)
let strategy: HybridStrategyResult['strategy'] = 'balanced'
if (split.cachedTokens > split.freshTokens * 1.5) {
strategy = 'cache_heavy'
} else if (split.freshTokens > split.cachedTokens * 1.5) {
strategy = 'fresh_heavy'
}
const allSelected = [
...chains.flat(),
...split.cached,
...split.fresh,
...tailMessages
]
const seenUuids = new Set<string>()
const seenMessages = new Set<Message>()
const selectedMessages: Message[] = []
for (const msg of allSelected) {
const uuid = msg.uuid ?? msg.message?.id ?? ''
// Dedupe by id when one exists; fall back to object identity so two
// distinct messages without ids are not collapsed into one.
if (uuid ? seenUuids.has(uuid) : seenMessages.has(msg)) continue
if (uuid) seenUuids.add(uuid)
seenMessages.add(msg)
selectedMessages.push(msg)
}
selectedMessages.sort(
(a, b) => (a.message?.created_at ?? 0) - (b.message?.created_at ?? 0)
)
let totalTokens = 0
for (const msg of selectedMessages) {
totalTokens += getMessageTokenCount(msg)
}
const estimatedCost = totalTokens * 0.000001 * 0.5
return {
selectedMessages,
totalTokens,
strategy,
estimatedCost,
}
}
export function optimizeForCost(messages: Message[], budget: number): Message[] {
const result = applyHybridStrategy(messages, {
cacheWeight: 0.7,
freshWeight: 0.3,
maxTotalTokens: Math.floor(budget * 1000),
costThreshold: budget,
})
return result.selectedMessages
}
export function optimizeForAccuracy(messages: Message[], maxTokens: number): Message[] {
const result = applyHybridStrategy(messages, {
cacheWeight: 0.3,
freshWeight: 0.7,
maxTotalTokens: maxTokens,
})
return result.selectedMessages
}
export function getHybridStats(split: ContextSplit) {
const cacheRatio = split.totalTokens > 0 ? split.cachedTokens / split.totalTokens : 0
const freshRatio = split.totalTokens > 0 ? split.freshTokens / split.totalTokens : 0
return {
cacheRatio: Math.round(cacheRatio * 100),
freshRatio: Math.round(freshRatio * 100),
totalTokens: split.totalTokens,
messageCount: split.cached.length + split.fresh.length,
efficiency: split.totalTokens / (split.cachedTokens + split.freshTokens + 1),
}
}
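The merge step in `applyHybridStrategy` concatenates chains, both split buckets, and the tail, dedupes, then re-sorts chronologically so tool_use/tool_result ordering survives. A standalone sketch of that step with simplified messages (hypothetical helper name, illustrative only):

```typescript
type M = { uuid: string; createdAt: number }

function mergeSelections(...groups: M[][]): M[] {
  const seen = new Set<string>()
  const out: M[] = []
  // First occurrence wins; later duplicates (e.g. a chain message that is
  // also in the tail) are dropped.
  for (const msg of groups.flat()) {
    if (!seen.has(msg.uuid)) {
      seen.add(msg.uuid)
      out.push(msg)
    }
  }
  // Chronological re-sort restores conversation order after the buckets
  // were built in value order rather than time order.
  return out.sort((a, b) => a.createdAt - b.createdAt)
}

const merged = mergeSelections(
  [{ uuid: 'b', createdAt: 2 }],                               // tool chain
  [{ uuid: 'a', createdAt: 1 }],                               // cached bucket
  [{ uuid: 'b', createdAt: 2 }, { uuid: 'c', createdAt: 3 }],  // tail (overlaps)
)
```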


@@ -0,0 +1,81 @@
import type { ContentBlockParam, TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
import type { Message, UserMessage } from '../types/message.js'
import {
BASH_STDERR_TAG,
BASH_STDOUT_TAG,
LOCAL_COMMAND_STDERR_TAG,
LOCAL_COMMAND_STDOUT_TAG,
TASK_NOTIFICATION_TAG,
TEAMMATE_MESSAGE_TAG,
TICK_TAG,
} from '../constants/xml.js'
import { isSyntheticMessage, isToolUseResultMessage } from './messages.js'
function isTextBlock(block: ContentBlockParam): block is TextBlockParam {
return block.type === 'text'
}
export function selectableUserMessagesFilter(message: Message): message is UserMessage {
if (message.type !== 'user') {
return false
}
if (Array.isArray(message.message.content) && message.message.content[0]?.type === 'tool_result') {
return false
}
if (isSyntheticMessage(message)) {
return false
}
if (message.isMeta) {
return false
}
if (message.isCompactSummary || message.isVisibleInTranscriptOnly) {
return false
}
const content = message.message.content
const lastBlock = typeof content === 'string' ? null : content[content.length - 1]
const messageText =
typeof content === 'string'
? content.trim()
: lastBlock && isTextBlock(lastBlock)
? lastBlock.text.trim()
: ''
// Filter out non-user-authored messages (command outputs, task notifications, ticks).
const nonUserTags = [
`<${LOCAL_COMMAND_STDOUT_TAG}>`,
`<${LOCAL_COMMAND_STDERR_TAG}>`,
`<${BASH_STDOUT_TAG}>`,
`<${BASH_STDERR_TAG}>`,
`<${TASK_NOTIFICATION_TAG}>`,
`<${TICK_TAG}>`,
`<${TEAMMATE_MESSAGE_TAG}`, // prefix match: no closing '>' so attributes also match
]
if (nonUserTags.some(tag => messageText.includes(tag))) {
return false
}
return true
}
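Because the predicate is declared as `message is UserMessage`, `Array.prototype.filter` narrows the element type of the result array. A minimal sketch of that type-guard filter pattern with simplified stand-in types:

```typescript
type Msg = { type: 'user' | 'assistant'; text: string; isMeta?: boolean }
type UserMsg = Msg & { type: 'user' }

// The `m is UserMsg` return type is what lets filter() narrow the array type.
function isSelectableUser(m: Msg): m is UserMsg {
  return m.type === 'user' && !m.isMeta
}

const history: Msg[] = [
  { type: 'user', text: 'hello' },
  { type: 'assistant', text: 'hi' },
  { type: 'user', text: 'meta note', isMeta: true },
]
// Typed UserMsg[] without a cast, thanks to the type predicate.
const selectable: UserMsg[] = history.filter(isSelectableUser)
```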
/**
* Checks if all messages after the given index are synthetic (interruptions, cancels, etc.)
* or non-meaningful content. Returns true if there's nothing meaningful to confirm -
* for example, if the user hit enter then immediately cancelled.
*/
export function messagesAfterAreOnlySynthetic(messages: Message[], fromIndex: number): boolean {
for (let i = fromIndex + 1; i < messages.length; i++) {
const msg = messages[i]
if (!msg) continue
// Skip known non-meaningful message types
if (isSyntheticMessage(msg)) continue
if (isToolUseResultMessage(msg)) continue
if (msg.type === 'progress') continue
if (msg.type === 'system') continue
if (msg.type === 'attachment') continue
if (msg.type === 'user' && msg.isMeta) continue
// Assistant with actual content = meaningful
if (msg.type === 'assistant') {
const content = msg.message.content
if (Array.isArray(content)) {
const hasMeaningfulContent = content.some(block => (block.type === 'text' && block.text.trim() !== '') || block.type === 'tool_use')
if (hasMeaningfulContent) return false
}
continue
}
// User messages that aren't synthetic or meta = meaningful
if (msg.type === 'user') {
return false
}
// Other types (e.g., tombstone) are non-meaningful, continue
}
return true
}
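The forward scan above bails out with `false` at the first meaningful message and returns `true` only if it reaches the end. A standalone sketch with the synthetic checks reduced to a flag (simplified types, illustrative only):

```typescript
type M = { synthetic?: boolean; role: 'user' | 'assistant' | 'system' }

function onlySyntheticAfter(messages: M[], fromIndex: number): boolean {
  for (let i = fromIndex + 1; i < messages.length; i++) {
    const msg = messages[i]
    // Skip non-meaningful entries, mirroring the `continue` chain above.
    if (msg.synthetic || msg.role === 'system') continue
    return false // a real user/assistant message is meaningful
  }
  return true
}

// User hit enter then immediately cancelled: nothing meaningful follows.
const cancelledRun = onlySyntheticAfter(
  [{ role: 'user' }, { synthetic: true, role: 'user' }],
  0,
)
// An assistant reply after the prompt is meaningful.
const answeredRun = onlySyntheticAfter(
  [{ role: 'user' }, { role: 'assistant' }],
  0,
)
```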


@@ -158,6 +158,19 @@ export const CLAUDE_OPUS_4_6_CONFIG = {
minimax: 'MiniMax-M2.5',
} as const satisfies ModelConfig
export const CLAUDE_OPUS_4_7_CONFIG = {
firstParty: 'claude-opus-4-7',
bedrock: 'us.anthropic.claude-opus-4-7-v1',
vertex: 'claude-opus-4-7',
foundry: 'claude-opus-4-7',
openai: 'gpt-4o',
gemini: 'gemini-2.5-pro',
github: 'github:copilot',
codex: 'gpt-5.5',
'nvidia-nim': 'nvidia/llama-3.1-nemotron-70b-instruct',
minimax: 'MiniMax-M2.5',
} as const satisfies ModelConfig
export const CLAUDE_SONNET_4_6_CONFIG = {
firstParty: 'claude-sonnet-4-6',
bedrock: 'us.anthropic.claude-sonnet-4-6',
@@ -184,6 +197,7 @@ export const ALL_MODEL_CONFIGS = {
opus41: CLAUDE_OPUS_4_1_CONFIG,
opus45: CLAUDE_OPUS_4_5_CONFIG,
opus46: CLAUDE_OPUS_4_6_CONFIG,
opus47: CLAUDE_OPUS_4_7_CONFIG,
} as const satisfies Record<string, ModelConfig>
export type ModelKey = keyof typeof ALL_MODEL_CONFIGS
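The `as const satisfies ModelConfig` idiom used for these configs validates the object's shape at compile time while preserving the literal string types for downstream narrowing. A sketch with a simplified stand-in `ModelConfig` (requires TypeScript 4.9+ for `satisfies`):

```typescript
type ModelConfig = { firstParty: string; bedrock: string }

const EXAMPLE_CONFIG = {
  firstParty: 'claude-opus-4-7',
  bedrock: 'us.anthropic.claude-opus-4-7-v1',
  // `satisfies` checks the shape; `as const` keeps the literal types.
} as const satisfies ModelConfig

// firstParty has type 'claude-opus-4-7', not string, so this assignment
// to a literal-typed variable compiles without a cast.
const id: 'claude-opus-4-7' = EXAMPLE_CONFIG.firstParty
```

With a plain `: ModelConfig` annotation the literal types would widen to `string` and `ModelKey`-style `keyof typeof` derivations downstream would lose precision.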


@@ -83,7 +83,8 @@ export function isNonCustomOpusModel(model: ModelName): boolean {
model === getModelStrings().opus40 ||
model === getModelStrings().opus41 ||
model === getModelStrings().opus45 ||
model === getModelStrings().opus46
model === getModelStrings().opus46 ||
model === getModelStrings().opus47
)
}
@@ -204,12 +205,12 @@ export function getDefaultOpusModel(): ModelName {
return process.env.OPENAI_MODEL || 'grok-4'
}
// 3P providers (Bedrock, Vertex, Foundry) — kept as a separate branch
// even when values match, since 3P availability lags firstParty and
// these will diverge again at the next model launch.
// since 3P availability lags firstParty and these will diverge again at
// the next model launch. Keep 3P on Opus 4.6 until they roll out 4.7.
if (getAPIProvider() !== 'firstParty') {
return getModelStrings().opus46
}
return getModelStrings().opus46
return getModelStrings().opus47
}
// @[MODEL LAUNCH]: Update the default Sonnet model (3P providers may lag so keep defaults unchanged).
@@ -407,7 +408,10 @@ export function getDefaultMainLoopModel(): ModelName {
export function firstPartyNameToCanonical(name: ModelName): ModelShortName {
name = name.toLowerCase()
// Special cases for Claude 4+ models to differentiate versions
// Order matters: check more specific versions first (4-5 before 4)
// Order matters: check more specific versions first (4-7 before 4-6 before 4-5 before 4)
if (name.includes('claude-opus-4-7')) {
return 'claude-opus-4-7'
}
if (name.includes('claude-opus-4-6')) {
return 'claude-opus-4-6'
}
@@ -478,9 +482,9 @@ export function getClaudeAiUserDefaultModelDescription(
): string {
if (isMaxSubscriber() || isTeamPremiumSubscriber()) {
if (isOpus1mMergeEnabled()) {
return `Opus 4.6 with 1M context · Most capable for complex work${fastMode ? getOpus46PricingSuffix(true) : ''}`
return `Opus 4.7 with 1M context · Most capable for complex work${fastMode ? getOpus46PricingSuffix(true) : ''}`
}
return `Opus 4.6 · Most capable for complex work${fastMode ? getOpus46PricingSuffix(true) : ''}`
return `Opus 4.7 · Most capable for complex work${fastMode ? getOpus46PricingSuffix(true) : ''}`
}
return 'Sonnet 4.6 · Best for everyday tasks'
}
@@ -489,7 +493,7 @@ export function renderDefaultModelSetting(
setting: ModelName | ModelAlias,
): string {
if (setting === 'opusplan') {
return 'Opus 4.6 in plan mode, else Sonnet 4.6'
return 'Opus 4.7 in plan mode, else Sonnet 4.6'
}
return renderModelName(parseUserSpecifiedModel(setting))
}
@@ -582,10 +586,14 @@ export function getPublicModelDisplayName(model: ModelName): string | null {
return 'GPT-5.4'
case 'gpt-5.3-codex-spark':
return 'GPT-5.3 Codex Spark'
case getModelStrings().opus46:
return 'Opus 4.6'
case getModelStrings().opus47 + '[1m]':
return 'Opus 4.7 (1M context)'
case getModelStrings().opus47:
return 'Opus 4.7'
case getModelStrings().opus46 + '[1m]':
return 'Opus 4.6 (1M context)'
case getModelStrings().opus46:
return 'Opus 4.6'
case getModelStrings().opus45:
return 'Opus 4.5'
case getModelStrings().opus41:
@@ -825,6 +833,9 @@ export function getMarketingNameForModel(modelId: string): string | undefined {
const has1m = modelId.toLowerCase().includes('[1m]')
const canonical = getCanonicalName(modelId)
if (canonical.includes('claude-opus-4-7')) {
return has1m ? 'Opus 4.7 (with 1M context)' : 'Opus 4.7'
}
if (canonical.includes('claude-opus-4-6')) {
return has1m ? 'Opus 4.6 (with 1M context)' : 'Opus 4.6'
}


@@ -159,6 +159,16 @@ function getOpus41Option(): ModelOption {
}
}
function getOpus47Option(fastMode = false): ModelOption {
const is3P = getAPIProvider() !== 'firstParty'
return {
value: is3P ? getModelStrings().opus47 : 'opus',
label: 'Opus',
description: `Opus 4.7 · Most capable for complex work${getOpus46PricingSuffix(fastMode)}`,
descriptionForModel: 'Opus 4.7 - most capable for complex work',
}
}
function getOpus46Option(fastMode = false): ModelOption {
const is3P = getAPIProvider() !== 'firstParty'
return {
@@ -241,7 +251,7 @@ function getMaxOpusOption(fastMode = false): ModelOption {
return {
value: 'opus',
label: 'Opus',
description: `Opus 4.6 · Most capable for complex work${fastMode ? getOpus46PricingSuffix(true) : ''}`,
description: `Opus 4.7 · Most capable for complex work${fastMode ? getOpus46PricingSuffix(true) : ''}`,
}
}
@@ -269,9 +279,9 @@ function getMergedOpus1MOption(fastMode = false): ModelOption {
return {
value: is3P ? getModelStrings().opus46 + '[1m]' : 'opus[1m]',
label: 'Opus (1M context)',
description: `Opus 4.6 with 1M context · Most capable for complex work${!is3P && fastMode ? getOpus46PricingSuffix(fastMode) : ''}`,
description: `${is3P ? 'Opus 4.6' : 'Opus 4.7'} with 1M context · Most capable for complex work${!is3P && fastMode ? getOpus46PricingSuffix(fastMode) : ''}`,
descriptionForModel:
'Opus 4.6 with 1M context - most capable for complex work',
`${is3P ? 'Opus 4.6' : 'Opus 4.7'} with 1M context - most capable for complex work`,
}
}
@@ -291,7 +301,7 @@ function getOpusPlanOption(): ModelOption {
return {
value: 'opusplan',
label: 'Opus Plan Mode',
description: 'Use Opus 4.6 in plan mode, Sonnet 4.6 otherwise',
description: 'Use Opus 4.7 in plan mode, Sonnet 4.6 otherwise',
}
}
@@ -504,7 +514,7 @@ function getModelOptionsBase(fastMode = false): ModelOption[] {
}
}
// PAYG 1P API: Default (Sonnet) + Sonnet 1M + Opus 4.6 + Opus 1M + Haiku
// PAYG 1P API: Default (Sonnet) + Sonnet 1M + Opus 4.7 + Opus 4.6 + Opus 1M + Haiku
if (getAPIProvider() === 'firstParty') {
const payg1POptions = [getDefaultOptionForUser(fastMode)]
if (checkSonnet1mAccess()) {
@@ -513,6 +523,7 @@ function getModelOptionsBase(fastMode = false): ModelOption[] {
if (isOpus1mMergeEnabled()) {
payg1POptions.push(getMergedOpus1MOption(fastMode))
} else {
payg1POptions.push(getOpus47Option(fastMode))
payg1POptions.push(getOpus46Option(fastMode))
if (checkOpus1mAccess()) {
payg1POptions.push(getOpus46_1MOption(fastMode))
@@ -546,8 +557,9 @@ function getModelOptionsBase(fastMode = false): ModelOption[] {
if (customOpus !== undefined) {
payg3pOptions.push(customOpus)
} else {
// Add Opus 4.1, Opus 4.6 and Opus 4.6 1M
// Add Opus 4.1, Opus 4.7, Opus 4.6 and Opus 4.6 1M
payg3pOptions.push(getOpus41Option()) // This is the default opus
payg3pOptions.push(getOpus47Option(fastMode))
payg3pOptions.push(getOpus46Option(fastMode))
if (checkOpus1mAccess()) {
payg3pOptions.push(getOpus46_1MOption(fastMode))


@@ -23,6 +23,23 @@ const TIERS = [
},
] as const
function buildCapabilityOverrideCacheKey(
model: string,
capability: ModelCapabilityOverride,
): string {
const envParts = TIERS.flatMap(tier => [
process.env[tier.modelEnvVar] ?? '',
process.env[tier.capabilitiesEnvVar] ?? '',
])
return [
model.toLowerCase(),
capability,
getAPIProvider(),
...envParts,
].join('\0')
}
/**
* Check whether a 3p model capability override is set for a model that matches one of
* the pinned ANTHROPIC_DEFAULT_*_MODEL env vars.
@@ -46,5 +63,5 @@ export const get3PModelCapabilityOverride = memoize(
}
return undefined
},
(model, capability) => `${model.toLowerCase()}:${capability}`,
buildCapabilityOverrideCacheKey,
)
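The fix above folds the relevant env vars into the memoize resolver's key, so a changed environment no longer returns a stale cached result. A dependency-free sketch of the same idea (the env var name is hypothetical, and lodash's `memoize` is re-implemented inline to keep the sketch self-contained):

```typescript
function memoize<A extends unknown[], R>(
  fn: (...args: A) => R,
  resolver: (...args: A) => string,
): (...args: A) => R {
  const cache = new Map<string, R>()
  return (...args: A) => {
    const key = resolver(...args)
    if (!cache.has(key)) cache.set(key, fn(...args))
    return cache.get(key) as R
  }
}

const lookup = memoize(
  (model: string) => `${model}:${process.env.FAKE_MODEL_OVERRIDE ?? 'none'}`,
  // NUL-joined key including the env var, like buildCapabilityOverrideCacheKey.
  (model: string) => [model, process.env.FAKE_MODEL_OVERRIDE ?? ''].join('\0'),
)

const before = lookup('opus')
process.env.FAKE_MODEL_OVERRIDE = 'custom'
const after = lookup('opus') // new key, so the function runs again
```

Had the resolver keyed only on `model`, `after` would still be the stale cached `before` value.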


@@ -202,6 +202,9 @@ function get3PFallbackSuggestion(model: string): string | undefined {
return undefined
}
const lowerModel = model.toLowerCase()
if (lowerModel.includes('opus-4-7') || lowerModel.includes('opus_4_7')) {
return getModelStrings().opus46
}
if (lowerModel.includes('opus-4-6') || lowerModel.includes('opus_4_6')) {
return getModelStrings().opus41
}


@@ -11,6 +11,7 @@ import {
CLAUDE_OPUS_4_1_CONFIG,
CLAUDE_OPUS_4_5_CONFIG,
CLAUDE_OPUS_4_6_CONFIG,
CLAUDE_OPUS_4_7_CONFIG,
CLAUDE_OPUS_4_CONFIG,
CLAUDE_SONNET_4_5_CONFIG,
CLAUDE_SONNET_4_6_CONFIG,
@@ -123,6 +124,8 @@ export const MODEL_COSTS: Record<ModelShortName, ModelCosts> = {
COST_TIER_5_25,
[firstPartyNameToCanonical(CLAUDE_OPUS_4_6_CONFIG.firstParty)]:
COST_TIER_5_25,
[firstPartyNameToCanonical(CLAUDE_OPUS_4_7_CONFIG.firstParty)]:
COST_TIER_5_25,
}
/**


@@ -5,16 +5,15 @@ import { resolveRipgrepConfig, wrapRipgrepUnavailableError } from './ripgrep.js'
const MOCK_BUILTIN_PATH = path.normalize(
process.platform === 'win32'
? `vendor/ripgrep/${process.arch}-win32/rg.exe`
: `vendor/ripgrep/${process.arch}-${process.platform}/rg`,
? 'node_modules/@vscode/ripgrep/bin/rg.exe'
: 'node_modules/@vscode/ripgrep/bin/rg',
)
test('ripgrepCommand falls back to system rg when builtin binary is missing', () => {
test('falls back to system rg when @vscode/ripgrep cannot be resolved', () => {
const config = resolveRipgrepConfig({
userWantsSystemRipgrep: false,
bundledMode: false,
builtinCommand: MOCK_BUILTIN_PATH,
builtinExists: false,
builtinCommand: null,
systemExecutablePath: '/usr/bin/rg',
processExecPath: '/fake/bun',
})
@@ -26,12 +25,11 @@ test('ripgrepCommand falls back to system rg when builtin binary is missing', ()
})
})
test('ripgrepCommand keeps builtin mode when bundled binary exists', () => {
test('uses builtin @vscode/ripgrep path when the package resolves', () => {
const config = resolveRipgrepConfig({
userWantsSystemRipgrep: false,
bundledMode: false,
builtinCommand: MOCK_BUILTIN_PATH,
builtinExists: true,
systemExecutablePath: '/usr/bin/rg',
processExecPath: '/fake/bun',
})
@@ -43,10 +41,59 @@ test('ripgrepCommand keeps builtin mode when bundled binary exists', () => {
})
})
test('honors USE_BUILTIN_RIPGREP=0 by selecting system rg even when builtin is available', () => {
const config = resolveRipgrepConfig({
userWantsSystemRipgrep: true,
bundledMode: false,
builtinCommand: MOCK_BUILTIN_PATH,
systemExecutablePath: '/usr/bin/rg',
processExecPath: '/fake/bun',
})
expect(config).toMatchObject({
mode: 'system',
command: 'rg',
args: [],
})
})
test('keeps embedded mode for Bun-compiled standalone executables', () => {
const config = resolveRipgrepConfig({
userWantsSystemRipgrep: false,
bundledMode: true,
builtinCommand: null,
systemExecutablePath: '/usr/bin/rg',
processExecPath: '/opt/openclaude/bin/openclaude',
})
expect(config).toMatchObject({
mode: 'embedded',
command: '/opt/openclaude/bin/openclaude',
args: ['--no-config'],
argv0: 'rg',
})
})
test('falls through to system rg as a last resort even when not on PATH', () => {
const config = resolveRipgrepConfig({
userWantsSystemRipgrep: false,
bundledMode: false,
builtinCommand: null,
systemExecutablePath: 'rg',
processExecPath: '/fake/bun',
})
expect(config).toMatchObject({
mode: 'system',
command: 'rg',
args: [],
})
})
test('wrapRipgrepUnavailableError explains missing packaged fallback', () => {
const error = wrapRipgrepUnavailableError(
{ code: 'ENOENT', message: 'spawn rg ENOENT' },
{ mode: 'builtin', command: 'C:\\fake\\vendor\\ripgrep\\rg.exe', args: [] },
{ mode: 'builtin', command: 'C:\\fake\\node_modules\\@vscode\\ripgrep\\bin\\rg.exe', args: [] },
'win32',
)


@@ -5,7 +5,6 @@ import memoize from 'lodash-es/memoize.js'
import { homedir } from 'os'
import * as path from 'path'
import { logEvent } from 'src/services/analytics/index.js'
import { fileURLToPath } from 'url'
import { isInBundledMode } from './bundledMode.js'
import { logForDebugging } from './debug.js'
import { isEnvDefinedFalsy } from './envUtils.js'
@@ -15,13 +14,6 @@ import { logError } from './log.js'
import { getPlatform } from './platform.js'
import { countCharInString } from './stringUtils.js'
const __filename = fileURLToPath(import.meta.url)
// we use node:path.join instead of node:url.resolve because the former doesn't encode spaces
const __dirname = path.join(
__filename,
process.env.NODE_ENV === 'test' ? '../../../' : '../',
)
type RipgrepConfig = {
mode: 'system' | 'builtin' | 'embedded'
command: string
@@ -35,11 +27,31 @@ function isErrnoException(error: unknown): error is NodeJS.ErrnoException {
return error instanceof Error
}
/**
* Returns the ripgrep binary path provided by the @vscode/ripgrep package.
* The package downloads a platform/arch-specific binary at npm install time
* (cached under the package's bin/ directory). Returns null when the package
* cannot be resolved — for example when running as a Bun-compiled standalone
* executable that doesn't ship node_modules.
*/
function resolveBuiltinRgPath(): string | null {
try {
// Lazy require so the resolution failure path stays graceful at import
// time. The package only exports `rgPath`, so we do not need the rest.
const mod = require('@vscode/ripgrep') as { rgPath?: string }
if (mod.rgPath && existsSync(mod.rgPath)) {
return mod.rgPath
}
} catch {
// Falls through to null — caller decides the fallback.
}
return null
}
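The lazy-require pattern used by `resolveBuiltinRgPath` generalizes to any optional dependency: resolve at call time inside a try/catch and return null instead of throwing, so a missing package degrades to the fallback path rather than crashing at import. A sketch under that assumption (the package name below is deliberately one that will not resolve, to exercise the null branch):

```typescript
function resolveOptionalBinary(packageName: string): string | null {
  try {
    // Call-time require: a missing optional dependency throws here, inside
    // the try, instead of failing the whole module at import time.
    const mod = require(packageName) as { rgPath?: string }
    return mod.rgPath ?? null
  } catch {
    return null // caller falls back to the system binary
  }
}

const missing = resolveOptionalBinary('@example/definitely-not-installed')
```

The same shape works for Bun-compiled standalone executables, where `node_modules` is absent entirely: resolution fails, the function returns null, and the embedded/system branches take over.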
type ResolveRipgrepConfigArgs = {
userWantsSystemRipgrep: boolean
bundledMode: boolean
builtinCommand: string
builtinExists: boolean
builtinCommand: string | null
systemExecutablePath: string
processExecPath?: string
}
@@ -48,7 +60,6 @@ export function resolveRipgrepConfig({
userWantsSystemRipgrep,
bundledMode,
builtinCommand,
builtinExists,
systemExecutablePath,
processExecPath = process.execPath,
}: ResolveRipgrepConfigArgs): RipgrepConfig {
@@ -66,7 +77,7 @@ export function resolveRipgrepConfig({
}
}
if (builtinExists) {
if (builtinCommand) {
return { mode: 'builtin', command: builtinCommand, args: [] }
}
@@ -74,7 +85,9 @@ export function resolveRipgrepConfig({
return { mode: 'system', command: 'rg', args: [] }
}
return { mode: 'builtin', command: builtinCommand, args: [] }
// Last resort — leaves error reporting to the executor when no binary
// can be located. wrapRipgrepUnavailableError() surfaces an install hint.
return { mode: 'system', command: 'rg', args: [] }
}
const getRipgrepConfig = memoize((): RipgrepConfig => {
@@ -82,19 +95,13 @@ const getRipgrepConfig = memoize((): RipgrepConfig => {
process.env.USE_BUILTIN_RIPGREP,
)
const bundledMode = isInBundledMode()
const rgRoot = path.resolve(__dirname, 'vendor', 'ripgrep')
const builtinCommand =
process.platform === 'win32'
? path.resolve(rgRoot, `${process.arch}-win32`, 'rg.exe')
: path.resolve(rgRoot, `${process.arch}-${process.platform}`, 'rg')
const builtinExists = existsSync(builtinCommand)
const builtinCommand = resolveBuiltinRgPath()
const { cmd: systemExecutablePath } = findExecutable('rg', [])
return resolveRipgrepConfig({
userWantsSystemRipgrep,
bundledMode,
builtinCommand,
builtinExists,
systemExecutablePath,
})
})
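Since `resolveRipgrepConfig` is a pure function of its arguments, the fallback order is easy to exercise directly. Below is a standalone, simplified mirror of the precedence (an assumption drawn from the visible hunks; the real module also handles the Bun-embedded binary path and the macOS codesign hook):

```typescript
type RgConfig = { mode: 'system' | 'builtin' | 'embedded'; command: string; args: string[] }

// Simplified mirror of resolveRipgrepConfig's precedence: explicit system
// override first, then bundled mode, then the @vscode/ripgrep builtin,
// then a bare `rg` last resort (error reporting deferred to the executor).
function resolveConfig(opts: {
  userWantsSystemRipgrep: boolean
  bundledMode: boolean
  builtinCommand: string | null
  systemExecutablePath: string
}): RgConfig {
  if (opts.userWantsSystemRipgrep) {
    return { mode: 'system', command: opts.systemExecutablePath || 'rg', args: [] }
  }
  if (opts.bundledMode) {
    return { mode: 'embedded', command: 'rg', args: [] }
  }
  if (opts.builtinCommand) {
    return { mode: 'builtin', command: opts.builtinCommand, args: [] }
  }
  return { mode: 'system', command: 'rg', args: [] }
}
```

With `builtinCommand` now nullable, a missing @vscode/ripgrep package degrades to the system branch instead of throwing at import time.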

View File

@@ -97,13 +97,22 @@ describe("Secure Storage Platform Implementations", () => {
expect(options2.input).toContain("token'quote");
});
test("delete() includes assembly load", () => {
test("delete() skips legacy PasswordVault by default", () => {
windowsCredentialStorage.delete();
expect(mockExecaSync).toHaveBeenCalledTimes(1);
const script = mockExecaSync.mock.calls[0][1][1];
expect(script).not.toContain("System.Runtime.WindowsRuntime");
});
test("delete() includes legacy assembly load when explicitly enabled", () => {
process.env.OPENCLAUDE_ENABLE_LEGACY_WINDOWS_PASSWORDVAULT = "1";
windowsCredentialStorage.delete();
const script = mockExecaSync.mock.calls[1][1][1];
expect(script).toContain("Add-Type -AssemblyName System.Runtime.WindowsRuntime");
});
test("escapes double quotes in username", () => {
process.env.OPENCLAUDE_ENABLE_LEGACY_WINDOWS_PASSWORDVAULT = "1";
process.env.USER = 'user"name';
windowsCredentialStorage.read();
const script = mockExecaSync.mock.calls[1][1][1];
@@ -111,7 +120,17 @@ describe("Secure Storage Platform Implementations", () => {
expect(script).not.toContain('user"name');
});
test("read() falls back to legacy PasswordVault when the DPAPI payload is invalid JSON", () => {
test("read() does not touch legacy PasswordVault by default", () => {
mockExecaSync.mockImplementationOnce(() => ({ exitCode: 1, stdout: "" }));
const result = windowsCredentialStorage.read();
expect(result).toBeNull();
expect(mockExecaSync).toHaveBeenCalledTimes(1);
});
test("read() falls back to legacy PasswordVault when explicitly enabled", () => {
process.env.OPENCLAUDE_ENABLE_LEGACY_WINDOWS_PASSWORDVAULT = "1";
mockExecaSync
.mockImplementationOnce(() => ({ exitCode: 0, stdout: "{not-json" }))
.mockImplementationOnce(() => ({
@@ -126,6 +145,7 @@ describe("Secure Storage Platform Implementations", () => {
});
test("read() fails closed when the legacy PasswordVault payload is invalid JSON", () => {
process.env.OPENCLAUDE_ENABLE_LEGACY_WINDOWS_PASSWORDVAULT = "1";
mockExecaSync
.mockImplementationOnce(() => ({ exitCode: 1, stdout: "" }))
.mockImplementationOnce(() => ({ exitCode: 0, stdout: "{not-json" }));

View File

@@ -30,6 +30,10 @@ function getWindowsSecureStorageFilePath(): string {
return join(getClaudeConfigHomeDir(), `${resourceName}.secure.dpapi`)
}
function shouldUseLegacyPasswordVault(): boolean {
return process.env.OPENCLAUDE_ENABLE_LEGACY_WINDOWS_PASSWORDVAULT === '1'
}
function runPowerShell(
script: string,
options?: { input?: string },
@@ -61,6 +65,10 @@ function getFailureWarning(
}
function readLegacyPasswordVault(): SecureStorageData | null {
if (!shouldUseLegacyPasswordVault()) {
return null
}
const resourceName = getLegacyResourceName().replace(/"/g, '`"')
const username = getUsername().replace(/"/g, '`"')
const script = `
@@ -204,21 +212,23 @@ export const windowsCredentialStorage: SecureStorage = {
`
const removeDpapiResult = runPowerShell(removeDpapiScript)
const resourceName = getLegacyResourceName().replace(/"/g, '`"')
const username = getUsername().replace(/"/g, '`"')
const removeLegacyScript = `
Add-Type -AssemblyName System.Runtime.WindowsRuntime
try {
$vault = New-Object Windows.Security.Credentials.PasswordVault
$cred = $vault.Retrieve("${resourceName}", "${username}")
$vault.Remove($cred)
} catch {
exit 0
}
`
const removeLegacyResult = runPowerShell(removeLegacyScript)
if (shouldUseLegacyPasswordVault()) {
const resourceName = getLegacyResourceName().replace(/"/g, '`"')
const username = getUsername().replace(/"/g, '`"')
const removeLegacyScript = `
Add-Type -AssemblyName System.Runtime.WindowsRuntime
try {
$vault = New-Object Windows.Security.Credentials.PasswordVault
$cred = $vault.Retrieve("${resourceName}", "${username}")
$vault.Remove($cred)
} catch {
exit 0
}
`
const removeLegacyResult = runPowerShell(removeLegacyScript)
void removeLegacyResult
void removeLegacyResult
}
return (removeDpapiResult?.exitCode ?? 1) === 0
},
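The gating pattern this diff applies is small enough to state on its own: legacy PasswordVault code runs only when the flag is exactly `'1'`. A standalone sketch (`maybeRunLegacyCleanup` is a hypothetical caller for illustration, not code from the diff):

```typescript
function shouldUseLegacyPasswordVault(env: Record<string, string | undefined>): boolean {
  return env.OPENCLAUDE_ENABLE_LEGACY_WINDOWS_PASSWORDVAULT === '1'
}

// Hypothetical caller: the legacy branch is skipped entirely unless opted
// in, so the default path issues a single PowerShell invocation (as the
// updated tests assert with toHaveBeenCalledTimes(1)).
function maybeRunLegacyCleanup(
  run: () => void,
  env: Record<string, string | undefined>,
): boolean {
  if (!shouldUseLegacyPasswordVault(env)) return false
  run()
  return true
}
```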

View File

@@ -0,0 +1,142 @@
import { describe, expect, test } from 'bun:test'
import { sortKeysDeep, stableStringify } from './stableStringify.js'
// These tests pin byte-level stability of serialization helpers. The
// invariant that matters for implicit prefix caching in OpenAI / Kimi /
// DeepSeek / Codex — and for Anthropic cache_control breakpoints — is:
// semantically-equal inputs must produce byte-identical output across
// invocations and across key-order permutations.
describe('stableStringify', () => {
test('two invocations with the same object produce identical strings', () => {
const a = stableStringify({ b: 1, a: 2 })
const b = stableStringify({ b: 1, a: 2 })
expect(a).toBe(b)
})
test('key order at the top level does not affect output', () => {
expect(stableStringify({ a: 1, b: 2 })).toBe(stableStringify({ b: 2, a: 1 }))
})
test('key order at nested depths does not affect output', () => {
const x = { outer: { z: 1, a: 2, m: { b: 3, a: 4 } } }
const y = { outer: { m: { a: 4, b: 3 }, a: 2, z: 1 } }
expect(stableStringify(x)).toBe(stableStringify(y))
})
test('array element order IS preserved (semantic in API contracts)', () => {
expect(stableStringify({ messages: ['a', 'b', 'c'] })).not.toBe(
stableStringify({ messages: ['c', 'b', 'a'] }),
)
})
test('arrays of objects have keys sorted inside each element', () => {
const out = stableStringify({
tools: [
{ name: 'Bash', description: 'run' },
{ description: 'read', name: 'Read' },
],
})
expect(out).toBe(
'{"tools":[{"description":"run","name":"Bash"},{"description":"read","name":"Read"}]}',
)
})
test('undefined values are omitted (matches JSON.stringify)', () => {
const out = stableStringify({ a: undefined, b: 1 })
expect(out).toBe('{"b":1}')
})
test('primitive and null pass through unchanged', () => {
expect(stableStringify(null)).toBe('null')
expect(stableStringify(42)).toBe('42')
expect(stableStringify('x')).toBe('"x"')
expect(stableStringify(true)).toBe('true')
})
test('throws TypeError on circular structures (same behavior as JSON.stringify)', () => {
const obj: Record<string, unknown> = {}
obj.self = obj
// The exact message varies by engine (V8: "Converting circular structure
// to JSON", Bun: "JSON.stringify cannot serialize cyclic structures.").
// We only pin the error class — same contract as native JSON.stringify.
expect(() => stableStringify(obj)).toThrow(TypeError)
expect(() => JSON.stringify(obj)).toThrow(TypeError)
})
test('throws TypeError on circular references nested deep in the graph', () => {
const inner: Record<string, unknown> = { val: 1 }
const outer = { a: { b: inner } }
inner.cycle = outer
expect(() => stableStringify(outer)).toThrow(TypeError)
})
test('does not throw on DAGs (same object referenced from multiple keys)', () => {
const shared = { x: 1 }
// Native JSON.stringify handles this fine — stableStringify must too.
expect(() => stableStringify({ a: shared, b: shared })).not.toThrow()
expect(stableStringify({ a: shared, b: shared })).toBe(
'{"a":{"x":1},"b":{"x":1}}',
)
})
})
describe('sortKeysDeep', () => {
test('returns an object with sorted keys at every depth', () => {
const sorted = sortKeysDeep({
b: 1,
a: { y: 2, x: { d: 3, c: 4 } },
}) as Record<string, unknown>
expect(Object.keys(sorted)).toEqual(['a', 'b'])
expect(Object.keys(sorted.a as Record<string, unknown>)).toEqual([
'x',
'y',
])
})
test('arrays are preserved element-wise', () => {
const sorted = sortKeysDeep([
{ b: 1, a: 2 },
{ d: 3, c: 4 },
]) as Array<Record<string, unknown>>
expect(Object.keys(sorted[0]!)).toEqual(['a', 'b'])
expect(Object.keys(sorted[1]!)).toEqual(['c', 'd'])
})
})
describe('prefix caching invariants — end-to-end', () => {
// This is the real payload shape that an OpenAI-compatible body
// takes on its way to the upstream provider. We exercise it via
// stableStringify to verify that rebuilding the body with different
// key insertion orders yields the same bytes.
const bodyA = {
model: 'gpt-4o-mini',
stream: true,
messages: [
{ role: 'system', content: 'you are helpful' },
{ role: 'user', content: 'hi' },
],
tools: [{ name: 't', description: 'x' }],
temperature: 0.7,
top_p: 1,
}
const bodyB = {
top_p: 1,
temperature: 0.7,
tools: [{ description: 'x', name: 't' }],
messages: [
{ content: 'you are helpful', role: 'system' },
{ content: 'hi', role: 'user' },
],
stream: true,
model: 'gpt-4o-mini',
}
test('two spread-merged request bodies produce identical stable bytes', () => {
expect(stableStringify(bodyA)).toBe(stableStringify(bodyB))
})
test('calling stableStringify twice yields identical bytes (idempotent)', () => {
expect(stableStringify(bodyA)).toBe(stableStringify(bodyA))
})
})

View File

@@ -0,0 +1,199 @@
import { describe, expect, test } from 'bun:test'
import { sortKeysDeep, stableStringify } from './stableStringify'
/**
* Contract: `stableStringify(input)` must equal `JSON.stringify(input)`
* for every value where the latter is well-defined, except that object
* keys are emitted in lexicographic order at every depth. These tests
* focus on the native pre-processing semantics — `toJSON(key)` and
* primitive-wrapper unboxing — that the deep-sort path must preserve.
*/
describe('stableStringify — toJSON semantics', () => {
test('Date at top level → ISO string', () => {
const d = new Date('2024-01-02T03:04:05.678Z')
expect(stableStringify(d)).toBe(JSON.stringify(d))
})
test('Date nested in object → ISO string + sorted keys', () => {
const d = new Date('2024-01-02T03:04:05.678Z')
const input = { z: 1, when: d, a: 'x' }
expect(stableStringify(input)).toBe(
      `{"a":"x","when":${JSON.stringify(d.toISOString())},"z":1}`,
)
})
test('Date inside an array → each element converted', () => {
const a = new Date('2024-01-02T03:04:05.678Z')
const b = new Date('2025-06-07T08:09:10.111Z')
const input = [a, b]
expect(stableStringify(input)).toBe(JSON.stringify(input))
})
test('URL value serializes via URL.prototype.toJSON', () => {
const u = new URL('https://example.com/path?q=1')
expect(stableStringify(u)).toBe(JSON.stringify(u))
expect(stableStringify({ url: u })).toBe(JSON.stringify({ url: u }))
})
test('custom class with toJSON returning a plain object → keys sorted', () => {
class Thing {
toJSON() {
return { z: 1, a: 2, m: 3 }
}
}
const out = stableStringify(new Thing())
expect(out).toBe('{"a":2,"m":3,"z":1}')
})
test('toJSON(key) receives the property name for object values', () => {
const seen: string[] = []
class Trace {
toJSON(k: string) {
seen.push(k)
return k
}
}
const t = new Trace()
stableStringify({ alpha: t, beta: t })
// Object keys are sorted, so toJSON is invoked alpha-first.
expect(seen).toEqual(['alpha', 'beta'])
})
test('toJSON(key) receives the array index as a string for array elements', () => {
const seen: string[] = []
class Trace {
toJSON(k: string) {
seen.push(k)
return k
}
}
const t = new Trace()
stableStringify([t, t, t])
expect(seen).toEqual(['0', '1', '2'])
})
test('toJSON(key) receives empty string at top level', () => {
let captured: string | undefined
class Trace {
toJSON(k: string) {
captured = k
return 'ok'
}
}
stableStringify(new Trace())
expect(captured).toBe('')
})
test('toJSON returning undefined drops the property (matches native)', () => {
class Hidden {
toJSON() {
return undefined
}
}
const input = { a: 1, gone: new Hidden(), b: 2 }
expect(stableStringify(input)).toBe(JSON.stringify(input))
expect(stableStringify(input)).toBe('{"a":1,"b":2}')
})
test('nested mix: object with a Date field and a regular field → keys sorted, Date as ISO', () => {
const d = new Date('2024-01-02T03:04:05.678Z')
const input = { z: { when: d, a: 1 }, a: 'first' }
expect(stableStringify(input)).toBe(
`{"a":"first","z":{"a":1,"when":${JSON.stringify(d.toISOString())}}}`,
)
})
})
describe('stableStringify — primitive wrapper unboxing', () => {
test('new Number at top level → numeric primitive', () => {
const n = new Number(42)
expect(stableStringify(n)).toBe(JSON.stringify(n))
expect(stableStringify(n)).toBe('42')
})
test('new String at top level → string primitive', () => {
const s = new String('hello')
expect(stableStringify(s)).toBe(JSON.stringify(s))
expect(stableStringify(s)).toBe('"hello"')
})
test('new Boolean at top level → boolean primitive', () => {
const b = new Boolean(true)
expect(stableStringify(b)).toBe(JSON.stringify(b))
expect(stableStringify(b)).toBe('true')
})
test('new Boolean(false) at top level → false', () => {
const b = new Boolean(false)
expect(stableStringify(b)).toBe(JSON.stringify(b))
expect(stableStringify(b)).toBe('false')
})
test('boxed wrappers as object values → primitives + sorted keys', () => {
const input = {
z: new Number(1),
a: new String('x'),
m: new Boolean(false),
}
expect(stableStringify(input)).toBe('{"a":"x","m":false,"z":1}')
// Native form: same primitive shape (without sort guarantee).
    expect(JSON.parse(stableStringify(input))).toEqual(
      JSON.parse(JSON.stringify(input)),
    )
})
})
describe('stableStringify — cycles vs DAGs', () => {
test('top-level cycle throws TypeError (regression guard)', () => {
const obj: Record<string, unknown> = { a: 1 }
obj.self = obj
expect(() => stableStringify(obj)).toThrow(TypeError)
})
test('deep cycle throws TypeError', () => {
const a: Record<string, unknown> = { name: 'a' }
const b: Record<string, unknown> = { name: 'b' }
a.next = b
b.back = a
expect(() => stableStringify(a)).toThrow(TypeError)
})
test('toJSON returning an ancestor still triggers the cycle check', () => {
type Node = { name: string; child?: { toJSON(): Node } }
const parent: Node = { name: 'parent' }
parent.child = {
toJSON() {
return parent
},
}
expect(() => stableStringify(parent)).toThrow(TypeError)
})
test('DAG (same object referenced twice via different keys) does NOT throw', () => {
const shared = { v: 1 }
const input = { left: shared, right: shared }
expect(() => stableStringify(input)).not.toThrow()
expect(stableStringify(input)).toBe('{"left":{"v":1},"right":{"v":1}}')
})
test('DAG of arrays does NOT throw', () => {
const shared = [1, 2, 3]
const input = { a: shared, b: shared }
expect(() => stableStringify(input)).not.toThrow()
expect(stableStringify(input)).toBe('{"a":[1,2,3],"b":[1,2,3]}')
})
})
describe('sortKeysDeep — same toJSON/unbox semantics', () => {
test('returns the post-toJSON, post-unbox sorted shape', () => {
const d = new Date('2024-01-02T03:04:05.678Z')
const out = sortKeysDeep({ z: 1, a: new Number(7), when: d }) as Record<
string,
unknown
>
expect(out).toEqual({ a: 7, when: d.toISOString(), z: 1 })
// Key order in the returned object is lexicographic.
expect(Object.keys(out)).toEqual(['a', 'when', 'z'])
})
})

View File

@@ -0,0 +1,132 @@
/**
* Deterministic JSON serialization.
*
* WHY: OpenAI / Kimi / DeepSeek / Codex all use **implicit prefix caching**
* — the server hashes the request prefix and reuses cached reasoning if
* the bytes match exactly. Even a trivial key-order difference between
* two otherwise-identical requests invalidates the hash and forces a
* full re-parse.
*
* This is also a pre-requisite for Anthropic / Bedrock / Vertex
* `cache_control` breakpoints: ephemeral cache entries match on exact
* content, so a re-ordered object literal busts the breakpoint.
*
 * `JSON.stringify` emits keys in property-insertion order (per spec for
 * string keys), so two semantically equal bodies built by different code
 * paths, e.g. spread-merged configs whose keys were inserted in different
 * orders, serialize to different bytes.
*
* This helper recursively sorts object keys. Arrays preserve order
* (element order IS semantically significant in message/content arrays).
*
 * Related key-sorting helpers live in src/services/remoteManagedSettings
 * and src/services/policyLimits. The three modules are INTENTIONALLY
 * separate:
* - remoteManagedSettings: matches Python `json.dumps(sort_keys=True)`
* byte-for-byte to validate server-computed checksums. Must NOT
* drop undefined (Python preserves null).
* - policyLimits: uses `localeCompare` (keeps legacy behavior; locale-
* sensitive but stable for a given runtime).
* - this module (stableStringify): byte-identity for API body caching.
* Drops undefined to match `JSON.stringify` — the openaiShim/codexShim
* body is always downstream of `JSON.stringify` semantics.
* Do not consolidate without auditing the 3 callers — each has a
* different server-compat contract.
*/
/**
* Returns a byte-stable JSON string representation.
* - Object keys are emitted in lexicographic order at every depth.
* - Array element order is preserved.
* - Undefined values are dropped (matching `JSON.stringify`).
* - Indentation matches the `space` argument (0 by default → compact).
*
* Native `JSON.stringify` pre-processing is preserved before sorting:
* - `toJSON(key)` is invoked on objects that define it (own or
* inherited — covers `Date`, `URL`, and any user class). The `key`
* argument is the property name for nested object values, the array
* index as a string for array elements, and `''` for the top-level
* call, matching native semantics.
* - Boxed primitive wrappers (`new Number(...)`, `new String(...)`,
* `new Boolean(...)`) are unboxed to their primitive form.
* Both happen BEFORE the array/object branches dispatch, so the value
* actually walked is the post-conversion form. If `toJSON` returns
* `undefined`, the value is dropped from its parent (matching native
* `JSON.stringify`).
*
* Single-pass: `deepSort` walks the (possibly converted) value tree
* once, building a sorted clone. A `WeakSet` of ancestors tracks the
* current path through the object graph so that circular references
* throw `TypeError` (same contract as native `JSON.stringify`). The
* cycle check runs on the post-`toJSON` value, so a `toJSON` impl that
* returns an ancestor still throws. Ancestors are always removed in a
* `finally` block when unwinding out of each object branch (even on
* exception), so DAG inputs — where the same object is reachable via
* multiple keys — are handled correctly and do not throw.
*/
export function stableStringify(value: unknown, space?: number): string {
return JSON.stringify(deepSort(value, new WeakSet(), ''), null, space)
}
/**
* Returns a deep-sorted clone of the input: object keys lexicographic
* at every depth, arrays preserved. Useful when callers need to feed
* the sorted shape into a downstream serializer (e.g., when they must
* call `JSON.stringify` with a custom spacing or replacer).
*
* Applies the same `toJSON(key)` invocation and primitive-wrapper
* unboxing as `stableStringify`, so the returned shape mirrors what
* native `JSON.stringify` would have walked.
*/
export function sortKeysDeep<T>(value: T): T {
return deepSort(value, new WeakSet(), '') as T
}
function deepSort(
value: unknown,
ancestors: WeakSet<object>,
key: string,
): unknown {
// Step 1: invoke toJSON(key) if present — matches native pre-processing.
if (
value !== null &&
typeof value === 'object' &&
typeof (value as { toJSON?: unknown }).toJSON === 'function'
) {
value = (value as { toJSON: (k: string) => unknown }).toJSON(key)
}
  // Step 2: unbox primitive wrappers. Boolean needs .valueOf(): passing the
  // wrapper object itself to Boolean() would always yield true.
  if (value instanceof Number) value = Number(value)
  else if (value instanceof String) value = String(value)
  else if (value instanceof Boolean) value = Boolean(value.valueOf())
// Step 3: primitives short-circuit (post-toJSON the value may now be one).
if (value === null || typeof value !== 'object') return value
// Step 4: arrays — element key is the index as a string.
if (Array.isArray(value)) {
return value.map((v, i) => deepSort(v, ancestors, String(i)))
}
// Step 5: cycle check on the post-toJSON value.
if (ancestors.has(value as object)) {
throw new TypeError('Converting circular structure to JSON')
}
ancestors.add(value as object)
try {
const sorted: Record<string, unknown> = {}
for (const k of Object.keys(value as Record<string, unknown>).sort()) {
const child = deepSort(
(value as Record<string, unknown>)[k],
ancestors,
k,
)
if (child === undefined) continue
sorted[k] = child
}
return sorted
} finally {
ancestors.delete(value as object)
}
}
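As a usage sketch, the byte-stable output is what makes hashing useful for prefix-cache keys. The stand-in below sorts top-level keys only, via a `JSON.stringify` replacer array, so it works for flat bodies (a replacer array is an allowlist applied at every depth); the module's `deepSort` handles nesting. `requestCacheKey` is a hypothetical helper, not part of the module:

```typescript
import { createHash } from 'node:crypto'

// Top-level-only stand-in for stableStringify: a sorted replacer array
// fixes the key emission order regardless of insertion order.
function stableTopLevel(body: Record<string, unknown>): string {
  return JSON.stringify(body, Object.keys(body).sort())
}

// Hypothetical helper: semantically equal bodies map to the same cache key.
function requestCacheKey(body: Record<string, unknown>): string {
  return createHash('sha256').update(stableTopLevel(body)).digest('hex')
}
```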

View File

@@ -0,0 +1,165 @@
import { describe, expect, it } from 'bun:test'
import { StreamingTokenCounter } from './streamingTokenCounter.js'
describe('StreamingTokenCounter', () => {
describe('start', () => {
it('resets state and sets input tokens', () => {
const counter = new StreamingTokenCounter()
counter.start(1000)
expect(counter.total).toBe(1000)
})
})
describe('addChunk', () => {
it('accumulates content', () => {
const counter = new StreamingTokenCounter()
counter.start(500)
counter.addChunk('Hello world ')
expect(counter.characterCount).toBe(12)
})
it('accumulates multiple chunks', () => {
const counter = new StreamingTokenCounter()
counter.start(500)
counter.addChunk('Hello ')
counter.addChunk('world ')
expect(counter.characterCount).toBeGreaterThanOrEqual(10)
})
it('handles empty chunks', () => {
const counter = new StreamingTokenCounter()
counter.start(50)
counter.addChunk(undefined)
counter.addChunk('')
expect(counter.output).toBe(0)
expect(counter.total).toBe(50)
})
it('updates cached token count at word boundaries during streaming', () => {
const counter = new StreamingTokenCounter()
counter.start(100)
counter.addChunk('Hello ')
const afterFirst = counter.output
expect(afterFirst).toBeGreaterThan(0)
counter.addChunk('world ')
const afterSecond = counter.output
expect(afterSecond).toBeGreaterThan(afterFirst)
})
it('advances count past space after word boundary', () => {
const counter = new StreamingTokenCounter()
counter.start()
counter.addChunk('Hello ') // counts Hello
const count1 = counter.output
counter.addChunk('world') // short chunk, no space - shouldn't advance
const count2 = counter.output
expect(count2).toBe(count1)
counter.addChunk(' ') // space triggers count
const count3 = counter.output
expect(count3).toBeGreaterThan(count2)
})
})
describe('finalize', () => {
it('counts all content after finalize', () => {
const counter = new StreamingTokenCounter()
counter.start(500)
counter.addChunk('Hello world')
counter.finalize()
expect(counter.output).toBeGreaterThan(0)
})
it('counts tokens after finalize', () => {
const counter = new StreamingTokenCounter()
counter.start(100)
counter.addChunk('Hello ')
counter.addChunk('world ')
counter.finalize()
expect(counter.output).toBeGreaterThan(0)
expect(counter.total).toBe(100 + counter.output)
})
})
describe('total', () => {
it('sums input and output after finalize', () => {
const counter = new StreamingTokenCounter()
counter.start(500)
counter.addChunk('Test content ')
counter.finalize()
expect(counter.total).toBeGreaterThanOrEqual(500)
})
})
describe('tokensPerSecond', () => {
it('calculates tokens per second', () => {
const counter = new StreamingTokenCounter()
counter.start()
counter.addChunk('123456789 ')
expect(typeof counter.tokensPerSecond).toBe('number')
})
})
describe('estimateRemainingTokens', () => {
it('returns positive when under target', () => {
const counter = new StreamingTokenCounter()
counter.start(500)
counter.addChunk('Hello ')
counter.finalize()
expect(counter.estimateRemainingTokens(1000)).toBeGreaterThan(0)
})
it('returns 0 when at or over target', () => {
const counter = new StreamingTokenCounter()
counter.start(500)
counter.addChunk('Hello ')
counter.finalize()
expect(counter.estimateRemainingTokens(1)).toBe(0)
})
})
describe('estimateRemainingTimeMs', () => {
it('returns estimate based on rate', () => {
const counter = new StreamingTokenCounter()
counter.start()
counter.addChunk('Hello world ')
expect(counter.estimateRemainingTimeMs(100)).toBeGreaterThanOrEqual(0)
})
})
describe('characterCount', () => {
it('returns accumulated character count', () => {
const counter = new StreamingTokenCounter()
counter.addChunk('Hello')
expect(counter.characterCount).toBe(5)
})
it('accumulates content from chunks', () => {
const counter = new StreamingTokenCounter()
counter.start(100)
counter.addChunk('Hello ')
counter.addChunk('world ')
expect(counter.characterCount).toBeGreaterThan(0)
})
})
describe('reset', () => {
it('clears all state', () => {
const counter = new StreamingTokenCounter()
counter.start(500)
counter.addChunk('Hello world ')
counter.reset()
expect(counter.characterCount).toBe(0)
})
it('resets correctly', () => {
const counter = new StreamingTokenCounter()
counter.start(100)
counter.addChunk('test ')
counter.reset()
expect(counter.characterCount).toBe(0)
expect(counter.total).toBe(0)
})
})
})

View File

@@ -0,0 +1,133 @@
/**
* Streaming Token Counter - Accurate token counting during generation
*
* Accumulates raw content and counts tokens at consistent boundaries
* to avoid dependency on arbitrary chunk boundaries.
*/
import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
export class StreamingTokenCounter {
private inputTokens = 0
private accumulatedContent = ''
private lastCountedIndex = 0
private cachedOutputTokens = 0
private startTime = 0
/**
* Start tracking a new stream
* @param initialInputTokens - Token count for system prompt + history
*/
start(initialInputTokens?: number): void {
this.reset()
this.startTime = Date.now()
this.inputTokens = initialInputTokens ?? 0
}
/**
* Add content from a streaming chunk
* Accumulates raw content, counting only at word boundaries
* to avoid instability from arbitrary chunk boundaries.
*/
addChunk(deltaContent?: string): void {
if (deltaContent) {
this.accumulatedContent += deltaContent
this.recountAtWordBoundary()
}
}
/**
* Recount tokens at word boundaries for stability.
* Only counts after whitespace to avoid mid-word splits.
*/
private recountAtWordBoundary(): void {
const content = this.accumulatedContent
const unprocessedContent = content.slice(this.lastCountedIndex)
const searchStart = unprocessedContent[0] === ' ' ? 1 : 0
const nextSpaceIndex = unprocessedContent.indexOf(' ', searchStart)
    let boundaryIndex: number
    if (nextSpaceIndex > 0) {
      // Count up to the last complete word before the next space.
      boundaryIndex = this.lastCountedIndex + nextSpaceIndex
    } else if (unprocessedContent.length > 50) {
      // Long whitespace-free run (code, URLs): count everything so far.
      boundaryIndex = content.length
    } else {
      return
    }
const toCount = content.slice(0, boundaryIndex)
this.cachedOutputTokens = roughTokenCountEstimation(toCount)
this.lastCountedIndex = boundaryIndex
}
/**
* Flush remaining content and finalize count.
* Call this when stream completes.
*/
finalize(): number {
if (this.accumulatedContent.length > this.lastCountedIndex) {
this.cachedOutputTokens = roughTokenCountEstimation(this.accumulatedContent)
this.lastCountedIndex = this.accumulatedContent.length
}
return this.cachedOutputTokens
}
/** Get total tokens (input + output) */
get total(): number {
return this.inputTokens + this.cachedOutputTokens
}
/** Get output tokens only */
get output(): number {
return this.cachedOutputTokens
}
/** Get elapsed time in milliseconds */
get elapsedMs(): number {
return this.startTime > 0 ? Date.now() - this.startTime : 0
}
/** Get tokens per second generation rate */
get tokensPerSecond(): number {
if (this.elapsedMs === 0) return 0
return (this.cachedOutputTokens / this.elapsedMs) * 1000
}
/** Get estimated total generation time based on current rate */
getEstimatedGenerationTimeMs(): number {
if (this.tokensPerSecond === 0) return 0
return Math.round((this.cachedOutputTokens / this.tokensPerSecond) * 1000)
}
/** Estimate remaining tokens until target output size */
estimateRemainingTokens(targetOutputTokens: number): number {
return Math.max(0, targetOutputTokens - this.cachedOutputTokens)
}
/** Estimate remaining time based on target output tokens */
estimateRemainingTimeMs(targetOutputTokens: number): number {
if (this.tokensPerSecond === 0) return 0
const remaining = this.estimateRemainingTokens(targetOutputTokens)
return Math.round((remaining / this.tokensPerSecond) * 1000)
}
/** Get character count for raw content */
get characterCount(): number {
return this.accumulatedContent.length
}
/** Reset counter */
reset(): void {
this.inputTokens = 0
this.accumulatedContent = ''
this.lastCountedIndex = 0
this.cachedOutputTokens = 0
this.startTime = 0
}
}
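The boundary rule in `recountAtWordBoundary` can be exercised in isolation. Below is a standalone mirror of the scan, with a chars/4 heuristic standing in for `roughTokenCountEstimation` (an assumption, not the real estimator):

```typescript
// chars/4 is a common rough heuristic, standing in for the real estimator.
const estimate = (s: string): number => Math.ceil(s.length / 4)

// Mirror of the boundary scan: count only up to the last complete word,
// unless >50 unprocessed chars accumulate without whitespace (code, URLs).
function countAtBoundary(
  content: string,
  lastCountedIndex: number,
): { tokens: number; index: number } | null {
  const unprocessed = content.slice(lastCountedIndex)
  const searchStart = unprocessed[0] === ' ' ? 1 : 0
  const nextSpace = unprocessed.indexOf(' ', searchStart)
  if (nextSpace > 0) {
    const boundary = lastCountedIndex + nextSpace
    return { tokens: estimate(content.slice(0, boundary)), index: boundary }
  }
  if (unprocessed.length > 50) {
    return { tokens: estimate(content), index: content.length }
  }
  return null // mid-word: hold the cached count until more input arrives
}
```

This reproduces the test sequence above: `'Hello '` counts, `'world'` with no trailing space does not advance, and a following `' '` triggers the recount.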

View File

@@ -1,5 +1,12 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { modelSupportsThinking } from './thinking.js'
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
import { resetSettingsCache } from './settings/settingsCache.js'
mock.module('./model/providers.js', () => ({
getAPIProvider: () =>
process.env.CLAUDE_CODE_USE_OPENAI === '1' ? 'openai' : 'firstParty',
}))
const { modelSupportsThinking } = await import('./thinking.js')
const ENV_KEYS = [
'CLAUDE_CODE_USE_OPENAI',
@@ -14,6 +21,13 @@ const ENV_KEYS = [
'OPENAI_MODEL',
'NVIDIA_NIM',
'MINIMAX_API_KEY',
'XAI_API_KEY',
'ANTHROPIC_DEFAULT_OPUS_MODEL',
'ANTHROPIC_DEFAULT_OPUS_MODEL_SUPPORTED_CAPABILITIES',
'ANTHROPIC_DEFAULT_SONNET_MODEL',
'ANTHROPIC_DEFAULT_SONNET_MODEL_SUPPORTED_CAPABILITIES',
'ANTHROPIC_DEFAULT_HAIKU_MODEL',
'ANTHROPIC_DEFAULT_HAIKU_MODEL_SUPPORTED_CAPABILITIES',
'USER_TYPE',
]
@@ -24,6 +38,7 @@ beforeEach(() => {
originalEnv[key] = process.env[key]
delete process.env[key]
}
resetSettingsCache()
})
afterEach(() => {
@@ -34,6 +49,7 @@ afterEach(() => {
process.env[key] = originalEnv[key]
}
}
resetSettingsCache()
})
describe('modelSupportsThinking — Z.AI GLM', () => {
@@ -61,4 +77,19 @@ describe('modelSupportsThinking — Z.AI GLM', () => {
expect(modelSupportsThinking('glm-50')).toBe(false)
})
})
test('does not reuse stale capability overrides after env changes', () => {
process.env.CLAUDE_CODE_USE_OPENAI = '1'
process.env.OPENAI_BASE_URL = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
process.env.ANTHROPIC_DEFAULT_SONNET_MODEL = 'GLM-5.1'
process.env.ANTHROPIC_DEFAULT_SONNET_MODEL_SUPPORTED_CAPABILITIES = ''
expect(modelSupportsThinking('GLM-5.1')).toBe(false)
delete process.env.ANTHROPIC_DEFAULT_SONNET_MODEL
delete process.env.ANTHROPIC_DEFAULT_SONNET_MODEL_SUPPORTED_CAPABILITIES
process.env.OPENAI_BASE_URL = 'https://api.z.ai/api/coding/paas/v4'
expect(modelSupportsThinking('GLM-5.1')).toBe(true)
})
})

View File

@@ -131,7 +131,7 @@ export function modelSupportsAdaptiveThinking(model: string): boolean {
}
const canonical = getCanonicalName(model)
// Supported by a subset of Claude 4 models
if (canonical.includes('opus-4-6') || canonical.includes('sonnet-4-6')) {
if (canonical.includes('opus-4-7') || canonical.includes('opus-4-6') || canonical.includes('sonnet-4-6')) {
return true
}
// Exclude any other known legacy models (allowlist above catches 4-6 variants first)
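The growing `includes` chain stays readable as an allowlist array. A standalone sketch of the same check (assuming `getCanonicalName` yields lowercase names, as the diff implies):

```typescript
// Claude 4 families with adaptive thinking; opus-4-7 joins the 4-6 variants.
const ADAPTIVE_THINKING_FAMILIES = ['opus-4-7', 'opus-4-6', 'sonnet-4-6']

function supportsAdaptiveThinking(canonical: string): boolean {
  return ADAPTIVE_THINKING_FAMILIES.some(f => canonical.includes(f))
}
```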

View File

@@ -10,9 +10,12 @@ function installCommonMocks(options?: {
oauthEmail?: string
gitEmail?: string
}) {
mock.module('../bootstrap/state.js', () => ({
getSessionId: () => 'session-test',
}))
// NOTE: Do NOT mock ../bootstrap/state.js here.
// mock.module() is process-global in bun:test and mock.restore() does NOT
// undo it. Mocking state.js leaks getSessionId = () => 'session-test' into
// every other test file that imports state.js (e.g. SDK CON-1 tests).
// The dynamic import (importFreshUserModule) will use the real state.js,
// which is fine — these tests only assert email, not sessionId.
mock.module('./auth.js', () => ({
getOauthAccountInfo: () =>

src/utils/validation.ts Normal file

@@ -0,0 +1,54 @@
/**
* Shared validation utilities for SDK-facing APIs.
*/
/**
* Validate an array of items using a per-item validator.
* Throws TypeError with the index and missing field if validation fails.
*/
export function validateArrayOf<T>(
items: unknown[],
validator: (item: unknown, index: number) => T,
label: string,
): T[] {
if (!Array.isArray(items)) {
throw new TypeError(`${label}: expected an array, got ${typeof items}`)
}
return items.map((item, i) => {
try {
return validator(item, i)
} catch (err) {
if (err instanceof TypeError) {
throw new TypeError(`${label}: item at index ${i} - ${err.message}`)
}
throw err
}
})
}
/**
* Assert that a value is a non-empty string.
*/
export function assertNonEmptyString(value: unknown, field: string): asserts value is string {
if (typeof value !== 'string' || value.length === 0) {
throw new TypeError(`missing or empty '${field}' (expected non-empty string)`)
}
}
/**
* Assert that a value is a non-null object (but not an array).
*/
export function assertObject(value: unknown, field: string): asserts value is Record<string, unknown> {
if (typeof value !== 'object' || value === null || Array.isArray(value)) {
throw new TypeError(`missing or invalid '${field}' (expected object)`)
}
}
/**
* Assert that a value is a function.
*/
export function assertFunction(value: unknown, field: string): asserts value is (...args: any[]) => any {
if (typeof value !== 'function') {
throw new TypeError(`missing or invalid '${field}' (expected function)`)
}
}
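
The validators above compose: `validateArrayOf` wraps a per-item validator and prefixes any `TypeError` with the failing index, so callers get `label: item at index N - <reason>`. A minimal usage sketch — the helper definitions are inlined here so the snippet is self-contained, and the `agents` label and `{ name }` item shape are illustrative, not from the SDK:

```typescript
// Inlined copies of the helpers from src/utils/validation.ts for a
// self-contained sketch; in the codebase you would import them instead.
function assertNonEmptyString(value: unknown, field: string): asserts value is string {
  if (typeof value !== 'string' || value.length === 0) {
    throw new TypeError(`missing or empty '${field}' (expected non-empty string)`)
  }
}

function validateArrayOf<T>(
  items: unknown[],
  validator: (item: unknown, index: number) => T,
  label: string,
): T[] {
  if (!Array.isArray(items)) {
    throw new TypeError(`${label}: expected an array, got ${typeof items}`)
  }
  return items.map((item, i) => {
    try {
      return validator(item, i)
    } catch (err) {
      if (err instanceof TypeError) {
        // Re-wrap with the index so the caller sees where validation failed.
        throw new TypeError(`${label}: item at index ${i} - ${err.message}`)
      }
      throw err
    }
  })
}

// Per-item validator: each item must be an object with a non-empty 'name'.
const validateName = (item: unknown): string => {
  const obj = item as Record<string, unknown>
  assertNonEmptyString(obj.name, 'name')
  return obj.name
}

// Happy path: returns the validated names.
const names = validateArrayOf([{ name: 'alpha' }, { name: 'beta' }], validateName, 'agents')

// Failure path: the thrown message carries the label, index, and field.
let failureMessage = ''
try {
  validateArrayOf([{ name: '' }], validateName, 'agents')
} catch (err) {
  failureMessage = (err as TypeError).message
}
console.log(names.join(','))   // alpha,beta
console.log(failureMessage)    // agents: item at index 0 - missing or empty 'name' (expected non-empty string)
```

Note that non-`TypeError` exceptions from the validator propagate unwrapped, so unexpected failures are not masked as validation errors.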


@@ -0,0 +1,279 @@
import { describe, test, expect } from 'bun:test'
import {
SDKAssistantMessageSchema,
SDKSystemMessageSchema,
SDKCompactBoundaryMessageSchema,
SDKMessageSchema,
SDKUserMessageSchema,
SDKResultMessageSchema,
SDKResultSuccessSchema,
SDKResultErrorSchema,
SDKSessionInfoSchema,
PermissionModeSchema,
ThinkingConfigSchema,
AgentDefinitionSchema,
McpServerStatusSchema,
ModelUsageSchema,
FastModeStateSchema,
HookInputSchema,
ExitReasonSchema,
} from '../../src/entrypoints/sdk/coreSchemas.js'
import { z } from 'zod/v4'
/**
* Tests for generated SDK types from Zod schemas.
*
* These tests verify that:
* 1. All schemas materialize correctly (no lazy errors)
* 2. Schemas can parse valid data
* 3. Key discriminated fields are correct
* 4. The full SDKMessage union accepts all message variants
*/
describe('SDK Zod schemas (type generation source)', () => {
test('SDKAssistantMessageSchema accepts valid data', () => {
const schema = SDKAssistantMessageSchema()
const result = schema.safeParse({
type: 'assistant',
message: { role: 'assistant', content: [{ type: 'text', text: 'hi' }] },
parent_tool_use_id: null,
uuid: '12345678-1234-1234-1234-123456789012',
session_id: '12345678-1234-1234-1234-123456789012',
})
expect(result.success).toBe(true)
})
test('SDKSystemMessageSchema accepts valid data', () => {
const schema = SDKSystemMessageSchema()
const result = schema.safeParse({
type: 'system',
subtype: 'init',
apiKeySource: 'user',
claude_code_version: '0.3.0',
cwd: '/home/user/project',
tools: ['Read', 'Write'],
mcp_servers: [{ name: 'test', status: 'connected' }],
model: 'claude-sonnet-4-6',
permissionMode: 'default',
slash_commands: [],
output_style: 'default',
skills: [],
plugins: [],
uuid: '12345678-1234-1234-1234-123456789012',
session_id: '12345678-1234-1234-1234-123456789012',
})
expect(result.success).toBe(true)
})
test('SDKCompactBoundaryMessageSchema accepts valid data', () => {
const schema = SDKCompactBoundaryMessageSchema()
const result = schema.safeParse({
type: 'system',
subtype: 'compact_boundary',
compact_metadata: {
trigger: 'manual',
pre_tokens: 1000,
},
uuid: '12345678-1234-1234-1234-123456789012',
session_id: '12345678-1234-1234-1234-123456789012',
})
expect(result.success).toBe(true)
})
test('SDKCompactBoundaryMessageSchema accepts preserved_segment', () => {
const schema = SDKCompactBoundaryMessageSchema()
const result = schema.safeParse({
type: 'system',
subtype: 'compact_boundary',
compact_metadata: {
trigger: 'auto',
pre_tokens: 50000,
preserved_segment: {
head_uuid: 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa',
anchor_uuid: 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb',
tail_uuid: 'cccccccc-cccc-cccc-cccc-cccccccccccc',
},
},
uuid: '12345678-1234-1234-1234-123456789012',
session_id: '12345678-1234-1234-1234-123456789012',
})
expect(result.success).toBe(true)
})
test('SDKUserMessageSchema accepts valid data', () => {
const schema = SDKUserMessageSchema()
const result = schema.safeParse({
type: 'user',
message: { role: 'user', content: 'hello' },
parent_tool_use_id: null,
})
expect(result.success).toBe(true)
})
test('SDKResultSuccessSchema accepts valid data', () => {
const schema = SDKResultSuccessSchema()
const result = schema.safeParse({
type: 'result',
subtype: 'success',
duration_ms: 1500,
duration_api_ms: 1200,
is_error: false,
num_turns: 1,
result: 'Done',
stop_reason: 'end_turn',
total_cost_usd: 0.01,
usage: { input_tokens: 100, output_tokens: 50 },
modelUsage: {},
permission_denials: [],
uuid: '12345678-1234-1234-1234-123456789012',
session_id: '12345678-1234-1234-1234-123456789012',
})
expect(result.success).toBe(true)
})
test('SDKResultErrorSchema accepts valid data', () => {
const schema = SDKResultErrorSchema()
const result = schema.safeParse({
type: 'result',
subtype: 'error_during_execution',
duration_ms: 100,
duration_api_ms: 80,
is_error: true,
num_turns: 1,
stop_reason: null,
total_cost_usd: 0.001,
usage: { input_tokens: 50, output_tokens: 10 },
modelUsage: {},
permission_denials: [],
errors: ['Something went wrong'],
uuid: '12345678-1234-1234-1234-123456789012',
session_id: '12345678-1234-1234-1234-123456789012',
})
expect(result.success).toBe(true)
})
test('SDKMessageSchema accepts all message types', () => {
const schema = SDKMessageSchema()
const messages = [
{
type: 'assistant',
message: {},
parent_tool_use_id: null,
uuid: '12345678-1234-1234-1234-123456789012',
session_id: '12345678-1234-1234-1234-123456789012',
},
{
type: 'user',
message: {},
parent_tool_use_id: null,
},
{
type: 'system',
subtype: 'init',
apiKeySource: 'user',
claude_code_version: '0.3.0',
cwd: '/tmp',
tools: [],
mcp_servers: [],
model: 'sonnet',
permissionMode: 'default',
slash_commands: [],
output_style: 'default',
skills: [],
plugins: [],
uuid: '12345678-1234-1234-1234-123456789012',
session_id: '12345678-1234-1234-1234-123456789012',
},
{
type: 'system',
subtype: 'compact_boundary',
compact_metadata: { trigger: 'manual', pre_tokens: 100 },
uuid: '12345678-1234-1234-1234-123456789012',
session_id: '12345678-1234-1234-1234-123456789012',
},
]
for (const msg of messages) {
const result = schema.safeParse(msg)
expect(result.success).toBe(true)
}
})
test('SDKSessionInfoSchema accepts valid data', () => {
const schema = SDKSessionInfoSchema()
const result = schema.safeParse({
sessionId: '12345678-1234-1234-1234-123456789012',
summary: 'Test session',
lastModified: Date.now(),
})
expect(result.success).toBe(true)
})
test('PermissionModeSchema accepts valid modes', () => {
const schema = PermissionModeSchema()
const modes = ['default', 'acceptEdits', 'bypassPermissions', 'plan', 'dontAsk']
for (const mode of modes) {
expect(schema.safeParse(mode).success).toBe(true)
}
expect(schema.safeParse('invalid').success).toBe(false)
})
test('ThinkingConfigSchema accepts all variants', () => {
const schema = ThinkingConfigSchema()
expect(schema.safeParse({ type: 'adaptive' }).success).toBe(true)
expect(schema.safeParse({ type: 'enabled' }).success).toBe(true)
expect(schema.safeParse({ type: 'enabled', budgetTokens: 10000 }).success).toBe(true)
expect(schema.safeParse({ type: 'disabled' }).success).toBe(true)
expect(schema.safeParse({ type: 'unknown' }).success).toBe(false)
})
test('FastModeStateSchema accepts valid states', () => {
const schema = FastModeStateSchema()
expect(schema.safeParse('off').success).toBe(true)
expect(schema.safeParse('cooldown').success).toBe(true)
expect(schema.safeParse('on').success).toBe(true)
expect(schema.safeParse('unknown').success).toBe(false)
})
test('ExitReasonSchema accepts valid reasons', () => {
const schema = ExitReasonSchema()
const reasons = ['clear', 'resume', 'logout', 'prompt_input_exit', 'other', 'bypass_permissions_disabled']
for (const r of reasons) {
expect(schema.safeParse(r).success).toBe(true)
}
expect(schema.safeParse('invalid').success).toBe(false)
})
test('ModelUsageSchema accepts valid data', () => {
const schema = ModelUsageSchema()
const result = schema.safeParse({
inputTokens: 100,
outputTokens: 50,
cacheReadInputTokens: 200,
cacheCreationInputTokens: 300,
webSearchRequests: 1,
costUSD: 0.01,
contextWindow: 200000,
maxOutputTokens: 8192,
})
expect(result.success).toBe(true)
})
test('AgentDefinitionSchema accepts valid data', () => {
const schema = AgentDefinitionSchema()
const result = schema.safeParse({
description: 'Test agent',
prompt: 'You are a test agent',
})
expect(result.success).toBe(true)
})
test('McpServerStatusSchema accepts valid data', () => {
const schema = McpServerStatusSchema()
const result = schema.safeParse({
name: 'test-server',
status: 'connected',
})
expect(result.success).toBe(true)
})
})
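
The tests above call each schema as a zero-arg factory (`SDKMessageSchema()`) rather than referencing a module-level constant — a pattern that defers schema construction until first use and sidesteps module-initialization-order problems in mutually-referencing schema files. A zod-free sketch of the memoized-factory idea (names and the toy validator are hypothetical, for illustration only):

```typescript
// A "schema" here is just a predicate; the point is the factory shape,
// not the validation library.
type Validator = (value: unknown) => boolean

// Build on first call, cache, and return the same instance thereafter,
// so merely importing the module never constructs the schema eagerly.
function lazySchema(build: () => Validator): () => Validator {
  let cached: Validator | null = null
  return () => {
    if (cached === null) cached = build()
    return cached
  }
}

let buildCount = 0
const MessageSchema = lazySchema(() => {
  buildCount++ // should only ever run once
  return (value) =>
    typeof value === 'object' &&
    value !== null &&
    (value as { type?: unknown }).type === 'message'
})

const ok = MessageSchema()({ type: 'message' })        // true
const sameInstance = MessageSchema() === MessageSchema() // true: memoized
```

With real Zod schemas the same shape also lets two schema modules reference each other through `z.lazy`-style indirection without either import winning an initialization race.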


@@ -1,10 +1,14 @@
{
"compilerOptions": {
"target": "ES2022",
"target": "ES2023",
"lib": ["ES2023", "DOM"],
"module": "ESNext",
"moduleResolution": "bundler",
"jsx": "react-jsx",
"strict": true,
"noImplicitAny": false,
"noEmit": true,
"allowImportingTsExtensions": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,