Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b09972f223 | ||
|
|
336ddcc50d | ||
|
|
c0b8a59a23 | ||
|
|
aab489055c | ||
|
|
7002cb302b | ||
|
|
739b8d1f40 |
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
".": "0.5.0"
|
".": "0.5.2"
|
||||||
}
|
}
|
||||||
|
|||||||
16
CHANGELOG.md
16
CHANGELOG.md
@@ -1,5 +1,21 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## [0.5.2](https://github.com/Gitlawb/openclaude/compare/v0.5.1...v0.5.2) (2026-04-20)
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* **api:** replace phrase-based reasoning sanitizer with tag-based filter ([#779](https://github.com/Gitlawb/openclaude/issues/779)) ([336ddcc](https://github.com/Gitlawb/openclaude/commit/336ddcc50d59d79ebff50993f2673652aecb0d7d))
|
||||||
|
|
||||||
|
## [0.5.1](https://github.com/Gitlawb/openclaude/compare/v0.5.0...v0.5.1) (2026-04-20)
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* enforce Bash path constraints after sandbox allow ([#777](https://github.com/Gitlawb/openclaude/issues/777)) ([7002cb3](https://github.com/Gitlawb/openclaude/commit/7002cb302b78ea2a19da3f26226de24e2903fa1d))
|
||||||
|
* enforce MCP OAuth callback state before errors ([#775](https://github.com/Gitlawb/openclaude/issues/775)) ([739b8d1](https://github.com/Gitlawb/openclaude/commit/739b8d1f40fde0e401a5cbd2b9a55d88bd5124ad))
|
||||||
|
* require trusted approval for sandbox override ([#778](https://github.com/Gitlawb/openclaude/issues/778)) ([aab4890](https://github.com/Gitlawb/openclaude/commit/aab489055c53dd64369414116fe93226d2656273))
|
||||||
|
|
||||||
## [0.5.0](https://github.com/Gitlawb/openclaude/compare/v0.4.0...v0.5.0) (2026-04-20)
|
## [0.5.0](https://github.com/Gitlawb/openclaude/compare/v0.4.0...v0.5.0) (2026-04-20)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@gitlawb/openclaude",
|
"name": "@gitlawb/openclaude",
|
||||||
"version": "0.5.0",
|
"version": "0.5.2",
|
||||||
"description": "Claude Code opened to any LLM — OpenAI, Gemini, DeepSeek, Ollama, and 200+ models",
|
"description": "Claude Code opened to any LLM — OpenAI, Gemini, DeepSeek, Ollama, and 200+ models",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"bin": {
|
"bin": {
|
||||||
|
|||||||
@@ -114,8 +114,8 @@ export const SandboxSettingsSchema = lazySchema(() =>
|
|||||||
.boolean()
|
.boolean()
|
||||||
.optional()
|
.optional()
|
||||||
.describe(
|
.describe(
|
||||||
'Allow commands to run outside the sandbox via the dangerouslyDisableSandbox parameter. ' +
|
'Allow trusted, user-initiated commands to run outside the sandbox. ' +
|
||||||
'When false, the dangerouslyDisableSandbox parameter is completely ignored and all commands must run sandboxed. ' +
|
'When false, sandbox override requests are ignored and all commands must run sandboxed. ' +
|
||||||
'Default: true.',
|
'Default: true.',
|
||||||
),
|
),
|
||||||
network: SandboxNetworkConfigSchema(),
|
network: SandboxNetworkConfigSchema(),
|
||||||
|
|||||||
@@ -547,7 +547,7 @@ describe('Codex request translation', () => {
|
|||||||
])
|
])
|
||||||
})
|
})
|
||||||
|
|
||||||
test('strips leaked reasoning preamble from completed Codex text responses', () => {
|
test('strips <think> tag block from completed Codex text responses', () => {
|
||||||
const message = convertCodexResponseToAnthropicMessage(
|
const message = convertCodexResponseToAnthropicMessage(
|
||||||
{
|
{
|
||||||
id: 'resp_1',
|
id: 'resp_1',
|
||||||
@@ -560,7 +560,7 @@ describe('Codex request translation', () => {
|
|||||||
{
|
{
|
||||||
type: 'output_text',
|
type: 'output_text',
|
||||||
text:
|
text:
|
||||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
|
'<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
@@ -578,6 +578,37 @@ describe('Codex request translation', () => {
|
|||||||
])
|
])
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test('strips unterminated <think> tag at block boundary in Codex completed response', () => {
|
||||||
|
const message = convertCodexResponseToAnthropicMessage(
|
||||||
|
{
|
||||||
|
id: 'resp_1',
|
||||||
|
model: 'gpt-5.4',
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
type: 'message',
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'output_text',
|
||||||
|
text:
|
||||||
|
'Here is the answer.\n<think>wait, let me reconsider the user request',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
usage: { input_tokens: 12, output_tokens: 4 },
|
||||||
|
},
|
||||||
|
'gpt-5.4',
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(message.content).toEqual([
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: 'Here is the answer.',
|
||||||
|
},
|
||||||
|
])
|
||||||
|
})
|
||||||
|
|
||||||
test('translates Codex SSE text stream into Anthropic events', async () => {
|
test('translates Codex SSE text stream into Anthropic events', async () => {
|
||||||
const responseText = [
|
const responseText = [
|
||||||
'event: response.output_item.added',
|
'event: response.output_item.added',
|
||||||
@@ -609,7 +640,7 @@ describe('Codex request translation', () => {
|
|||||||
])
|
])
|
||||||
})
|
})
|
||||||
|
|
||||||
test('strips leaked reasoning preamble from Codex SSE text stream', async () => {
|
test('strips <think> tag block from Codex SSE text stream', async () => {
|
||||||
const responseText = [
|
const responseText = [
|
||||||
'event: response.output_item.added',
|
'event: response.output_item.added',
|
||||||
'data: {"type":"response.output_item.added","item":{"id":"msg_1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":0}',
|
'data: {"type":"response.output_item.added","item":{"id":"msg_1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":0}',
|
||||||
@@ -618,13 +649,13 @@ describe('Codex request translation', () => {
|
|||||||
'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_1","output_index":0,"part":{"type":"output_text","text":""},"sequence_number":1}',
|
'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_1","output_index":0,"part":{"type":"output_text","text":""},"sequence_number":1}',
|
||||||
'',
|
'',
|
||||||
'event: response.output_text.delta',
|
'event: response.output_text.delta',
|
||||||
'data: {"type":"response.output_text.delta","content_index":0,"delta":"The user just said \\"hey\\" - a simple greeting. I should respond briefly and friendly.\\n\\nHey! How can I help you today?","item_id":"msg_1","output_index":0,"sequence_number":2}',
|
'data: {"type":"response.output_text.delta","content_index":0,"delta":"<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?","item_id":"msg_1","output_index":0,"sequence_number":2}',
|
||||||
'',
|
'',
|
||||||
'event: response.output_item.done',
|
'event: response.output_item.done',
|
||||||
'data: {"type":"response.output_item.done","item":{"id":"msg_1","type":"message","status":"completed","content":[{"type":"output_text","text":"The user just said \\"hey\\" - a simple greeting. I should respond briefly and friendly.\\n\\nHey! How can I help you today?"}],"role":"assistant"},"output_index":0,"sequence_number":3}',
|
'data: {"type":"response.output_item.done","item":{"id":"msg_1","type":"message","status":"completed","content":[{"type":"output_text","text":"<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?"}],"role":"assistant"},"output_index":0,"sequence_number":3}',
|
||||||
'',
|
'',
|
||||||
'event: response.completed',
|
'event: response.completed',
|
||||||
'data: {"type":"response.completed","response":{"id":"resp_1","status":"completed","model":"gpt-5.4","output":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"The user just said \\"hey\\" - a simple greeting. I should respond briefly and friendly.\\n\\nHey! How can I help you today?"}]}],"usage":{"input_tokens":2,"output_tokens":1}},"sequence_number":4}',
|
'data: {"type":"response.completed","response":{"id":"resp_1","status":"completed","model":"gpt-5.4","output":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?"}]}],"usage":{"input_tokens":2,"output_tokens":1}},"sequence_number":4}',
|
||||||
'',
|
'',
|
||||||
].join('\n')
|
].join('\n')
|
||||||
|
|
||||||
@@ -646,6 +677,50 @@ describe('Codex request translation', () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
|
expect(textDeltas.join('')).toBe('Hey! How can I help you today?')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('preserves prose without tags (no phrase-based false positive)', async () => {
|
||||||
|
// Regression test: older phrase-based sanitizer would incorrectly strip text
|
||||||
|
// starting with "I should" or "The user". The tag-based approach leaves it alone.
|
||||||
|
const responseText = [
|
||||||
|
'event: response.output_item.added',
|
||||||
|
'data: {"type":"response.output_item.added","item":{"id":"msg_1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":0}',
|
||||||
|
'',
|
||||||
|
'event: response.content_part.added',
|
||||||
|
'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_1","output_index":0,"part":{"type":"output_text","text":""},"sequence_number":1}',
|
||||||
|
'',
|
||||||
|
'event: response.output_text.delta',
|
||||||
|
'data: {"type":"response.output_text.delta","content_index":0,"delta":"I should note that the user role requires a briefly concise friendly response format.","item_id":"msg_1","output_index":0,"sequence_number":2}',
|
||||||
|
'',
|
||||||
|
'event: response.output_item.done',
|
||||||
|
'data: {"type":"response.output_item.done","item":{"id":"msg_1","type":"message","status":"completed","content":[{"type":"output_text","text":"I should note that the user role requires a briefly concise friendly response format."}],"role":"assistant"},"output_index":0,"sequence_number":3}',
|
||||||
|
'',
|
||||||
|
'event: response.completed',
|
||||||
|
'data: {"type":"response.completed","response":{"id":"resp_1","status":"completed","model":"gpt-5.4","output":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I should note that the user role requires a briefly concise friendly response format."}]}],"usage":{"input_tokens":2,"output_tokens":1}},"sequence_number":4}',
|
||||||
|
'',
|
||||||
|
].join('\n')
|
||||||
|
|
||||||
|
const stream = new ReadableStream({
|
||||||
|
start(controller) {
|
||||||
|
controller.enqueue(new TextEncoder().encode(responseText))
|
||||||
|
controller.close()
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
const textDeltas: string[] = []
|
||||||
|
for await (const event of codexStreamToAnthropic(
|
||||||
|
new Response(stream),
|
||||||
|
'gpt-5.4',
|
||||||
|
)) {
|
||||||
|
const delta = (event as { delta?: { type?: string; text?: string } }).delta
|
||||||
|
if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
|
||||||
|
textDeltas.push(delta.text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(textDeltas.join('')).toBe(
|
||||||
|
'I should note that the user role requires a briefly concise friendly response format.',
|
||||||
|
)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -6,10 +6,9 @@ import type {
|
|||||||
} from './providerConfig.js'
|
} from './providerConfig.js'
|
||||||
import { sanitizeSchemaForOpenAICompat } from './openaiSchemaSanitizer.js'
|
import { sanitizeSchemaForOpenAICompat } from './openaiSchemaSanitizer.js'
|
||||||
import {
|
import {
|
||||||
looksLikeLeakedReasoningPrefix,
|
createThinkTagFilter,
|
||||||
shouldBufferPotentialReasoningPrefix,
|
stripThinkTags,
|
||||||
stripLeakedReasoningPreamble,
|
} from './thinkTagSanitizer.js'
|
||||||
} from './reasoningLeakSanitizer.js'
|
|
||||||
|
|
||||||
export interface AnthropicUsage {
|
export interface AnthropicUsage {
|
||||||
input_tokens: number
|
input_tokens: number
|
||||||
@@ -734,25 +733,22 @@ export async function* codexStreamToAnthropic(
|
|||||||
{ index: number; toolUseId: string }
|
{ index: number; toolUseId: string }
|
||||||
>()
|
>()
|
||||||
let activeTextBlockIndex: number | null = null
|
let activeTextBlockIndex: number | null = null
|
||||||
let activeTextBuffer = ''
|
const thinkFilter = createThinkTagFilter()
|
||||||
let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
|
|
||||||
let nextContentBlockIndex = 0
|
let nextContentBlockIndex = 0
|
||||||
let sawToolUse = false
|
let sawToolUse = false
|
||||||
let finalResponse: Record<string, any> | undefined
|
let finalResponse: Record<string, any> | undefined
|
||||||
|
|
||||||
const closeActiveTextBlock = async function* () {
|
const closeActiveTextBlock = async function* () {
|
||||||
if (activeTextBlockIndex === null) return
|
if (activeTextBlockIndex === null) return
|
||||||
if (textBufferMode !== 'none') {
|
const tail = thinkFilter.flush()
|
||||||
const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
|
if (tail) {
|
||||||
if (sanitized) {
|
yield {
|
||||||
yield {
|
type: 'content_block_delta',
|
||||||
type: 'content_block_delta',
|
index: activeTextBlockIndex,
|
||||||
index: activeTextBlockIndex,
|
delta: {
|
||||||
delta: {
|
type: 'text_delta',
|
||||||
type: 'text_delta',
|
text: tail,
|
||||||
text: sanitized,
|
},
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
yield {
|
yield {
|
||||||
@@ -760,8 +756,6 @@ export async function* codexStreamToAnthropic(
|
|||||||
index: activeTextBlockIndex,
|
index: activeTextBlockIndex,
|
||||||
}
|
}
|
||||||
activeTextBlockIndex = null
|
activeTextBlockIndex = null
|
||||||
activeTextBuffer = ''
|
|
||||||
textBufferMode = 'none'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const startTextBlockIfNeeded = async function* () {
|
const startTextBlockIfNeeded = async function* () {
|
||||||
@@ -837,43 +831,17 @@ export async function* codexStreamToAnthropic(
|
|||||||
|
|
||||||
if (event.event === 'response.output_text.delta') {
|
if (event.event === 'response.output_text.delta') {
|
||||||
yield* startTextBlockIfNeeded()
|
yield* startTextBlockIfNeeded()
|
||||||
activeTextBuffer += payload.delta ?? ''
|
|
||||||
if (activeTextBlockIndex !== null) {
|
if (activeTextBlockIndex !== null) {
|
||||||
if (
|
const visible = thinkFilter.feed(payload.delta ?? '')
|
||||||
textBufferMode === 'strip' ||
|
if (visible) {
|
||||||
looksLikeLeakedReasoningPrefix(activeTextBuffer)
|
|
||||||
) {
|
|
||||||
textBufferMode = 'strip'
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if (textBufferMode === 'pending') {
|
|
||||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
yield {
|
yield {
|
||||||
type: 'content_block_delta',
|
type: 'content_block_delta',
|
||||||
index: activeTextBlockIndex,
|
index: activeTextBlockIndex,
|
||||||
delta: {
|
delta: {
|
||||||
type: 'text_delta',
|
type: 'text_delta',
|
||||||
text: activeTextBuffer,
|
text: visible,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
textBufferMode = 'none'
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
|
||||||
textBufferMode = 'pending'
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
yield {
|
|
||||||
type: 'content_block_delta',
|
|
||||||
index: activeTextBlockIndex,
|
|
||||||
delta: {
|
|
||||||
type: 'text_delta',
|
|
||||||
text: payload.delta ?? '',
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
@@ -969,7 +937,7 @@ export function convertCodexResponseToAnthropicMessage(
|
|||||||
if (part?.type === 'output_text') {
|
if (part?.type === 'output_text') {
|
||||||
content.push({
|
content.push({
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: stripLeakedReasoningPreamble(part.text ?? ''),
|
text: stripThinkTags(part.text ?? ''),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2513,7 +2513,7 @@ test('non-streaming: real content takes precedence over reasoning_content', asyn
|
|||||||
])
|
])
|
||||||
})
|
})
|
||||||
|
|
||||||
test('non-streaming: strips leaked reasoning preamble from assistant content', async () => {
|
test('non-streaming: strips <think> tag block from assistant content', async () => {
|
||||||
globalThis.fetch = (async () => {
|
globalThis.fetch = (async () => {
|
||||||
return new Response(
|
return new Response(
|
||||||
JSON.stringify({
|
JSON.stringify({
|
||||||
@@ -2524,7 +2524,7 @@ test('non-streaming: strips leaked reasoning preamble from assistant content', a
|
|||||||
message: {
|
message: {
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content:
|
content:
|
||||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
|
'<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?',
|
||||||
},
|
},
|
||||||
finish_reason: 'stop',
|
finish_reason: 'stop',
|
||||||
},
|
},
|
||||||
@@ -2645,7 +2645,7 @@ test('streaming: thinking block closed before tool call', async () => {
|
|||||||
expect(thinkingStart?.content_block?.type).toBe('thinking')
|
expect(thinkingStart?.content_block?.type).toBe('thinking')
|
||||||
})
|
})
|
||||||
|
|
||||||
test('streaming: strips leaked reasoning preamble from assistant content deltas', async () => {
|
test('streaming: strips <think> tag block from assistant content deltas', async () => {
|
||||||
globalThis.fetch = (async () => {
|
globalThis.fetch = (async () => {
|
||||||
const chunks = makeStreamChunks([
|
const chunks = makeStreamChunks([
|
||||||
{
|
{
|
||||||
@@ -2658,7 +2658,7 @@ test('streaming: strips leaked reasoning preamble from assistant content deltas'
|
|||||||
delta: {
|
delta: {
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content:
|
content:
|
||||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
|
'<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?',
|
||||||
},
|
},
|
||||||
finish_reason: null,
|
finish_reason: null,
|
||||||
},
|
},
|
||||||
@@ -2700,10 +2700,10 @@ test('streaming: strips leaked reasoning preamble from assistant content deltas'
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
|
expect(textDeltas.join('')).toBe('Hey! How can I help you today?')
|
||||||
})
|
})
|
||||||
|
|
||||||
test('streaming: strips leaked reasoning preamble when split across multiple content chunks', async () => {
|
test('streaming: strips <think> tag split across multiple content chunks', async () => {
|
||||||
globalThis.fetch = (async () => {
|
globalThis.fetch = (async () => {
|
||||||
const chunks = makeStreamChunks([
|
const chunks = makeStreamChunks([
|
||||||
{
|
{
|
||||||
@@ -2715,7 +2715,7 @@ test('streaming: strips leaked reasoning preamble when split across multiple con
|
|||||||
index: 0,
|
index: 0,
|
||||||
delta: {
|
delta: {
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: 'The user said "hey" - this is a simple greeting. ',
|
content: '<think>user wants a greeting,',
|
||||||
},
|
},
|
||||||
finish_reason: null,
|
finish_reason: null,
|
||||||
},
|
},
|
||||||
@@ -2729,8 +2729,21 @@ test('streaming: strips leaked reasoning preamble when split across multiple con
|
|||||||
{
|
{
|
||||||
index: 0,
|
index: 0,
|
||||||
delta: {
|
delta: {
|
||||||
content:
|
content: ' respond briefly</th',
|
||||||
'I should respond in a friendly, concise way.\n\nHey! How can I help you today?',
|
},
|
||||||
|
finish_reason: null,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'chatcmpl-1',
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
model: 'gpt-5-mini',
|
||||||
|
choices: [
|
||||||
|
{
|
||||||
|
index: 0,
|
||||||
|
delta: {
|
||||||
|
content: 'ink>Hey! How can I help you today?',
|
||||||
},
|
},
|
||||||
finish_reason: null,
|
finish_reason: null,
|
||||||
},
|
},
|
||||||
@@ -2773,7 +2786,69 @@ test('streaming: strips leaked reasoning preamble when split across multiple con
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
|
expect(textDeltas.join('')).toBe('Hey! How can I help you today?')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('streaming: preserves prose without tags (no phrase-based false positive)', async () => {
|
||||||
|
// Regression: older phrase-based sanitizer would strip "I should..." prose.
|
||||||
|
// The tag-based approach leaves legitimate assistant output alone.
|
||||||
|
globalThis.fetch = (async () => {
|
||||||
|
const chunks = makeStreamChunks([
|
||||||
|
{
|
||||||
|
id: 'chatcmpl-1',
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
model: 'gpt-5-mini',
|
||||||
|
choices: [
|
||||||
|
{
|
||||||
|
index: 0,
|
||||||
|
delta: {
|
||||||
|
role: 'assistant',
|
||||||
|
content:
|
||||||
|
'I should note that the user role requires a briefly concise friendly response format.',
|
||||||
|
},
|
||||||
|
finish_reason: null,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'chatcmpl-1',
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
model: 'gpt-5-mini',
|
||||||
|
choices: [
|
||||||
|
{
|
||||||
|
index: 0,
|
||||||
|
delta: {},
|
||||||
|
finish_reason: 'stop',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
])
|
||||||
|
|
||||||
|
return makeSseResponse(chunks)
|
||||||
|
}) as FetchType
|
||||||
|
|
||||||
|
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||||
|
const result = await client.beta.messages
|
||||||
|
.create({
|
||||||
|
model: 'gpt-5-mini',
|
||||||
|
system: 'test system',
|
||||||
|
messages: [{ role: 'user', content: 'hey' }],
|
||||||
|
max_tokens: 64,
|
||||||
|
stream: true,
|
||||||
|
})
|
||||||
|
.withResponse()
|
||||||
|
|
||||||
|
const textDeltas: string[] = []
|
||||||
|
for await (const event of result.data) {
|
||||||
|
const delta = (event as { delta?: { type?: string; text?: string } }).delta
|
||||||
|
if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
|
||||||
|
textDeltas.push(delta.text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(textDeltas.join('')).toBe(
|
||||||
|
'I should note that the user role requires a briefly concise friendly response format.',
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
test('classifies localhost transport failures with actionable category marker', async () => {
|
test('classifies localhost transport failures with actionable category marker', async () => {
|
||||||
|
|||||||
@@ -32,10 +32,9 @@ import { resolveGeminiCredential } from '../../utils/geminiAuth.js'
|
|||||||
import { hydrateGeminiAccessTokenFromSecureStorage } from '../../utils/geminiCredentials.js'
|
import { hydrateGeminiAccessTokenFromSecureStorage } from '../../utils/geminiCredentials.js'
|
||||||
import { hydrateGithubModelsTokenFromSecureStorage } from '../../utils/githubModelsCredentials.js'
|
import { hydrateGithubModelsTokenFromSecureStorage } from '../../utils/githubModelsCredentials.js'
|
||||||
import {
|
import {
|
||||||
looksLikeLeakedReasoningPrefix,
|
createThinkTagFilter,
|
||||||
shouldBufferPotentialReasoningPrefix,
|
stripThinkTags,
|
||||||
stripLeakedReasoningPreamble,
|
} from './thinkTagSanitizer.js'
|
||||||
} from './reasoningLeakSanitizer.js'
|
|
||||||
import {
|
import {
|
||||||
codexStreamToAnthropic,
|
codexStreamToAnthropic,
|
||||||
collectCodexCompletedResponse,
|
collectCodexCompletedResponse,
|
||||||
@@ -718,8 +717,7 @@ async function* openaiStreamToAnthropic(
|
|||||||
let hasEmittedContentStart = false
|
let hasEmittedContentStart = false
|
||||||
let hasEmittedThinkingStart = false
|
let hasEmittedThinkingStart = false
|
||||||
let hasClosedThinking = false
|
let hasClosedThinking = false
|
||||||
let activeTextBuffer = ''
|
const thinkFilter = createThinkTagFilter()
|
||||||
let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
|
|
||||||
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
|
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
|
||||||
let hasEmittedFinalUsage = false
|
let hasEmittedFinalUsage = false
|
||||||
let hasProcessedFinishReason = false
|
let hasProcessedFinishReason = false
|
||||||
@@ -798,14 +796,12 @@ async function* openaiStreamToAnthropic(
|
|||||||
const closeActiveContentBlock = async function* () {
|
const closeActiveContentBlock = async function* () {
|
||||||
if (!hasEmittedContentStart) return
|
if (!hasEmittedContentStart) return
|
||||||
|
|
||||||
if (textBufferMode !== 'none') {
|
const tail = thinkFilter.flush()
|
||||||
const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
|
if (tail) {
|
||||||
if (sanitized) {
|
yield {
|
||||||
yield {
|
type: 'content_block_delta',
|
||||||
type: 'content_block_delta',
|
index: contentBlockIndex,
|
||||||
index: contentBlockIndex,
|
delta: { type: 'text_delta', text: tail },
|
||||||
delta: { type: 'text_delta', text: sanitized },
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -815,8 +811,6 @@ async function* openaiStreamToAnthropic(
|
|||||||
}
|
}
|
||||||
contentBlockIndex++
|
contentBlockIndex++
|
||||||
hasEmittedContentStart = false
|
hasEmittedContentStart = false
|
||||||
activeTextBuffer = ''
|
|
||||||
textBufferMode = 'none'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -873,7 +867,6 @@ async function* openaiStreamToAnthropic(
|
|||||||
contentBlockIndex++
|
contentBlockIndex++
|
||||||
hasClosedThinking = true
|
hasClosedThinking = true
|
||||||
}
|
}
|
||||||
activeTextBuffer += delta.content
|
|
||||||
if (!hasEmittedContentStart) {
|
if (!hasEmittedContentStart) {
|
||||||
yield {
|
yield {
|
||||||
type: 'content_block_start',
|
type: 'content_block_start',
|
||||||
@@ -883,38 +876,13 @@ async function* openaiStreamToAnthropic(
|
|||||||
hasEmittedContentStart = true
|
hasEmittedContentStart = true
|
||||||
}
|
}
|
||||||
|
|
||||||
if (
|
const visible = thinkFilter.feed(delta.content)
|
||||||
textBufferMode === 'strip' ||
|
if (visible) {
|
||||||
looksLikeLeakedReasoningPrefix(activeTextBuffer)
|
|
||||||
) {
|
|
||||||
textBufferMode = 'strip'
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if (textBufferMode === 'pending') {
|
|
||||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
yield {
|
yield {
|
||||||
type: 'content_block_delta',
|
type: 'content_block_delta',
|
||||||
index: contentBlockIndex,
|
index: contentBlockIndex,
|
||||||
delta: {
|
delta: { type: 'text_delta', text: visible },
|
||||||
type: 'text_delta',
|
|
||||||
text: activeTextBuffer,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
textBufferMode = 'none'
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
|
||||||
textBufferMode = 'pending'
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
yield {
|
|
||||||
type: 'content_block_delta',
|
|
||||||
index: contentBlockIndex,
|
|
||||||
delta: { type: 'text_delta', text: delta.content },
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1742,7 +1710,7 @@ class OpenAIShimMessages {
|
|||||||
if (typeof rawContent === 'string' && rawContent) {
|
if (typeof rawContent === 'string' && rawContent) {
|
||||||
content.push({
|
content.push({
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: stripLeakedReasoningPreamble(rawContent),
|
text: stripThinkTags(rawContent),
|
||||||
})
|
})
|
||||||
} else if (Array.isArray(rawContent) && rawContent.length > 0) {
|
} else if (Array.isArray(rawContent) && rawContent.length > 0) {
|
||||||
const parts: string[] = []
|
const parts: string[] = []
|
||||||
@@ -1760,7 +1728,7 @@ class OpenAIShimMessages {
|
|||||||
if (joined) {
|
if (joined) {
|
||||||
content.push({
|
content.push({
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: stripLeakedReasoningPreamble(joined),
|
text: stripThinkTags(joined),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,46 +0,0 @@
|
|||||||
import { describe, expect, test } from 'bun:test'
|
|
||||||
|
|
||||||
import {
|
|
||||||
looksLikeLeakedReasoningPrefix,
|
|
||||||
shouldBufferPotentialReasoningPrefix,
|
|
||||||
stripLeakedReasoningPreamble,
|
|
||||||
} from './reasoningLeakSanitizer.ts'
|
|
||||||
|
|
||||||
describe('reasoning leak sanitizer', () => {
|
|
||||||
test('strips explicit internal reasoning preambles', () => {
|
|
||||||
const text =
|
|
||||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?'
|
|
||||||
|
|
||||||
expect(looksLikeLeakedReasoningPrefix(text)).toBe(true)
|
|
||||||
expect(stripLeakedReasoningPreamble(text)).toBe(
|
|
||||||
'Hey! How can I help you today?',
|
|
||||||
)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('does not strip normal user-facing advice that mentions "the user should"', () => {
|
|
||||||
const text =
|
|
||||||
'The user should reset their password immediately.\n\nHere are the steps...'
|
|
||||||
|
|
||||||
expect(looksLikeLeakedReasoningPrefix(text)).toBe(false)
|
|
||||||
expect(shouldBufferPotentialReasoningPrefix(text)).toBe(false)
|
|
||||||
expect(stripLeakedReasoningPreamble(text)).toBe(text)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('does not strip legitimate first-person advice about responding to an incident', () => {
|
|
||||||
const text =
|
|
||||||
'I need to respond to this security incident immediately. The system is compromised.\n\nHere are the remediation steps...'
|
|
||||||
|
|
||||||
expect(looksLikeLeakedReasoningPrefix(text)).toBe(false)
|
|
||||||
expect(shouldBufferPotentialReasoningPrefix(text)).toBe(false)
|
|
||||||
expect(stripLeakedReasoningPreamble(text)).toBe(text)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('does not strip legitimate first-person advice about answering a support ticket', () => {
|
|
||||||
const text =
|
|
||||||
'I need to answer the support ticket before end of day. The customer is waiting.\n\nHere is the response I drafted...'
|
|
||||||
|
|
||||||
expect(looksLikeLeakedReasoningPrefix(text)).toBe(false)
|
|
||||||
expect(shouldBufferPotentialReasoningPrefix(text)).toBe(false)
|
|
||||||
expect(stripLeakedReasoningPreamble(text)).toBe(text)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
@@ -1,54 +0,0 @@
|
|||||||
// Phrase-based heuristics for detecting chain-of-thought text at the start of a
// model reply. All patterns are case-insensitive and anchored to the beginning
// of the (trimmed) text unless noted otherwise.

// Self-directed openers such as "I should ..." or "Let me think ...".
const EXPLICIT_REASONING_START_RE =
  /^\s*(i should\b|i need to\b|let me think\b|the task\b|the request\b)/i

// Meta vocabulary (matched anywhere) that, combined with a self-directed
// opener, marks the text as talking about the conversation itself.
const EXPLICIT_REASONING_META_RE =
  /\b(user|request|question|prompt|message|task|greeting|small talk|briefly|friendly|concise)\b/i

// Third-person narration about the user: "The user asked ...".
const USER_META_START_RE =
  /^\s*the user\s+(just\s+)?(said|asked|is asking|wants|wanted|mentioned|seems|appears)\b/i

// Narration about the user that later mentions planning or tone words.
const USER_REASONING_RE =
  /^\s*the user\s+(just\s+)?(said|asked|is asking|wants|wanted|mentioned|seems|appears)\b[\s\S]*\b(i should|i need to|let me think|respond|reply|answer|greeting|small talk|briefly|friendly|concise)\b/i

/**
 * Decides whether text received so far should be held back because it may
 * still turn out to be a leaked reasoning preamble.
 *
 * @param text - Text accumulated so far.
 * @returns `true` when the trimmed text already looks like leaked reasoning, or
 *   when it has no blank-line paragraph break yet and opens with one of the
 *   reasoning/user-narration phrases; `false` otherwise (including blank input).
 */
export function shouldBufferPotentialReasoningPrefix(text: string): boolean {
  const candidate = text.trim()
  if (candidate.length === 0) return false

  // A confirmed match is buffered regardless of paragraph structure.
  if (looksLikeLeakedReasoningPrefix(candidate)) return true

  // Once a paragraph break exists and the text did not match above, stop holding it.
  if (/\n\s*\n/.test(candidate)) return false

  // Still a single paragraph: buffer only if it opens suspiciously.
  return [EXPLICIT_REASONING_START_RE, USER_META_START_RE].some(pattern =>
    pattern.test(candidate),
  )
}

/**
 * Reports whether the text reads like leaked reasoning.
 *
 * @param text - Text to classify (trimmed internally).
 * @returns `true` when the text narrates the user and mentions planning/tone
 *   words, or opens with a self-directed phrase and contains meta vocabulary.
 */
export function looksLikeLeakedReasoningPrefix(text: string): boolean {
  const candidate = text.trim()
  if (candidate.length === 0) return false
  if (USER_REASONING_RE.test(candidate)) return true
  return (
    EXPLICIT_REASONING_START_RE.test(candidate) &&
    EXPLICIT_REASONING_META_RE.test(candidate)
  )
}

/**
 * Drops the first paragraph when it looks like leaked reasoning.
 *
 * @param text - Complete response text.
 * @returns The remaining paragraphs (CRLF normalized to LF, re-joined with a
 *   blank line, trimmed) when the first paragraph is classified as leaked
 *   reasoning and something is left afterwards; otherwise the original `text`.
 */
export function stripLeakedReasoningPreamble(text: string): string {
  const [firstParagraph, ...laterParagraphs] = text
    .replace(/\r\n/g, '\n')
    .split(/\n\s*\n/)

  // A single paragraph has no separate preamble to remove.
  if (laterParagraphs.length === 0) return text

  if (!looksLikeLeakedReasoningPrefix((firstParagraph ?? '').trim())) {
    return text
  }

  // Never return an empty answer: fall back to the untouched input.
  const answer = laterParagraphs.join('\n\n').trim()
  return answer === '' ? text : answer
}
|
|
||||||
183
src/services/api/thinkTagSanitizer.test.ts
Normal file
183
src/services/api/thinkTagSanitizer.test.ts
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
import { describe, expect, test } from 'bun:test'
|
||||||
|
|
||||||
|
import {
|
||||||
|
createThinkTagFilter,
|
||||||
|
stripThinkTags,
|
||||||
|
} from './thinkTagSanitizer.ts'
|
||||||
|
|
||||||
|
// Whole-text path: each case hands one complete string to stripThinkTags and
// pins the exact visible text that must remain.
describe('stripThinkTags — whole-text cleanup', () => {
  test('strips closed think pair', () => {
    const visible = stripThinkTags('<think>reasoning</think>Hello')
    expect(visible).toBe('Hello')
  })

  test('strips closed thinking pair', () => {
    const visible = stripThinkTags('<thinking>x</thinking>Out')
    expect(visible).toBe('Out')
  })

  test('strips closed reasoning pair', () => {
    const visible = stripThinkTags('<reasoning>x</reasoning>Out')
    expect(visible).toBe('Out')
  })

  test('strips REASONING_SCRATCHPAD pair', () => {
    const visible = stripThinkTags(
      '<REASONING_SCRATCHPAD>plan</REASONING_SCRATCHPAD>Answer',
    )
    expect(visible).toBe('Answer')
  })

  test('is case-insensitive', () => {
    const upperCased = stripThinkTags('<THINKING>x</THINKING>out')
    expect(upperCased).toBe('out')

    const titleCased = stripThinkTags('<Think>x</Think>out')
    expect(titleCased).toBe('out')
  })

  test('handles attributes on open tag', () => {
    const visible = stripThinkTags('<think id="plan-1">reason</think>ok')
    expect(visible).toBe('ok')
  })

  test('strips unterminated open tag at block boundary', () => {
    const visible = stripThinkTags('<think>reasoning that never closes')
    expect(visible).toBe('')
  })

  test('strips unterminated open tag after newline', () => {
    // The block-boundary match also swallows the newline in front of the tag
    // (same as hermes), so no trailing "\n" is left behind.
    const visible = stripThinkTags('Answer: 42\n<think>second-guess myself')
    expect(visible).toBe('Answer: 42')
  })

  test('strips orphan close tag', () => {
    const visible = stripThinkTags('trailing </think>done')
    expect(visible).toBe('trailing done')
  })

  test('strips multiple blocks', () => {
    const visible = stripThinkTags('<think>a</think>B<think>c</think>D')
    expect(visible).toBe('BD')
  })

  test('handles reasoning mid-response after content', () => {
    const visible = stripThinkTags('Answer: 42\n<think>double-check</think>\nDone')
    expect(visible).toBe('Answer: 42\n\nDone')
  })

  test('handles nested-looking tags (lazy match + orphan cleanup)', () => {
    const visible = stripThinkTags('<think><think>x</think></think>y')
    expect(visible).toBe('y')
  })

  test('preserves legitimate non-think tags', () => {
    const visible = stripThinkTags('use <div> and <span>')
    expect(visible).toBe('use <div> and <span>')
  })

  test('preserves text without any tags', () => {
    // Reasoning-sounding phrases without tags must survive untouched.
    const visible = stripThinkTags('Hello, world. I should respond briefly.')
    expect(visible).toBe('Hello, world. I should respond briefly.')
  })

  test('handles empty input', () => {
    const visible = stripThinkTags('')
    expect(visible).toBe('')
  })
})
|
||||||
|
|
||||||
|
// Streaming path: every case builds a fresh filter, feeds it one or more deltas
// and pins what each feed()/flush() call emits.
describe('createThinkTagFilter — streaming state machine', () => {
  test('passes through plain text', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('Hello, ')).toBe('Hello, ')
    expect(filter.feed('world!')).toBe('world!')
    expect(filter.flush()).toBe('')
  })

  test('strips a complete think block in one chunk', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('pre<think>reason</think>post')).toBe('prepost')
    expect(filter.flush()).toBe('')
  })

  test('handles open tag split across deltas', () => {
    const filter = createThinkTagFilter()
    // "<th" is withheld until the next delta completes the tag.
    expect(filter.feed('before<th')).toBe('before')
    expect(filter.feed('ink>reason</think>after')).toBe('after')
    expect(filter.flush()).toBe('')
  })

  test('handles close tag split across deltas', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('<think>reason</th')).toBe('')
    expect(filter.feed('ink>keep')).toBe('keep')
    expect(filter.flush()).toBe('')
  })

  test('handles tag split on bare < boundary', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('leading <')).toBe('leading ')
    expect(filter.feed('think>inner</think>tail')).toBe('tail')
    expect(filter.flush()).toBe('')
  })

  test('preserves partial non-tag < at boundary when next char rules it out', () => {
    const filter = createThinkTagFilter()
    // No tracked tag name starts with "d", so "<d" is released right away.
    expect(filter.feed('pre<d')).toBe('pre<d')
    expect(filter.feed('iv>rest')).toBe('iv>rest')
    expect(filter.flush()).toBe('')
  })

  test('case-insensitive streaming', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('<THINKING>x</THINKING>out')).toBe('out')
    expect(filter.flush()).toBe('')
  })

  test('unterminated open tag — flush drops remainder', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('<think>reasoning with no close ')).toBe('')
    expect(filter.feed('and more reasoning')).toBe('')
    expect(filter.flush()).toBe('')
    // flush() also resets the state machine.
    expect(filter.isInsideBlock()).toBe(false)
  })

  test('multiple blocks in single feed', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('<think>a</think>B<think>c</think>D')).toBe('BD')
    expect(filter.flush()).toBe('')
  })

  test('flush after clean stream emits nothing extra', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('complete message')).toBe('complete message')
    expect(filter.flush()).toBe('')
  })

  test('flush of bare < at end emits it (not a tag prefix)', () => {
    const filter = createThinkTagFilter()
    // A lone "<" is withheld during streaming, then released by flush()
    // because no tag-name characters ever followed it.
    expect(filter.feed('x <')).toBe('x ')
    expect(filter.flush()).toBe('<')
  })

  test('flush of partial tag-name prefix at end drops it', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('x <thi')).toBe('x ')
    expect(filter.flush()).toBe('')
  })

  test('handles attributes on streaming open tag', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('<think type="plan">reason</think>ok')).toBe('ok')
    expect(filter.flush()).toBe('')
  })

  test('mid-delta transition: content, reasoning, content', () => {
    const filter = createThinkTagFilter()
    expect(filter.feed('Answer: 42\n<think>')).toBe('Answer: 42\n')
    expect(filter.feed('double-check')).toBe('')
    expect(filter.feed('</think>\nDone')).toBe('\nDone')
    expect(filter.flush()).toBe('')
  })

  test('orphan close tag mid-stream is stripped on flush via safety-net behavior', () => {
    // Outside a block the filter does not treat a close tag specially, so the
    // orphan passes through the stream as-is. The whole-text safety net
    // (stripThinkTags on the final text) is what removes it.
    const filter = createThinkTagFilter()
    const streamed = [
      filter.feed('trailing '),
      filter.feed('</think>done'),
      filter.flush(),
    ].join('')
    expect(stripThinkTags(streamed)).toBe('trailing done')
  })
})
|
||||||
162
src/services/api/thinkTagSanitizer.ts
Normal file
162
src/services/api/thinkTagSanitizer.ts
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
/**
 * Think-tag sanitizer for reasoning content leaks.
 *
 * Some OpenAI-compatible reasoning models (MiniMax M2.7, GLM-4.5/5, DeepSeek, Kimi K2,
 * self-hosted vLLM builds) emit chain-of-thought inline inside the `content` field using
 * XML-like tags instead of the separate `reasoning_content` channel. Example:
 *
 *   <think>the user wants foo, let me check bar</think>Here is the answer: ...
 *
 * This module strips those blocks structurally (tag-based), independent of English
 * phrasings. Three layers:
 *
 *   1. `createThinkTagFilter()` — streaming state machine. Feeds deltas, emits only
 *      the visible (non-reasoning) portion, and buffers partial tags across chunk
 *      boundaries so `</th` + `ink>` still parses correctly.
 *
 *   2. `stripThinkTags()` — whole-text cleanup. Removes closed pairs, unterminated
 *      opens at block boundaries, and orphan open/close tags. Used for non-streaming
 *      responses and as a safety net after stream close.
 *
 *   3. Flush discards a buffered partial tag at stream end: losing a fragment that
 *      looks like the start of a reasoning tag is preferred over leaking it.
 */

/** Lowercase tag names whose blocks are treated as hidden reasoning. */
const TAG_NAMES = [
  'think',
  'thinking',
  'reasoning',
  'thought',
  'reasoning_scratchpad',
] as const

/** Regex alternation of all tag names. */
const TAG_ALT = TAG_NAMES.join('|')

/** A complete open tag, optionally carrying attributes. */
const OPEN_TAG_RE = new RegExp(`<\\s*(?:${TAG_ALT})\\b[^>]*>`, 'i')

/** A complete close tag. */
const CLOSE_TAG_RE = new RegExp(`<\\s*/\\s*(?:${TAG_ALT})\\s*>`, 'i')

/** An open tag, its content (lazy), and the close tag of the same name. */
const CLOSED_PAIR_RE_G = new RegExp(
  `<\\s*(${TAG_ALT})\\b[^>]*>[\\s\\S]*?<\\s*/\\s*\\1\\s*>`,
  'gi',
)

/** An open tag at the start of the text or of a line, through end of string. */
const UNTERMINATED_OPEN_RE = new RegExp(
  `(?:^|\\n)[ \\t]*<\\s*(?:${TAG_ALT})\\b[^>]*>[\\s\\S]*$`,
  'i',
)

/** Any leftover open or close tag, plus the whitespace that follows it. */
const ORPHAN_TAG_RE_G = new RegExp(
  `<\\s*/?\\s*(?:${TAG_ALT})\\b[^>]*>\\s*`,
  'gi',
)

/** `<`, optional `/`, and an optional (possibly incomplete) identifier. */
const PARTIAL_TAG_NAME_RE = /^<\s*\/?\s*([a-zA-Z_]\w*)?\s*$/

/**
 * An open tag whose name is complete; callers apply it only to a tail that has
 * no `>` yet, i.e. the attribute list is still arriving.
 */
const PARTIAL_OPEN_TAG_ATTRS_RE = new RegExp(`^<\\s*(?:${TAG_ALT})\\b`, 'i')

/** Longest unfinished tag (in characters) the streaming filter will hold back. */
const MAX_PARTIAL_TAG = 64

/**
 * Remove reasoning/thinking blocks from a complete text body.
 *
 * Applied in order:
 *   - Closed pairs: <think>...</think> (lazy match, anywhere in text)
 *   - Unterminated open tags at a block boundary: strips from the tag to end of string
 *   - Orphan open or close tags (no matching partner), with trailing whitespace
 *
 * Conservative by design: in rare edge cases it prefers leaving a few tag
 * characters behind over stripping legitimate content.
 *
 * @param text - Complete response text.
 * @returns The text with reasoning blocks removed; empty input is returned as-is.
 */
export function stripThinkTags(text: string): string {
  if (!text) return text
  let out = text
  out = out.replace(CLOSED_PAIR_RE_G, '')
  out = out.replace(UNTERMINATED_OPEN_RE, '')
  out = out.replace(ORPHAN_TAG_RE_G, '')
  return out
}

/** Incremental filter returned by `createThinkTagFilter()`. */
export interface ThinkTagFilter {
  /** Consume the next delta and return the visible text it releases. */
  feed(chunk: string): string
  /** End the stream: return any releasable held-back text and reset the state. */
  flush(): string
  /** Whether the filter is currently inside an open reasoning block. */
  isInsideBlock(): boolean
}

/**
 * Streaming state machine. Feed deltas, emits visible (non-reasoning) text.
 * Handles tags split across chunk boundaries by holding back a short tail buffer
 * whenever the current buffer ends with what looks like a partial tag.
 *
 * @returns A fresh filter with its own independent state.
 */
export function createThinkTagFilter(): ThinkTagFilter {
  // True while between an open tag and its close tag.
  let inside = false
  // Text received but not yet classified (at most one unfinished tag after feed()).
  let buffer = ''

  /**
   * Index at which `s` ends in an unfinished tag that could still become one
   * of the tracked tags, or -1 when nothing needs to be held back.
   */
  function findPartialTagStart(s: string): number {
    const lastLt = s.lastIndexOf('<')
    if (lastLt === -1) return -1
    // A '>' after the last '<' means that tag is already complete.
    if (s.indexOf('>', lastLt) !== -1) return -1
    const tail = s.slice(lastLt)
    // Bound the amount of text that can be withheld from the user.
    if (tail.length > MAX_PARTIAL_TAG) return -1

    const nameOnly = PARTIAL_TAG_NAME_RE.exec(tail)
    if (nameOnly) {
      const partialName = (nameOnly[1] ?? '').toLowerCase()
      // Bare "<" or "</": any tag may still follow.
      if (!partialName) return lastLt
      return TAG_NAMES.some(name => name.startsWith(partialName)) ? lastLt : -1
    }

    // Complete tag name followed by unfinished attributes, e.g. `<think id="pl`.
    // Without holding this back, the tag and the reasoning after it would be
    // emitted once the rest of the tag arrives in a later delta.
    return PARTIAL_OPEN_TAG_ATTRS_RE.test(tail) ? lastLt : -1
  }

  function feed(chunk: string): string {
    if (!chunk) return ''
    buffer += chunk
    let out = ''

    while (buffer.length > 0) {
      // Look for the tag that ends the current state.
      const boundary = (inside ? CLOSE_TAG_RE : OPEN_TAG_RE).exec(buffer)
      if (boundary) {
        // Text before an open tag is visible; text before a close tag is reasoning.
        if (!inside) out += buffer.slice(0, boundary.index)
        buffer = buffer.slice(boundary.index + boundary[0].length)
        inside = !inside
        continue
      }

      // No complete tag left: release (or discard, when inside) everything
      // except a trailing partial tag, which waits for the next delta.
      const partialStart = findPartialTagStart(buffer)
      const cut = partialStart === -1 ? buffer.length : partialStart
      if (!inside) out += buffer.slice(0, cut)
      buffer = buffer.slice(cut)
      break
    }

    return out
  }

  function flush(): string {
    const held = buffer
    const wasInside = inside
    buffer = ''
    inside = false

    // Unterminated reasoning block: drop whatever was pending.
    if (wasInside) return ''
    if (!held) return ''

    // A held tail with tag-name characters is treated as reasoning markup and dropped;
    // a bare "<" / "</" is ordinary text and is released.
    if (/^<\s*\/?\s*[a-zA-Z_]/.test(held)) return ''
    return held
  }

  return { feed, flush, isInsideBlock: () => inside }
}
|
||||||
61
src/services/mcp/auth.test.ts
Normal file
61
src/services/mcp/auth.test.ts
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
import assert from 'node:assert/strict'
|
||||||
|
import test from 'node:test'
|
||||||
|
|
||||||
|
import { validateOAuthCallbackParams } from './auth.js'
|
||||||
|
|
||||||
|
// A callback that carries a provider error but no `state` must be reported as
// a state mismatch rather than as the provider error.
test('OAuth callback rejects error parameters before state validation can be bypassed', () => {
  const statelessErrorCallback = {
    error: 'access_denied',
    error_description: 'denied by provider',
  }

  const outcome = validateOAuthCallbackParams(
    statelessErrorCallback,
    'expected-state',
  )

  assert.deepEqual(outcome, { type: 'state_mismatch' })
})
|
||||||
|
|
||||||
|
// With the expected `state`, the provider error is passed through along with a
// message assembled from the error, its description and its URI.
test('OAuth callback accepts provider errors only when state matches', () => {
  const errorCallback = {
    state: 'expected-state',
    error: 'access_denied',
    error_description: 'denied by provider',
    error_uri: 'https://example.test/error',
  }
  const expectedOutcome = {
    type: 'error',
    error: 'access_denied',
    errorDescription: 'denied by provider',
    errorUri: 'https://example.test/error',
    message:
      'OAuth error: access_denied - denied by provider (See: https://example.test/error)',
  }

  const outcome = validateOAuthCallbackParams(errorCallback, 'expected-state')

  assert.deepEqual(outcome, expectedOutcome)
})
|
||||||
|
|
||||||
|
// The authorization code is only returned when `state` equals the expected
// value; the same code with a different state is a mismatch.
test('OAuth callback accepts authorization codes only when state matches', () => {
  const withMatchingState = validateOAuthCallbackParams(
    { state: 'expected-state', code: 'auth-code' },
    'expected-state',
  )
  assert.deepEqual(withMatchingState, { type: 'code', code: 'auth-code' })

  const withWrongState = validateOAuthCallbackParams(
    { state: 'wrong-state', code: 'auth-code' },
    'expected-state',
  )
  assert.deepEqual(withWrongState, { type: 'state_mismatch' })
})
|
||||||
@@ -124,6 +124,74 @@ function redactSensitiveUrlParams(url: string): string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** A callback query value: a single string, a list of strings, or absent. */
type OAuthCallbackParamValue = string | string[] | null | undefined

/** Outcome of validating an OAuth callback, tagged by `type`. */
type OAuthCallbackValidationResult =
  | { type: 'code'; code: string }
  | {
      type: 'error'
      error: string
      errorDescription: string
      errorUri: string
      message: string
    }
  | { type: 'missing_result' }
  | { type: 'state_mismatch' }

/**
 * Collapses a callback query value to one non-empty string.
 *
 * @param value - Raw query value.
 * @returns The value itself when it is a non-empty string, the first non-empty
 *   entry when it is a list, otherwise `undefined`.
 */
function getFirstOAuthCallbackParam(
  value: OAuthCallbackParamValue,
): string | undefined {
  if (!Array.isArray(value)) {
    // null, undefined and '' are all falsy and map to undefined.
    return value ? value : undefined
  }
  for (const entry of value) {
    if (entry.length > 0) return entry
  }
  return undefined
}

/**
 * Classifies the query parameters of an OAuth callback.
 *
 * The `state` check runs first, so neither an error nor a code is reported
 * unless `state` equals `oauthState`.
 *
 * @param params - Callback query parameters (`code`, `state`, `error`,
 *   `error_description`, `error_uri`).
 * @param oauthState - The state value the callback is expected to echo back.
 * @returns `state_mismatch`, then `error` (with a combined `message`), then
 *   `code`, and `missing_result` when the state matches but neither is present.
 */
export function validateOAuthCallbackParams(
  params: {
    code?: OAuthCallbackParamValue
    state?: OAuthCallbackParamValue
    error?: OAuthCallbackParamValue
    error_description?: OAuthCallbackParamValue
    error_uri?: OAuthCallbackParamValue
  },
  oauthState: string,
): OAuthCallbackValidationResult {
  const first = getFirstOAuthCallbackParam
  const code = first(params.code)
  const state = first(params.state)
  const error = first(params.error)
  const errorDescription = first(params.error_description) ?? ''
  const errorUri = first(params.error_uri) ?? ''

  if (state !== oauthState) {
    return { type: 'state_mismatch' }
  }

  if (error) {
    // Description and URI are appended only when present.
    const descriptionPart = errorDescription ? ` - ${errorDescription}` : ''
    const uriPart = errorUri ? ` (See: ${errorUri})` : ''
    const message = `OAuth error: ${error}` + descriptionPart + uriPart
    return { type: 'error', error, errorDescription, errorUri, message }
  }

  return code ? { type: 'code', code } : { type: 'missing_result' }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Some OAuth servers (notably Slack) return HTTP 200 for all responses,
|
* Some OAuth servers (notably Slack) return HTTP 200 for all responses,
|
||||||
* signaling errors via the JSON body instead. The SDK's executeTokenRequest
|
* signaling errors via the JSON body instead. The SDK's executeTokenRequest
|
||||||
@@ -1058,30 +1126,31 @@ export async function performMCPOAuthFlow(
|
|||||||
options.onWaitingForCallback((callbackUrl: string) => {
|
options.onWaitingForCallback((callbackUrl: string) => {
|
||||||
try {
|
try {
|
||||||
const parsed = new URL(callbackUrl)
|
const parsed = new URL(callbackUrl)
|
||||||
const code = parsed.searchParams.get('code')
|
const result = validateOAuthCallbackParams(
|
||||||
const state = parsed.searchParams.get('state')
|
{
|
||||||
const error = parsed.searchParams.get('error')
|
code: parsed.searchParams.get('code'),
|
||||||
|
state: parsed.searchParams.get('state'),
|
||||||
|
error: parsed.searchParams.get('error'),
|
||||||
|
error_description:
|
||||||
|
parsed.searchParams.get('error_description'),
|
||||||
|
error_uri: parsed.searchParams.get('error_uri'),
|
||||||
|
},
|
||||||
|
oauthState,
|
||||||
|
)
|
||||||
|
|
||||||
if (error) {
|
if (result.type === 'state_mismatch') {
|
||||||
const errorDescription =
|
// Ignore so a stray or malicious URL cannot cancel an active flow.
|
||||||
parsed.searchParams.get('error_description') || ''
|
|
||||||
cleanup()
|
|
||||||
rejectOnce(
|
|
||||||
new Error(`OAuth error: ${error} - ${errorDescription}`),
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!code) {
|
if (result.type === 'missing_result') {
|
||||||
// Not a valid callback URL, ignore so the user can try again
|
// Not a valid callback URL, ignore so the user can try again.
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state !== oauthState) {
|
if (result.type === 'error') {
|
||||||
cleanup()
|
cleanup()
|
||||||
rejectOnce(
|
rejectOnce(new Error(result.message))
|
||||||
new Error('OAuth state mismatch - possible CSRF attack'),
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1090,7 +1159,7 @@ export async function performMCPOAuthFlow(
|
|||||||
`Received auth code via manual callback URL`,
|
`Received auth code via manual callback URL`,
|
||||||
)
|
)
|
||||||
cleanup()
|
cleanup()
|
||||||
resolveOnce(code)
|
resolveOnce(result.code)
|
||||||
} catch {
|
} catch {
|
||||||
// Invalid URL, ignore so the user can try again
|
// Invalid URL, ignore so the user can try again
|
||||||
}
|
}
|
||||||
@@ -1101,53 +1170,49 @@ export async function performMCPOAuthFlow(
|
|||||||
const parsedUrl = parse(req.url || '', true)
|
const parsedUrl = parse(req.url || '', true)
|
||||||
|
|
||||||
if (parsedUrl.pathname === '/callback') {
|
if (parsedUrl.pathname === '/callback') {
|
||||||
const code = parsedUrl.query.code as string
|
const result = validateOAuthCallbackParams(
|
||||||
const state = parsedUrl.query.state as string
|
parsedUrl.query,
|
||||||
const error = parsedUrl.query.error
|
oauthState,
|
||||||
const errorDescription = parsedUrl.query.error_description as string
|
)
|
||||||
const errorUri = parsedUrl.query.error_uri as string
|
|
||||||
|
|
||||||
// Validate OAuth state to prevent CSRF attacks
|
// Validate OAuth state to prevent CSRF attacks
|
||||||
if (!error && state !== oauthState) {
|
if (result.type === 'state_mismatch') {
|
||||||
res.writeHead(400, { 'Content-Type': 'text/html' })
|
res.writeHead(400, { 'Content-Type': 'text/html' })
|
||||||
res.end(
|
res.end(
|
||||||
`<h1>Authentication Error</h1><p>Invalid state parameter. Please try again.</p><p>You can close this window.</p>`,
|
`<h1>Authentication Error</h1><p>Invalid state parameter. Please try again.</p><p>You can close this window.</p>`,
|
||||||
)
|
)
|
||||||
cleanup()
|
|
||||||
rejectOnce(new Error('OAuth state mismatch - possible CSRF attack'))
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if (error) {
|
if (result.type === 'missing_result') {
|
||||||
|
res.writeHead(400, { 'Content-Type': 'text/html' })
|
||||||
|
res.end(
|
||||||
|
`<h1>Authentication Error</h1><p>Missing OAuth result. Please try again.</p><p>You can close this window.</p>`,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.type === 'error') {
|
||||||
res.writeHead(200, { 'Content-Type': 'text/html' })
|
res.writeHead(200, { 'Content-Type': 'text/html' })
|
||||||
// Sanitize error messages to prevent XSS
|
// Sanitize error messages to prevent XSS
|
||||||
const sanitizedError = xss(String(error))
|
const sanitizedError = xss(result.error)
|
||||||
const sanitizedErrorDescription = errorDescription
|
const sanitizedErrorDescription = result.errorDescription
|
||||||
? xss(String(errorDescription))
|
? xss(result.errorDescription)
|
||||||
: ''
|
: ''
|
||||||
res.end(
|
res.end(
|
||||||
`<h1>Authentication Error</h1><p>${sanitizedError}: ${sanitizedErrorDescription}</p><p>You can close this window.</p>`,
|
`<h1>Authentication Error</h1><p>${sanitizedError}: ${sanitizedErrorDescription}</p><p>You can close this window.</p>`,
|
||||||
)
|
)
|
||||||
cleanup()
|
cleanup()
|
||||||
let errorMessage = `OAuth error: ${error}`
|
rejectOnce(new Error(result.message))
|
||||||
if (errorDescription) {
|
|
||||||
errorMessage += ` - ${errorDescription}`
|
|
||||||
}
|
|
||||||
if (errorUri) {
|
|
||||||
errorMessage += ` (See: ${errorUri})`
|
|
||||||
}
|
|
||||||
rejectOnce(new Error(errorMessage))
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code) {
|
res.writeHead(200, { 'Content-Type': 'text/html' })
|
||||||
res.writeHead(200, { 'Content-Type': 'text/html' })
|
res.end(
|
||||||
res.end(
|
`<h1>Authentication Successful</h1><p>You can close this window. Return to Claude Code.</p>`,
|
||||||
`<h1>Authentication Successful</h1><p>You can close this window. Return to Claude Code.</p>`,
|
)
|
||||||
)
|
cleanup()
|
||||||
cleanup()
|
resolveOnce(result.code)
|
||||||
resolveOnce(code)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -240,21 +240,28 @@ For commands that are harder to parse at a glance (piped commands, obscure flags
|
|||||||
- curl -s url | jq '.data[]' → "Fetch JSON from URL and extract data array elements"`),
|
- curl -s url | jq '.data[]' → "Fetch JSON from URL and extract data array elements"`),
|
||||||
run_in_background: semanticBoolean(z.boolean().optional()).describe(`Set to true to run this command in the background. Use Read to read the output later.`),
|
run_in_background: semanticBoolean(z.boolean().optional()).describe(`Set to true to run this command in the background. Use Read to read the output later.`),
|
||||||
dangerouslyDisableSandbox: semanticBoolean(z.boolean().optional()).describe('Set this to true to dangerously override sandbox mode and run commands without sandboxing.'),
|
dangerouslyDisableSandbox: semanticBoolean(z.boolean().optional()).describe('Set this to true to dangerously override sandbox mode and run commands without sandboxing.'),
|
||||||
|
_dangerouslyDisableSandboxApproved: z.boolean().optional().describe('Internal: user-approved sandbox override'),
|
||||||
_simulatedSedEdit: z.object({
|
_simulatedSedEdit: z.object({
|
||||||
filePath: z.string(),
|
filePath: z.string(),
|
||||||
newContent: z.string()
|
newContent: z.string()
|
||||||
}).optional().describe('Internal: pre-computed sed edit result from preview')
|
}).optional().describe('Internal: pre-computed sed edit result from preview')
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Always omit _simulatedSedEdit from the model-facing schema. It is an internal-only
|
// Always omit internal-only fields from the model-facing schema.
|
||||||
// field set by SedEditPermissionRequest after the user approves a sed edit preview.
|
// _simulatedSedEdit is set by SedEditPermissionRequest after the user approves a
|
||||||
// Exposing it in the schema would let the model bypass permission checks and the
|
// sed edit preview; exposing it would let the model bypass permission checks and
|
||||||
// sandbox by pairing an innocuous command with an arbitrary file write.
|
// the sandbox by pairing an innocuous command with an arbitrary file write.
|
||||||
|
// dangerouslyDisableSandbox is also omitted because sandbox escape must be tied
|
||||||
|
// to trusted user/internal provenance, not model-controlled tool input.
|
||||||
// Also conditionally remove run_in_background when background tasks are disabled.
|
// Also conditionally remove run_in_background when background tasks are disabled.
|
||||||
const inputSchema = lazySchema(() => isBackgroundTasksDisabled ? fullInputSchema().omit({
|
const inputSchema = lazySchema(() => isBackgroundTasksDisabled ? fullInputSchema().omit({
|
||||||
run_in_background: true,
|
run_in_background: true,
|
||||||
|
dangerouslyDisableSandbox: true,
|
||||||
|
_dangerouslyDisableSandboxApproved: true,
|
||||||
_simulatedSedEdit: true
|
_simulatedSedEdit: true
|
||||||
}) : fullInputSchema().omit({
|
}) : fullInputSchema().omit({
|
||||||
|
dangerouslyDisableSandbox: true,
|
||||||
|
_dangerouslyDisableSandboxApproved: true,
|
||||||
_simulatedSedEdit: true
|
_simulatedSedEdit: true
|
||||||
}));
|
}));
|
||||||
type InputSchema = ReturnType<typeof inputSchema>;
|
type InputSchema = ReturnType<typeof inputSchema>;
|
||||||
|
|||||||
59
src/tools/BashTool/bashPermissions.test.ts
Normal file
59
src/tools/BashTool/bashPermissions.test.ts
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
import { afterEach, expect, test } from 'bun:test'
|
||||||
|
|
||||||
|
import { getEmptyToolPermissionContext } from '../../Tool.js'
|
||||||
|
import { SandboxManager } from '../../utils/sandbox/sandbox-adapter.js'
|
||||||
|
import { bashToolHasPermission } from './bashPermissions.js'
|
||||||
|
|
||||||
|
// Snapshot of the SandboxManager methods that tests in this file overwrite.
const originalSandboxMethods = {
  isSandboxingEnabled: SandboxManager.isSandboxingEnabled,
  isAutoAllowBashIfSandboxedEnabled:
    SandboxManager.isAutoAllowBashIfSandboxedEnabled,
  areUnsandboxedCommandsAllowed: SandboxManager.areUnsandboxedCommandsAllowed,
  getExcludedCommands: SandboxManager.getExcludedCommands,
}

// Put the real implementations back after every test so stubs never leak
// from one test into the next.
afterEach(() => {
  Object.assign(SandboxManager, originalSandboxMethods)
})
|
||||||
|
|
||||||
|
/**
 * Builds a stub tool-use context for the permission check: an abort
 * controller, an interactive-session flag, and an app state exposing an empty
 * tool permission context. Cast to `never` so it is accepted wherever the
 * real context type is expected.
 */
function makeToolUseContext() {
  const toolPermissionContext = getEmptyToolPermissionContext()
  const abortController = new AbortController()
  const options = { isNonInteractiveSession: false }

  return {
    abortController,
    options,
    // A fresh state object per call, always wrapping the same permission context.
    getAppState: () => ({ toolPermissionContext }),
  } as never
}
|
||||||
|
|
||||||
|
// With sandboxing and sandbox auto-allow both switched on, a command that
// reaches /etc/passwd via "../" traversal must still produce an "ask" decision
// whose message names the blocked path.
test('sandbox auto-allow still enforces Bash path constraints', async () => {
  // NOTE(review): MACRO is presumably a build-time global read by the code
  // under test — confirm.
  const globals = globalThis as unknown as { MACRO: { VERSION: string } }
  globals.MACRO = { VERSION: 'test' }

  // Stub the sandbox so the auto-allow path is active and nothing is excluded.
  const alwaysTrue = () => true
  SandboxManager.isSandboxingEnabled = alwaysTrue
  SandboxManager.isAutoAllowBashIfSandboxedEnabled = alwaysTrue
  SandboxManager.areUnsandboxedCommandsAllowed = alwaysTrue
  SandboxManager.getExcludedCommands = () => []

  const traversalInput = { command: 'cat ../../../../../etc/passwd' }
  const decision = await bashToolHasPermission(
    traversalInput,
    makeToolUseContext(),
  )

  expect(decision.behavior).toBe('ask')
  expect(decision.message).toContain('was blocked')
  expect(decision.message).toContain('/etc/passwd')
})
|
||||||
@@ -1814,7 +1814,10 @@ export async function bashToolHasPermission(
|
|||||||
input,
|
input,
|
||||||
appState.toolPermissionContext,
|
appState.toolPermissionContext,
|
||||||
)
|
)
|
||||||
if (sandboxAutoAllowResult.behavior !== 'passthrough') {
|
if (
|
||||||
|
sandboxAutoAllowResult.behavior === 'deny' ||
|
||||||
|
sandboxAutoAllowResult.behavior === 'ask'
|
||||||
|
) {
|
||||||
return sandboxAutoAllowResult
|
return sandboxAutoAllowResult
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -179,9 +179,6 @@ function getSimpleSandboxSection(): string {
|
|||||||
const networkRestrictionConfig = SandboxManager.getNetworkRestrictionConfig()
|
const networkRestrictionConfig = SandboxManager.getNetworkRestrictionConfig()
|
||||||
const allowUnixSockets = SandboxManager.getAllowUnixSockets()
|
const allowUnixSockets = SandboxManager.getAllowUnixSockets()
|
||||||
const ignoreViolations = SandboxManager.getIgnoreViolations()
|
const ignoreViolations = SandboxManager.getIgnoreViolations()
|
||||||
const allowUnsandboxedCommands =
|
|
||||||
SandboxManager.areUnsandboxedCommandsAllowed()
|
|
||||||
|
|
||||||
// Replace the per-UID temp dir literal (e.g. /private/tmp/claude-1001/) with
|
// Replace the per-UID temp dir literal (e.g. /private/tmp/claude-1001/) with
|
||||||
// "$TMPDIR" so the prompt is identical across users — avoids busting the
|
// "$TMPDIR" so the prompt is identical across users — avoids busting the
|
||||||
// cross-user global prompt cache. The sandbox already sets $TMPDIR at runtime.
|
// cross-user global prompt cache. The sandbox already sets $TMPDIR at runtime.
|
||||||
@@ -225,38 +222,9 @@ function getSimpleSandboxSection(): string {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
const sandboxOverrideItems: Array<string | string[]> =
|
|
||||||
allowUnsandboxedCommands
|
|
||||||
? [
|
|
||||||
'You should always default to running commands within the sandbox. Do NOT attempt to set `dangerouslyDisableSandbox: true` unless:',
|
|
||||||
[
|
|
||||||
'The user *explicitly* asks you to bypass sandbox',
|
|
||||||
'A specific command just failed and you see evidence of sandbox restrictions causing the failure. Note that commands can fail for many reasons unrelated to the sandbox (missing files, wrong arguments, network issues, etc.).',
|
|
||||||
],
|
|
||||||
'Evidence of sandbox-caused failures includes:',
|
|
||||||
[
|
|
||||||
'"Operation not permitted" errors for file/network operations',
|
|
||||||
'Access denied to specific paths outside allowed directories',
|
|
||||||
'Network connection failures to non-whitelisted hosts',
|
|
||||||
'Unix socket connection errors',
|
|
||||||
],
|
|
||||||
'When you see evidence of sandbox-caused failure:',
|
|
||||||
[
|
|
||||||
"Immediately retry with `dangerouslyDisableSandbox: true` (don't ask, just do it)",
|
|
||||||
'Briefly explain what sandbox restriction likely caused the failure. Be sure to mention that the user can use the `/sandbox` command to manage restrictions.',
|
|
||||||
'This will prompt the user for permission',
|
|
||||||
],
|
|
||||||
'Treat each command you execute with `dangerouslyDisableSandbox: true` individually. Even if you have recently run a command with this setting, you should default to running future commands within the sandbox.',
|
|
||||||
'Do not suggest adding sensitive paths like ~/.bashrc, ~/.zshrc, ~/.ssh/*, or credential files to the sandbox allowlist.',
|
|
||||||
]
|
|
||||||
: [
|
|
||||||
'All commands MUST run in sandbox mode - the `dangerouslyDisableSandbox` parameter is disabled by policy.',
|
|
||||||
'Commands cannot run outside the sandbox under any circumstances.',
|
|
||||||
'If a command fails due to sandbox restrictions, work with the user to adjust sandbox settings instead.',
|
|
||||||
]
|
|
||||||
|
|
||||||
const items: Array<string | string[]> = [
|
const items: Array<string | string[]> = [
|
||||||
...sandboxOverrideItems,
|
'Commands MUST run in sandbox mode. If a command fails due to sandbox restrictions, explain the likely restriction and work with the user to adjust sandbox settings or run an explicit user-initiated shell command.',
|
||||||
|
'Do not suggest adding sensitive paths like ~/.bashrc, ~/.zshrc, ~/.ssh/*, or credential files to the sandbox allowlist.',
|
||||||
'For temporary files, always use the `$TMPDIR` environment variable. TMPDIR is automatically set to the correct sandbox-writable directory in sandbox mode. Do NOT use `/tmp` directly - use `$TMPDIR` instead.',
|
'For temporary files, always use the `$TMPDIR` environment variable. TMPDIR is automatically set to the correct sandbox-writable directory in sandbox mode. Do NOT use `/tmp` directly - use `$TMPDIR` instead.',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
74
src/tools/BashTool/shouldUseSandbox.test.ts
Normal file
74
src/tools/BashTool/shouldUseSandbox.test.ts
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import { afterEach, expect, test } from 'bun:test'
|
||||||
|
|
||||||
|
import { SandboxManager } from '../../utils/sandbox/sandbox-adapter.js'
|
||||||
|
import { BashTool } from './BashTool.js'
|
||||||
|
import { PowerShellTool } from '../PowerShellTool/PowerShellTool.js'
|
||||||
|
import { shouldUseSandbox } from './shouldUseSandbox.js'
|
||||||
|
|
||||||
|
// Snapshot of the real SandboxManager methods taken at module load, so the
// stubs installed by individual tests can be reverted afterwards.
const { isSandboxingEnabled, areUnsandboxedCommandsAllowed } = SandboxManager
const originalSandboxMethods = {
  isSandboxingEnabled,
  areUnsandboxedCommandsAllowed,
}
|
||||||
|
|
||||||
|
// After every test, copy the saved originals back onto SandboxManager so a
// stub installed by one test never leaks into the next.
afterEach(() => {
  Object.assign(SandboxManager, originalSandboxMethods)
})
|
||||||
|
|
||||||
|
// The Bash input schema must fail to parse a payload that carries
// dangerouslyDisableSandbox.
test('model-facing Bash schema rejects dangerouslyDisableSandbox', () => {
  const payload = {
    command: 'cat /etc/passwd',
    dangerouslyDisableSandbox: true,
  }

  const { success } = BashTool.inputSchema.safeParse(payload)

  expect(success).toBe(false)
})
|
||||||
|
|
||||||
|
// The PowerShell input schema must fail to parse a payload that carries
// dangerouslyDisableSandbox.
test('model-facing PowerShell schema rejects dangerouslyDisableSandbox', () => {
  const payload = {
    command: 'Get-Content C:\\Windows\\System32\\drivers\\etc\\hosts',
    dangerouslyDisableSandbox: true,
  }

  const { success } = PowerShellTool.inputSchema.safeParse(payload)

  expect(success).toBe(false)
})
|
||||||
|
|
||||||
|
// Even when policy permits unsandboxed commands, setting only
// dangerouslyDisableSandbox (without the approval marker) keeps the sandbox on.
test('model-controlled dangerouslyDisableSandbox does not bypass sandbox', () => {
  const yes = () => true
  SandboxManager.isSandboxingEnabled = yes
  SandboxManager.areUnsandboxedCommandsAllowed = yes

  const sandboxed = shouldUseSandbox({
    command: 'cat /etc/passwd',
    dangerouslyDisableSandbox: true,
  })

  expect(sandboxed).toBe(true)
})
|
||||||
|
|
||||||
|
// With the override flag AND the approval marker set, and policy allowing
// unsandboxed commands, the sandbox is skipped.
test('trusted internal approval can disable sandbox when policy allows it', () => {
  const yes = () => true
  SandboxManager.isSandboxingEnabled = yes
  SandboxManager.areUnsandboxedCommandsAllowed = yes

  const sandboxed = shouldUseSandbox({
    command: 'cat /etc/passwd',
    dangerouslyDisableSandbox: true,
    _dangerouslyDisableSandboxApproved: true,
  })

  expect(sandboxed).toBe(false)
})
|
||||||
|
|
||||||
|
// The override flag plus approval marker is still not enough when policy
// forbids unsandboxed commands: the sandbox stays on.
test('trusted internal approval cannot disable sandbox when policy forbids it', () => {
  SandboxManager.isSandboxingEnabled = () => true
  SandboxManager.areUnsandboxedCommandsAllowed = () => false

  const sandboxed = shouldUseSandbox({
    command: 'cat /etc/passwd',
    dangerouslyDisableSandbox: true,
    _dangerouslyDisableSandboxApproved: true,
  })

  expect(sandboxed).toBe(true)
})
|
||||||
@@ -13,6 +13,7 @@ import {
|
|||||||
type SandboxInput = {
|
type SandboxInput = {
|
||||||
command?: string
|
command?: string
|
||||||
dangerouslyDisableSandbox?: boolean
|
dangerouslyDisableSandbox?: boolean
|
||||||
|
_dangerouslyDisableSandboxApproved?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: excludedCommands is a user-facing convenience feature, not a security boundary.
|
// NOTE: excludedCommands is a user-facing convenience feature, not a security boundary.
|
||||||
@@ -141,9 +142,13 @@ export function shouldUseSandbox(input: Partial<SandboxInput>): boolean {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't sandbox if explicitly overridden AND unsandboxed commands are allowed by policy
|
// Only trusted internal callers may request an unsandboxed command. The
|
||||||
|
// model-facing Bash schema omits _dangerouslyDisableSandboxApproved, so a
|
||||||
|
// tool_use payload cannot disable the sandbox by setting
|
||||||
|
// dangerouslyDisableSandbox directly.
|
||||||
if (
|
if (
|
||||||
input.dangerouslyDisableSandbox &&
|
input.dangerouslyDisableSandbox &&
|
||||||
|
input._dangerouslyDisableSandboxApproved &&
|
||||||
SandboxManager.areUnsandboxedCommandsAllowed()
|
SandboxManager.areUnsandboxedCommandsAllowed()
|
||||||
) {
|
) {
|
||||||
return false
|
return false
|
||||||
|
|||||||
@@ -230,13 +230,20 @@ const fullInputSchema = lazySchema(() => z.strictObject({
|
|||||||
timeout: semanticNumber(z.number().optional()).describe(`Optional timeout in milliseconds (max ${getMaxTimeoutMs()})`),
|
timeout: semanticNumber(z.number().optional()).describe(`Optional timeout in milliseconds (max ${getMaxTimeoutMs()})`),
|
||||||
description: z.string().optional().describe('Clear, concise description of what this command does in active voice.'),
|
description: z.string().optional().describe('Clear, concise description of what this command does in active voice.'),
|
||||||
run_in_background: semanticBoolean(z.boolean().optional()).describe(`Set to true to run this command in the background. Use Read to read the output later.`),
|
run_in_background: semanticBoolean(z.boolean().optional()).describe(`Set to true to run this command in the background. Use Read to read the output later.`),
|
||||||
dangerouslyDisableSandbox: semanticBoolean(z.boolean().optional()).describe('Set this to true to dangerously override sandbox mode and run commands without sandboxing.')
|
dangerouslyDisableSandbox: semanticBoolean(z.boolean().optional()).describe('Set this to true to dangerously override sandbox mode and run commands without sandboxing.'),
|
||||||
|
_dangerouslyDisableSandboxApproved: z.boolean().optional().describe('Internal: user-approved sandbox override')
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Conditionally remove run_in_background from schema when background tasks are disabled
|
// Omit internal-only sandbox override fields from the model-facing schema.
|
||||||
|
// Conditionally remove run_in_background from schema when background tasks are disabled.
|
||||||
const inputSchema = lazySchema(() => isBackgroundTasksDisabled ? fullInputSchema().omit({
|
const inputSchema = lazySchema(() => isBackgroundTasksDisabled ? fullInputSchema().omit({
|
||||||
run_in_background: true
|
run_in_background: true,
|
||||||
}) : fullInputSchema());
|
dangerouslyDisableSandbox: true,
|
||||||
|
_dangerouslyDisableSandboxApproved: true
|
||||||
|
}) : fullInputSchema().omit({
|
||||||
|
dangerouslyDisableSandbox: true,
|
||||||
|
_dangerouslyDisableSandboxApproved: true
|
||||||
|
}));
|
||||||
type InputSchema = ReturnType<typeof inputSchema>;
|
type InputSchema = ReturnType<typeof inputSchema>;
|
||||||
|
|
||||||
// Use fullInputSchema for the type to always include run_in_background
|
// Use fullInputSchema for the type to always include run_in_background
|
||||||
@@ -697,7 +704,8 @@ async function* runPowerShellCommand({
|
|||||||
description,
|
description,
|
||||||
timeout,
|
timeout,
|
||||||
run_in_background,
|
run_in_background,
|
||||||
dangerouslyDisableSandbox
|
dangerouslyDisableSandbox,
|
||||||
|
_dangerouslyDisableSandboxApproved
|
||||||
} = input;
|
} = input;
|
||||||
const timeoutMs = Math.min(timeout || getDefaultTimeoutMs(), getMaxTimeoutMs());
|
const timeoutMs = Math.min(timeout || getDefaultTimeoutMs(), getMaxTimeoutMs());
|
||||||
let fullOutput = '';
|
let fullOutput = '';
|
||||||
@@ -749,7 +757,8 @@ async function* runPowerShellCommand({
|
|||||||
// The explicit platform check is redundant-but-obvious.
|
// The explicit platform check is redundant-but-obvious.
|
||||||
shouldUseSandbox: getPlatform() === 'windows' ? false : shouldUseSandbox({
|
shouldUseSandbox: getPlatform() === 'windows' ? false : shouldUseSandbox({
|
||||||
command,
|
command,
|
||||||
dangerouslyDisableSandbox
|
dangerouslyDisableSandbox,
|
||||||
|
_dangerouslyDisableSandboxApproved
|
||||||
}),
|
}),
|
||||||
shouldAutoBackground
|
shouldAutoBackground
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -662,10 +662,6 @@ export function normalizeToolInput<T extends Tool>(
|
|||||||
...(timeout !== undefined && { timeout }),
|
...(timeout !== undefined && { timeout }),
|
||||||
...(description !== undefined && { description }),
|
...(description !== undefined && { description }),
|
||||||
...(run_in_background !== undefined && { run_in_background }),
|
...(run_in_background !== undefined && { run_in_background }),
|
||||||
...('dangerouslyDisableSandbox' in parsed &&
|
|
||||||
parsed.dangerouslyDisableSandbox !== undefined && {
|
|
||||||
dangerouslyDisableSandbox: parsed.dangerouslyDisableSandbox,
|
|
||||||
}),
|
|
||||||
} as z.infer<T['inputSchema']>
|
} as z.infer<T['inputSchema']>
|
||||||
}
|
}
|
||||||
case FileEditTool.name: {
|
case FileEditTool.name: {
|
||||||
|
|||||||
@@ -65,10 +65,11 @@ export async function processBashCommand(inputString: string, precedingInputBloc
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
// User-initiated `!` commands run outside sandbox. Both shell tools honor
|
// User-initiated `!` commands run outside sandbox when policy allows it.
|
||||||
// dangerouslyDisableSandbox (checked against areUnsandboxedCommandsAllowed()
|
// Bash requires an internal approval marker so model-controlled tool input
|
||||||
// in shouldUseSandbox.ts). PS sandbox is Linux/macOS/WSL2 only — on Windows
|
// cannot disable sandboxing by setting dangerouslyDisableSandbox directly.
|
||||||
// native, shouldUseSandbox() returns false regardless (unsupported platform).
|
// PS sandbox is Linux/macOS/WSL2 only — on Windows native, shouldUseSandbox()
|
||||||
|
// returns false regardless (unsupported platform).
|
||||||
// Lazy-require PowerShellTool so its ~300KB chunk only loads when the
|
// Lazy-require PowerShellTool so its ~300KB chunk only loads when the
|
||||||
// user has actually selected the powershell default shell.
|
// user has actually selected the powershell default shell.
|
||||||
type PSMod = typeof import('src/tools/PowerShellTool/PowerShellTool.js');
|
type PSMod = typeof import('src/tools/PowerShellTool/PowerShellTool.js');
|
||||||
@@ -81,10 +82,12 @@ export async function processBashCommand(inputString: string, precedingInputBloc
|
|||||||
const shellTool = PowerShellTool ?? BashTool;
|
const shellTool = PowerShellTool ?? BashTool;
|
||||||
const response = PowerShellTool ? await PowerShellTool.call({
|
const response = PowerShellTool ? await PowerShellTool.call({
|
||||||
command: inputString,
|
command: inputString,
|
||||||
dangerouslyDisableSandbox: true
|
dangerouslyDisableSandbox: true,
|
||||||
|
_dangerouslyDisableSandboxApproved: true
|
||||||
}, bashModeContext, undefined, undefined, onProgress) : await BashTool.call({
|
}, bashModeContext, undefined, undefined, onProgress) : await BashTool.call({
|
||||||
command: inputString,
|
command: inputString,
|
||||||
dangerouslyDisableSandbox: true
|
dangerouslyDisableSandbox: true,
|
||||||
|
_dangerouslyDisableSandboxApproved: true
|
||||||
}, bashModeContext, undefined, undefined, onProgress);
|
}, bashModeContext, undefined, undefined, onProgress);
|
||||||
const data = response.data;
|
const data = response.data;
|
||||||
if (!data) {
|
if (!data) {
|
||||||
|
|||||||
Reference in New Issue
Block a user