fix(api): replace phrase-based reasoning sanitizer with tag-based filter (#779)
Reasoning models (MiniMax M2.7, GLM-4.5/5, DeepSeek, Kimi K2) inline chain-of-thought inside <think>...</think> tags in the content field rather than using the reasoning_content channel. The prior phrase-matching sanitizer (looksLikeLeakedReasoningPrefix) only caught English-prose preambles like "I should"/"the user asked", missed tag-based leaks entirely, and risked false-stripping legitimate assistant output. Replace with a structural tag-based approach (same pattern as hermes-agent): - createThinkTagFilter() — streaming state machine that buffers partial tags across SSE delta boundaries (<th| + |ink>), so tags split mid-chunk still parse correctly. - stripThinkTags() — whole-text cleanup for non-streaming responses and as a safety net. Handles closed pairs, unterminated opens at block boundaries, and orphan tags. - Recognizes think, thinking, reasoning, thought, REASONING_SCRATCHPAD case-insensitively, including tags with attributes. - False-negative bias: flush() discards buffered partial tags at stream end rather than leaking them. Existing phrase-based shim tests updated to exercise the actual <think> tag leak. Added regression tests confirming legitimate prose starting with "I should..." is preserved (the old sanitizer's main false-positive). Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2513,7 +2513,7 @@ test('non-streaming: real content takes precedence over reasoning_content', asyn
|
||||
])
|
||||
})
|
||||
|
||||
test('non-streaming: strips leaked reasoning preamble from assistant content', async () => {
|
||||
test('non-streaming: strips <think> tag block from assistant content', async () => {
|
||||
globalThis.fetch = (async () => {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
@@ -2524,7 +2524,7 @@ test('non-streaming: strips leaked reasoning preamble from assistant content', a
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content:
|
||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
|
||||
'<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?',
|
||||
},
|
||||
finish_reason: 'stop',
|
||||
},
|
||||
@@ -2645,7 +2645,7 @@ test('streaming: thinking block closed before tool call', async () => {
|
||||
expect(thinkingStart?.content_block?.type).toBe('thinking')
|
||||
})
|
||||
|
||||
test('streaming: strips leaked reasoning preamble from assistant content deltas', async () => {
|
||||
test('streaming: strips <think> tag block from assistant content deltas', async () => {
|
||||
globalThis.fetch = (async () => {
|
||||
const chunks = makeStreamChunks([
|
||||
{
|
||||
@@ -2658,7 +2658,7 @@ test('streaming: strips leaked reasoning preamble from assistant content deltas'
|
||||
delta: {
|
||||
role: 'assistant',
|
||||
content:
|
||||
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
|
||||
'<think>user wants a greeting, respond briefly</think>Hey! How can I help you today?',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
@@ -2700,10 +2700,10 @@ test('streaming: strips leaked reasoning preamble from assistant content deltas'
|
||||
}
|
||||
}
|
||||
|
||||
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
|
||||
expect(textDeltas.join('')).toBe('Hey! How can I help you today?')
|
||||
})
|
||||
|
||||
test('streaming: strips leaked reasoning preamble when split across multiple content chunks', async () => {
|
||||
test('streaming: strips <think> tag split across multiple content chunks', async () => {
|
||||
globalThis.fetch = (async () => {
|
||||
const chunks = makeStreamChunks([
|
||||
{
|
||||
@@ -2715,7 +2715,7 @@ test('streaming: strips leaked reasoning preamble when split across multiple con
|
||||
index: 0,
|
||||
delta: {
|
||||
role: 'assistant',
|
||||
content: 'The user said "hey" - this is a simple greeting. ',
|
||||
content: '<think>user wants a greeting,',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
@@ -2729,8 +2729,21 @@ test('streaming: strips leaked reasoning preamble when split across multiple con
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
content:
|
||||
'I should respond in a friendly, concise way.\n\nHey! How can I help you today?',
|
||||
content: ' respond briefly</th',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'chatcmpl-1',
|
||||
object: 'chat.completion.chunk',
|
||||
model: 'gpt-5-mini',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
content: 'ink>Hey! How can I help you today?',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
@@ -2773,7 +2786,69 @@ test('streaming: strips leaked reasoning preamble when split across multiple con
|
||||
}
|
||||
}
|
||||
|
||||
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
|
||||
expect(textDeltas.join('')).toBe('Hey! How can I help you today?')
|
||||
})
|
||||
|
||||
test('streaming: preserves prose without tags (no phrase-based false positive)', async () => {
|
||||
// Regression: older phrase-based sanitizer would strip "I should..." prose.
|
||||
// The tag-based approach leaves legitimate assistant output alone.
|
||||
globalThis.fetch = (async () => {
|
||||
const chunks = makeStreamChunks([
|
||||
{
|
||||
id: 'chatcmpl-1',
|
||||
object: 'chat.completion.chunk',
|
||||
model: 'gpt-5-mini',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
role: 'assistant',
|
||||
content:
|
||||
'I should note that the user role requires a briefly concise friendly response format.',
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'chatcmpl-1',
|
||||
object: 'chat.completion.chunk',
|
||||
model: 'gpt-5-mini',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {},
|
||||
finish_reason: 'stop',
|
||||
},
|
||||
],
|
||||
},
|
||||
])
|
||||
|
||||
return makeSseResponse(chunks)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
const result = await client.beta.messages
|
||||
.create({
|
||||
model: 'gpt-5-mini',
|
||||
system: 'test system',
|
||||
messages: [{ role: 'user', content: 'hey' }],
|
||||
max_tokens: 64,
|
||||
stream: true,
|
||||
})
|
||||
.withResponse()
|
||||
|
||||
const textDeltas: string[] = []
|
||||
for await (const event of result.data) {
|
||||
const delta = (event as { delta?: { type?: string; text?: string } }).delta
|
||||
if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
|
||||
textDeltas.push(delta.text)
|
||||
}
|
||||
}
|
||||
|
||||
expect(textDeltas.join('')).toBe(
|
||||
'I should note that the user role requires a briefly concise friendly response format.',
|
||||
)
|
||||
})
|
||||
|
||||
test('classifies localhost transport failures with actionable category marker', async () => {
|
||||
|
||||
Reference in New Issue
Block a user