fix(api): replace phrase-based reasoning sanitizer with tag-based filter (#779)
Reasoning models (MiniMax M2.7, GLM-4.5/5, DeepSeek, Kimi K2) inline chain-of-thought inside <think>...</think> tags in the content field rather than using the reasoning_content channel. The prior phrase-matching sanitizer (looksLikeLeakedReasoningPrefix) only caught English-prose preambles like "I should"/"the user asked", missed tag-based leaks entirely, and risked false-stripping legitimate assistant output. Replace with a structural tag-based approach (same pattern as hermes-agent): - createThinkTagFilter() — streaming state machine that buffers partial tags across SSE delta boundaries (<th| + |ink>), so tags split mid-chunk still parse correctly. - stripThinkTags() — whole-text cleanup for non-streaming responses and as a safety net. Handles closed pairs, unterminated opens at block boundaries, and orphan tags. - Recognizes think, thinking, reasoning, thought, REASONING_SCRATCHPAD case-insensitively, including tags with attributes. - False-negative bias: flush() discards buffered partial tags at stream end rather than leaking them. Existing phrase-based shim tests updated to exercise the actual <think> tag leak. Added regression tests confirming legitimate prose starting with "I should..." is preserved (the old sanitizer's main false-positive). Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -32,10 +32,9 @@ import { resolveGeminiCredential } from '../../utils/geminiAuth.js'
|
||||
import { hydrateGeminiAccessTokenFromSecureStorage } from '../../utils/geminiCredentials.js'
|
||||
import { hydrateGithubModelsTokenFromSecureStorage } from '../../utils/githubModelsCredentials.js'
|
||||
import {
|
||||
looksLikeLeakedReasoningPrefix,
|
||||
shouldBufferPotentialReasoningPrefix,
|
||||
stripLeakedReasoningPreamble,
|
||||
} from './reasoningLeakSanitizer.js'
|
||||
createThinkTagFilter,
|
||||
stripThinkTags,
|
||||
} from './thinkTagSanitizer.js'
|
||||
import {
|
||||
codexStreamToAnthropic,
|
||||
collectCodexCompletedResponse,
|
||||
@@ -718,8 +717,7 @@ async function* openaiStreamToAnthropic(
|
||||
let hasEmittedContentStart = false
|
||||
let hasEmittedThinkingStart = false
|
||||
let hasClosedThinking = false
|
||||
let activeTextBuffer = ''
|
||||
let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
|
||||
const thinkFilter = createThinkTagFilter()
|
||||
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
|
||||
let hasEmittedFinalUsage = false
|
||||
let hasProcessedFinishReason = false
|
||||
@@ -798,14 +796,12 @@ async function* openaiStreamToAnthropic(
|
||||
const closeActiveContentBlock = async function* () {
|
||||
if (!hasEmittedContentStart) return
|
||||
|
||||
if (textBufferMode !== 'none') {
|
||||
const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
|
||||
if (sanitized) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: contentBlockIndex,
|
||||
delta: { type: 'text_delta', text: sanitized },
|
||||
}
|
||||
const tail = thinkFilter.flush()
|
||||
if (tail) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: contentBlockIndex,
|
||||
delta: { type: 'text_delta', text: tail },
|
||||
}
|
||||
}
|
||||
|
||||
@@ -815,8 +811,6 @@ async function* openaiStreamToAnthropic(
|
||||
}
|
||||
contentBlockIndex++
|
||||
hasEmittedContentStart = false
|
||||
activeTextBuffer = ''
|
||||
textBufferMode = 'none'
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -873,7 +867,6 @@ async function* openaiStreamToAnthropic(
|
||||
contentBlockIndex++
|
||||
hasClosedThinking = true
|
||||
}
|
||||
activeTextBuffer += delta.content
|
||||
if (!hasEmittedContentStart) {
|
||||
yield {
|
||||
type: 'content_block_start',
|
||||
@@ -883,38 +876,13 @@ async function* openaiStreamToAnthropic(
|
||||
hasEmittedContentStart = true
|
||||
}
|
||||
|
||||
if (
|
||||
textBufferMode === 'strip' ||
|
||||
looksLikeLeakedReasoningPrefix(activeTextBuffer)
|
||||
) {
|
||||
textBufferMode = 'strip'
|
||||
continue
|
||||
}
|
||||
|
||||
if (textBufferMode === 'pending') {
|
||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
||||
continue
|
||||
}
|
||||
const visible = thinkFilter.feed(delta.content)
|
||||
if (visible) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: contentBlockIndex,
|
||||
delta: {
|
||||
type: 'text_delta',
|
||||
text: activeTextBuffer,
|
||||
},
|
||||
delta: { type: 'text_delta', text: visible },
|
||||
}
|
||||
textBufferMode = 'none'
|
||||
continue
|
||||
}
|
||||
|
||||
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
|
||||
textBufferMode = 'pending'
|
||||
continue
|
||||
}
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: contentBlockIndex,
|
||||
delta: { type: 'text_delta', text: delta.content },
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1742,7 +1710,7 @@ class OpenAIShimMessages {
|
||||
if (typeof rawContent === 'string' && rawContent) {
|
||||
content.push({
|
||||
type: 'text',
|
||||
text: stripLeakedReasoningPreamble(rawContent),
|
||||
text: stripThinkTags(rawContent),
|
||||
})
|
||||
} else if (Array.isArray(rawContent) && rawContent.length > 0) {
|
||||
const parts: string[] = []
|
||||
@@ -1760,7 +1728,7 @@ class OpenAIShimMessages {
|
||||
if (joined) {
|
||||
content.push({
|
||||
type: 'text',
|
||||
text: stripLeakedReasoningPreamble(joined),
|
||||
text: stripThinkTags(joined),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user