fix(api): replace phrase-based reasoning sanitizer with tag-based filter (#779)

Reasoning models (MiniMax M2.7, GLM-4.5/5, DeepSeek, Kimi K2) inline chain-of-thought inside <think>...</think> tags in the content field rather than using the reasoning_content channel. The prior phrase-matching sanitizer (looksLikeLeakedReasoningPrefix) only caught English-prose preambles like "I should"/"the user asked", missed tag-based leaks entirely, and risked false-stripping legitimate assistant output. Replace with a structural tag-based approach (same pattern as hermes-agent): - createThinkTagFilter() — streaming state machine that buffers partial tags across SSE delta boundaries (<th| + |ink>), so tags split mid-chunk still parse correctly. - stripThinkTags() — whole-text cleanup for non-streaming responses and as a safety net. Handles closed pairs, unterminated opens at block boundaries, and orphan tags. - Recognizes think, thinking, reasoning, thought, REASONING_SCRATCHPAD case-insensitively, including tags with attributes. - False-negative bias: flush() discards buffered partial tags at stream end rather than leaking them. Existing phrase-based shim tests updated to exercise the actual <think> tag leak. Added regression tests confirming legitimate prose starting with "I should..." is preserved (the old sanitizer's main false-positive). Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 15:18:58 +08:00
parent c0b8a59a23
commit 336ddcc50d
8 changed files with 544 additions and 213 deletions
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -32,10 +32,9 @@ import { resolveGeminiCredential } from '../../utils/geminiAuth.js'
 import { hydrateGeminiAccessTokenFromSecureStorage } from '../../utils/geminiCredentials.js'
 import { hydrateGithubModelsTokenFromSecureStorage } from '../../utils/githubModelsCredentials.js'
 import {
-  looksLikeLeakedReasoningPrefix,
-  shouldBufferPotentialReasoningPrefix,
-  stripLeakedReasoningPreamble,
-} from './reasoningLeakSanitizer.js'
+  createThinkTagFilter,
+  stripThinkTags,
+} from './thinkTagSanitizer.js'
 import {
  codexStreamToAnthropic,
  collectCodexCompletedResponse,
@@ -718,8 +717,7 @@ async function* openaiStreamToAnthropic(
  let hasEmittedContentStart = false
  let hasEmittedThinkingStart = false
  let hasClosedThinking = false
-  let activeTextBuffer = ''
-  let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
+  const thinkFilter = createThinkTagFilter()
  let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
  let hasEmittedFinalUsage = false
  let hasProcessedFinishReason = false
@@ -798,14 +796,12 @@ async function* openaiStreamToAnthropic(
  const closeActiveContentBlock = async function* () {
    if (!hasEmittedContentStart) return

-    if (textBufferMode !== 'none') {
-      const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
-      if (sanitized) {
-        yield {
-          type: 'content_block_delta',
-          index: contentBlockIndex,
-          delta: { type: 'text_delta', text: sanitized },
-        }
+    const tail = thinkFilter.flush()
+    if (tail) {
+      yield {
+        type: 'content_block_delta',
+        index: contentBlockIndex,
+        delta: { type: 'text_delta', text: tail },
      }
    }

@@ -815,8 +811,6 @@ async function* openaiStreamToAnthropic(
    }
    contentBlockIndex++
    hasEmittedContentStart = false
-    activeTextBuffer = ''
-    textBufferMode = 'none'
  }

  try {
@@ -873,7 +867,6 @@ async function* openaiStreamToAnthropic(
            contentBlockIndex++
            hasClosedThinking = true
          }
-          activeTextBuffer += delta.content
          if (!hasEmittedContentStart) {
            yield {
              type: 'content_block_start',
@@ -883,38 +876,13 @@ async function* openaiStreamToAnthropic(
            hasEmittedContentStart = true
          }

-          if (
-            textBufferMode === 'strip' ||
-            looksLikeLeakedReasoningPrefix(activeTextBuffer)
-          ) {
-            textBufferMode = 'strip'
-            continue
-          }
-
-          if (textBufferMode === 'pending') {
-            if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
-              continue
-            }
+          const visible = thinkFilter.feed(delta.content)
+          if (visible) {
            yield {
              type: 'content_block_delta',
              index: contentBlockIndex,
-              delta: {
-                type: 'text_delta',
-                text: activeTextBuffer,
-              },
+              delta: { type: 'text_delta', text: visible },
            }
-            textBufferMode = 'none'
-            continue
-          }
-
-          if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
-            textBufferMode = 'pending'
-            continue
-          }
-          yield {
-            type: 'content_block_delta',
-            index: contentBlockIndex,
-            delta: { type: 'text_delta', text: delta.content },
          }
        }

@@ -1742,7 +1710,7 @@ class OpenAIShimMessages {
    if (typeof rawContent === 'string' && rawContent) {
      content.push({
        type: 'text',
-        text: stripLeakedReasoningPreamble(rawContent),
+        text: stripThinkTags(rawContent),
      })
    } else if (Array.isArray(rawContent) && rawContent.length > 0) {
      const parts: string[] = []
@@ -1760,7 +1728,7 @@ class OpenAIShimMessages {
      if (joined) {
        content.push({
          type: 'text',
-          text: stripLeakedReasoningPreamble(joined),
+          text: stripThinkTags(joined),
        })
      }
    }