fix(api): replace phrase-based reasoning sanitizer with tag-based filter (#779)

Reasoning models (MiniMax M2.7, GLM-4.5/5, DeepSeek, Kimi K2) inline chain-of-thought inside <think>...</think> tags in the content field rather than using the reasoning_content channel. The prior phrase-matching sanitizer (looksLikeLeakedReasoningPrefix) only caught English-prose preambles like "I should"/"the user asked", missed tag-based leaks entirely, and risked false-stripping legitimate assistant output.

Replace with a structural tag-based approach (same pattern as hermes-agent):

- createThinkTagFilter() — streaming state machine that buffers partial tags across SSE delta boundaries (<th| + |ink>), so tags split mid-chunk still parse correctly.
- stripThinkTags() — whole-text cleanup for non-streaming responses and as a safety net. Handles closed pairs, unterminated opens at block boundaries, and orphan tags.
- Recognizes think, thinking, reasoning, thought, REASONING_SCRATCHPAD case-insensitively, including tags with attributes.
- False-negative bias: flush() discards buffered partial tags at stream end rather than leaking them.

Existing phrase-based shim tests updated to exercise the actual <think> tag leak. Added regression tests confirming legitimate prose starting with "I should..." is preserved (the old sanitizer's main false-positive).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 15:18:58 +08:00
parent c0b8a59a23
commit 336ddcc50d
8 changed files with 544 additions and 213 deletions
--- a/src/services/api/codexShim.ts
+++ b/src/services/api/codexShim.ts
@@ -6,10 +6,9 @@ import type {
 } from './providerConfig.js'
 import { sanitizeSchemaForOpenAICompat } from './openaiSchemaSanitizer.js'
 import {
-  looksLikeLeakedReasoningPrefix,
-  shouldBufferPotentialReasoningPrefix,
-  stripLeakedReasoningPreamble,
-} from './reasoningLeakSanitizer.js'
+  createThinkTagFilter,
+  stripThinkTags,
+} from './thinkTagSanitizer.js'

 export interface AnthropicUsage {
  input_tokens: number
@@ -734,25 +733,22 @@ export async function* codexStreamToAnthropic(
    { index: number; toolUseId: string }
  >()
  let activeTextBlockIndex: number | null = null
-  let activeTextBuffer = ''
-  let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
+  const thinkFilter = createThinkTagFilter()
  let nextContentBlockIndex = 0
  let sawToolUse = false
  let finalResponse: Record<string, any> | undefined

  const closeActiveTextBlock = async function* () {
    if (activeTextBlockIndex === null) return
-    if (textBufferMode !== 'none') {
-      const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
-      if (sanitized) {
-        yield {
-          type: 'content_block_delta',
-          index: activeTextBlockIndex,
-          delta: {
-            type: 'text_delta',
-            text: sanitized,
-          },
-        }
+    const tail = thinkFilter.flush()
+    if (tail) {
+      yield {
+        type: 'content_block_delta',
+        index: activeTextBlockIndex,
+        delta: {
+          type: 'text_delta',
+          text: tail,
+        },
      }
    }
    yield {
@@ -760,8 +756,6 @@ export async function* codexStreamToAnthropic(
      index: activeTextBlockIndex,
    }
    activeTextBlockIndex = null
-    activeTextBuffer = ''
-    textBufferMode = 'none'
  }

  const startTextBlockIfNeeded = async function* () {
@@ -837,43 +831,17 @@ export async function* codexStreamToAnthropic(

    if (event.event === 'response.output_text.delta') {
      yield* startTextBlockIfNeeded()
-      activeTextBuffer += payload.delta ?? ''
      if (activeTextBlockIndex !== null) {
-        if (
-          textBufferMode === 'strip' ||
-          looksLikeLeakedReasoningPrefix(activeTextBuffer)
-        ) {
-          textBufferMode = 'strip'
-          continue
-        }
-
-        if (textBufferMode === 'pending') {
-          if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
-            continue
-          }
+        const visible = thinkFilter.feed(payload.delta ?? '')
+        if (visible) {
          yield {
            type: 'content_block_delta',
            index: activeTextBlockIndex,
            delta: {
              type: 'text_delta',
-              text: activeTextBuffer,
+              text: visible,
            },
          }
-          textBufferMode = 'none'
-          continue
-        }
-
-        if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
-          textBufferMode = 'pending'
-          continue
-        }
-        yield {
-          type: 'content_block_delta',
-          index: activeTextBlockIndex,
-          delta: {
-            type: 'text_delta',
-            text: payload.delta ?? '',
-          },
        }
      }
      continue
@@ -969,7 +937,7 @@ export function convertCodexResponseToAnthropicMessage(
        if (part?.type === 'output_text') {
          content.push({
            type: 'text',
-            text: stripLeakedReasoningPreamble(part.text ?? ''),
+            text: stripThinkTags(part.text ?? ''),
          })
        }
      }