Fix/openclaude diagnostics settings (#483)

* fix: use openclaude paths in diagnostics and settings

* fix: strip leaked reasoning from assistant output

* fix: preserve legacy claude config compatibility

* fix: tighten path and reasoning compatibility

* fix: buffer streamed reasoning leak preambles

* test: cover openclaude migration and reasoning fixes

* test: isolate execFileNoThrow from cross-file mocks
Kevin Codex
2026-04-09 20:42:51 +08:00
committed by GitHub
parent 32fbd0c7b4
commit 42b121bd0d
23 changed files with 934 additions and 101 deletions
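
In brief: the new reasoningLeakSanitizer module gives both provider shims a way to drop chain-of-thought that models leak into visible assistant text. A minimal sketch of the headline behavior, with the import path taken from the module added below and the sample string and expected output taken from the tests:

import { stripLeakedReasoningPreamble } from './reasoningLeakSanitizer.js'

const leaked =
  'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?'

// The first paragraph is dropped only when it matches the leak heuristics;
// any other input is returned unchanged.
console.log(stripLeakedReasoningPreamble(leaked))
// => 'Hey! How can I help you today?'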

View File

@@ -465,6 +465,37 @@ describe('Codex request translation', () => {
])
})
test('strips leaked reasoning preamble from completed Codex text responses', () => {
const message = convertCodexResponseToAnthropicMessage(
{
id: 'resp_1',
model: 'gpt-5.4',
output: [
{
type: 'message',
role: 'assistant',
content: [
{
type: 'output_text',
text:
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
},
],
},
],
usage: { input_tokens: 12, output_tokens: 4 },
},
'gpt-5.4',
)
expect(message.content).toEqual([
{
type: 'text',
text: 'Hey! How can I help you today?',
},
])
})
test('translates Codex SSE text stream into Anthropic events', async () => {
const responseText = [
'event: response.output_item.added',
@@ -495,4 +526,44 @@ describe('Codex request translation', () => {
'message_stop',
])
})
test('strips leaked reasoning preamble from Codex SSE text stream', async () => {
const responseText = [
'event: response.output_item.added',
'data: {"type":"response.output_item.added","item":{"id":"msg_1","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":0}',
'',
'event: response.content_part.added',
'data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_1","output_index":0,"part":{"type":"output_text","text":""},"sequence_number":1}',
'',
'event: response.output_text.delta',
'data: {"type":"response.output_text.delta","content_index":0,"delta":"The user just said \\"hey\\" - a simple greeting. I should respond briefly and friendly.\\n\\nHey! How can I help you today?","item_id":"msg_1","output_index":0,"sequence_number":2}',
'',
'event: response.output_item.done',
'data: {"type":"response.output_item.done","item":{"id":"msg_1","type":"message","status":"completed","content":[{"type":"output_text","text":"The user just said \\"hey\\" - a simple greeting. I should respond briefly and friendly.\\n\\nHey! How can I help you today?"}],"role":"assistant"},"output_index":0,"sequence_number":3}',
'',
'event: response.completed',
'data: {"type":"response.completed","response":{"id":"resp_1","status":"completed","model":"gpt-5.4","output":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"The user just said \\"hey\\" - a simple greeting. I should respond briefly and friendly.\\n\\nHey! How can I help you today?"}]}],"usage":{"input_tokens":2,"output_tokens":1}},"sequence_number":4}',
'',
].join('\n')
const stream = new ReadableStream({
start(controller) {
controller.enqueue(new TextEncoder().encode(responseText))
controller.close()
},
})
const textDeltas: string[] = []
for await (const event of codexStreamToAnthropic(
new Response(stream),
'gpt-5.4',
)) {
const delta = (event as { delta?: { type?: string; text?: string } }).delta
if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
textDeltas.push(delta.text)
}
}
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
})
})

View File

@@ -4,6 +4,11 @@ import type {
ResolvedProviderRequest,
} from './providerConfig.js'
import { sanitizeSchemaForOpenAICompat } from './openaiSchemaSanitizer.js'
import {
looksLikeLeakedReasoningPrefix,
shouldBufferPotentialReasoningPrefix,
stripLeakedReasoningPreamble,
} from './reasoningLeakSanitizer.js'
export interface AnthropicUsage {
input_tokens: number
@@ -678,17 +683,34 @@ export async function* codexStreamToAnthropic(
{ index: number; toolUseId: string }
>()
let activeTextBlockIndex: number | null = null
let activeTextBuffer = ''
let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
let nextContentBlockIndex = 0
let sawToolUse = false
let finalResponse: Record<string, any> | undefined
const closeActiveTextBlock = async function* () {
if (activeTextBlockIndex === null) return
if (textBufferMode !== 'none') {
const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
if (sanitized) {
yield {
type: 'content_block_delta',
index: activeTextBlockIndex,
delta: {
type: 'text_delta',
text: sanitized,
},
}
}
}
yield {
type: 'content_block_stop',
index: activeTextBlockIndex,
}
activeTextBlockIndex = null
activeTextBuffer = ''
textBufferMode = 'none'
}
const startTextBlockIfNeeded = async function* () {
@@ -764,7 +786,36 @@ export async function* codexStreamToAnthropic(
if (event.event === 'response.output_text.delta') {
yield* startTextBlockIfNeeded()
activeTextBuffer += payload.delta ?? ''
if (activeTextBlockIndex !== null) {
if (
textBufferMode === 'strip' ||
looksLikeLeakedReasoningPrefix(activeTextBuffer)
) {
textBufferMode = 'strip'
continue
}
if (textBufferMode === 'pending') {
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
continue
}
yield {
type: 'content_block_delta',
index: activeTextBlockIndex,
delta: {
type: 'text_delta',
text: activeTextBuffer,
},
}
textBufferMode = 'none'
continue
}
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
textBufferMode = 'pending'
continue
}
yield {
type: 'content_block_delta',
index: activeTextBlockIndex,
@@ -859,7 +910,7 @@ export function convertCodexResponseToAnthropicMessage(
if (part?.type === 'output_text') {
content.push({
type: 'text',
- text: part.text ?? '',
+ text: stripLeakedReasoningPreamble(part.text ?? ''),
})
}
}
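
Both streaming translators (codexStreamToAnthropic above and the OpenAI shim below) drive the sanitizer through the same three-state buffer. A condensed, hypothetical reduction of that per-delta decision; the emit callback stands in for the yielded Anthropic content_block_delta events:

import {
  looksLikeLeakedReasoningPrefix,
  shouldBufferPotentialReasoningPrefix,
  stripLeakedReasoningPreamble,
} from './reasoningLeakSanitizer.js'

type BufferMode = 'none' | 'pending' | 'strip'

interface TextBlockState {
  buffer: string
  mode: BufferMode
}

// Per-delta decision mirroring the branches in the diff: a confirmed leak
// switches to 'strip' and swallows text until the block closes, an
// ambiguous prefix stays 'pending', and anything else flows through
// (releasing a pending buffer first).
function onTextDelta(
  state: TextBlockState,
  delta: string,
  emit: (text: string) => void,
): void {
  state.buffer += delta
  if (state.mode === 'strip' || looksLikeLeakedReasoningPrefix(state.buffer)) {
    state.mode = 'strip'
    return
  }
  if (shouldBufferPotentialReasoningPrefix(state.buffer)) {
    state.mode = 'pending'
    return
  }
  if (state.mode === 'pending') {
    emit(state.buffer) // prefix turned out benign: release everything held
  } else {
    emit(delta)
  }
  state.mode = 'none'
}

// On content_block_stop, a still-buffered block emits its sanitized remainder.
function onTextBlockClose(
  state: TextBlockState,
  emit: (text: string) => void,
): void {
  if (state.mode !== 'none') {
    const sanitized = stripLeakedReasoningPreamble(state.buffer)
    if (sanitized) emit(sanitized)
  }
  state.buffer = ''
  state.mode = 'none'
}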

View File

@@ -1946,7 +1946,7 @@ test('coalesces consecutive assistant messages preserving tool_calls (issue #202
expect(assistantMsgs?.[0]?.tool_calls?.length).toBeGreaterThan(0)
})
- test('non-streaming: reasoning_content emitted as thinking block, used as text when content is null', async () => {
+ test('non-streaming: reasoning_content emitted as thinking block only when content is null', async () => {
globalThis.fetch = (async (_input, _init) => {
return new Response(
JSON.stringify({
@@ -1988,7 +1988,6 @@ test('non-streaming: reasoning_content emitted as thinking block, used as text w
expect(result.content).toEqual([
{ type: 'thinking', thinking: 'Let me think about this step by step.' },
- { type: 'text', text: 'Let me think about this step by step.' },
])
})
@@ -2034,7 +2033,6 @@ test('non-streaming: empty string content does not fall through to reasoning_con
expect(result.content).toEqual([
{ type: 'thinking', thinking: 'Chain of thought here.' },
- { type: 'text', text: 'Chain of thought here.' },
])
})
@@ -2084,6 +2082,46 @@ test('non-streaming: real content takes precedence over reasoning_content', asyn
])
})
test('non-streaming: strips leaked reasoning preamble from assistant content', async () => {
globalThis.fetch = (async () => {
return new Response(
JSON.stringify({
id: 'chatcmpl-1',
model: 'gpt-5-mini',
choices: [
{
message: {
role: 'assistant',
content:
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
},
finish_reason: 'stop',
},
],
usage: {
prompt_tokens: 10,
completion_tokens: 20,
total_tokens: 30,
},
}),
{ headers: { 'Content-Type': 'application/json' } },
)
}) as FetchType
const client = createOpenAIShimClient({}) as OpenAIShimClient
const result = (await client.beta.messages.create({
model: 'gpt-5-mini',
system: 'test system',
messages: [{ role: 'user', content: 'hey' }],
max_tokens: 64,
stream: false,
})) as { content: Array<Record<string, unknown>> }
expect(result.content).toEqual([
{ type: 'text', text: 'Hey! How can I help you today?' },
])
})
test('streaming: thinking block closed before tool call', async () => {
globalThis.fetch = (async (_input, _init) => {
const chunks = makeStreamChunks([
@@ -2175,3 +2213,134 @@ test('streaming: thinking block closed before tool call', async () => {
}
expect(thinkingStart?.content_block?.type).toBe('thinking')
})
test('streaming: strips leaked reasoning preamble from assistant content deltas', async () => {
globalThis.fetch = (async () => {
const chunks = makeStreamChunks([
{
id: 'chatcmpl-1',
object: 'chat.completion.chunk',
model: 'gpt-5-mini',
choices: [
{
index: 0,
delta: {
role: 'assistant',
content:
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?',
},
finish_reason: null,
},
],
},
{
id: 'chatcmpl-1',
object: 'chat.completion.chunk',
model: 'gpt-5-mini',
choices: [
{
index: 0,
delta: {},
finish_reason: 'stop',
},
],
},
])
return makeSseResponse(chunks)
}) as FetchType
const client = createOpenAIShimClient({}) as OpenAIShimClient
const result = await client.beta.messages
.create({
model: 'gpt-5-mini',
system: 'test system',
messages: [{ role: 'user', content: 'hey' }],
max_tokens: 64,
stream: true,
})
.withResponse()
const textDeltas: string[] = []
for await (const event of result.data) {
const delta = (event as { delta?: { type?: string; text?: string } }).delta
if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
textDeltas.push(delta.text)
}
}
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
})
test('streaming: strips leaked reasoning preamble when split across multiple content chunks', async () => {
globalThis.fetch = (async () => {
const chunks = makeStreamChunks([
{
id: 'chatcmpl-1',
object: 'chat.completion.chunk',
model: 'gpt-5-mini',
choices: [
{
index: 0,
delta: {
role: 'assistant',
content: 'The user said "hey" - this is a simple greeting. ',
},
finish_reason: null,
},
],
},
{
id: 'chatcmpl-1',
object: 'chat.completion.chunk',
model: 'gpt-5-mini',
choices: [
{
index: 0,
delta: {
content:
'I should respond in a friendly, concise way.\n\nHey! How can I help you today?',
},
finish_reason: null,
},
],
},
{
id: 'chatcmpl-1',
object: 'chat.completion.chunk',
model: 'gpt-5-mini',
choices: [
{
index: 0,
delta: {},
finish_reason: 'stop',
},
],
},
])
return makeSseResponse(chunks)
}) as FetchType
const client = createOpenAIShimClient({}) as OpenAIShimClient
const result = await client.beta.messages
.create({
model: 'gpt-5-mini',
system: 'test system',
messages: [{ role: 'user', content: 'hey' }],
max_tokens: 64,
stream: true,
})
.withResponse()
const textDeltas: string[] = []
for await (const event of result.data) {
const delta = (event as { delta?: { type?: string; text?: string } }).delta
if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
textDeltas.push(delta.text)
}
}
expect(textDeltas).toEqual(['Hey! How can I help you today?'])
})

View File

@@ -26,6 +26,11 @@ import { isEnvTruthy } from '../../utils/envUtils.js'
import { resolveGeminiCredential } from '../../utils/geminiAuth.js'
import { hydrateGeminiAccessTokenFromSecureStorage } from '../../utils/geminiCredentials.js'
import { hydrateGithubModelsTokenFromSecureStorage } from '../../utils/githubModelsCredentials.js'
import {
looksLikeLeakedReasoningPrefix,
shouldBufferPotentialReasoningPrefix,
stripLeakedReasoningPreamble,
} from './reasoningLeakSanitizer.js'
import {
codexStreamToAnthropic,
collectCodexCompletedResponse,
@@ -588,6 +593,8 @@ async function* openaiStreamToAnthropic(
let hasEmittedContentStart = false
let hasEmittedThinkingStart = false
let hasClosedThinking = false
let activeTextBuffer = ''
let textBufferMode: 'none' | 'pending' | 'strip' = 'none'
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
let hasEmittedFinalUsage = false
let hasProcessedFinishReason = false
@@ -618,6 +625,30 @@ async function* openaiStreamToAnthropic(
const decoder = new TextDecoder()
let buffer = ''
const closeActiveContentBlock = async function* () {
if (!hasEmittedContentStart) return
if (textBufferMode !== 'none') {
const sanitized = stripLeakedReasoningPreamble(activeTextBuffer)
if (sanitized) {
yield {
type: 'content_block_delta',
index: contentBlockIndex,
delta: { type: 'text_delta', text: sanitized },
}
}
}
yield {
type: 'content_block_stop',
index: contentBlockIndex,
}
contentBlockIndex++
hasEmittedContentStart = false
activeTextBuffer = ''
textBufferMode = 'none'
}
try {
while (true) {
const { done, value } = await reader.read()
@@ -672,6 +703,7 @@ async function* openaiStreamToAnthropic(
contentBlockIndex++
hasClosedThinking = true
}
activeTextBuffer += delta.content
if (!hasEmittedContentStart) {
yield {
type: 'content_block_start',
@@ -680,6 +712,35 @@ async function* openaiStreamToAnthropic(
}
hasEmittedContentStart = true
}
if (
textBufferMode === 'strip' ||
looksLikeLeakedReasoningPrefix(activeTextBuffer)
) {
textBufferMode = 'strip'
continue
}
if (textBufferMode === 'pending') {
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
continue
}
yield {
type: 'content_block_delta',
index: contentBlockIndex,
delta: {
type: 'text_delta',
text: activeTextBuffer,
},
}
textBufferMode = 'none'
continue
}
if (shouldBufferPotentialReasoningPrefix(activeTextBuffer)) {
textBufferMode = 'pending'
continue
}
yield {
type: 'content_block_delta',
index: contentBlockIndex,
@@ -698,12 +759,7 @@ async function* openaiStreamToAnthropic(
hasClosedThinking = true
}
if (hasEmittedContentStart) {
- yield {
- type: 'content_block_stop',
- index: contentBlockIndex,
- }
- contentBlockIndex++
- hasEmittedContentStart = false
+ yield* closeActiveContentBlock()
}
const toolBlockIndex = contentBlockIndex
@@ -786,10 +842,7 @@ async function* openaiStreamToAnthropic(
}
// Close any open content blocks
if (hasEmittedContentStart) {
- yield {
- type: 'content_block_stop',
- index: contentBlockIndex,
- }
+ yield* closeActiveContentBlock()
}
// Close active tool calls
for (const [, tc] of activeToolCalls) {
@@ -1383,9 +1436,9 @@ class OpenAIShimMessages {
const choice = data.choices?.[0]
const content: Array<Record<string, unknown>> = []
- // Some reasoning models (e.g. GLM-5) put their reply in reasoning_content
- // while content stays null — emit reasoning as a thinking block, then
- // fall back to it for visible text if content is empty.
+ // Some reasoning models (e.g. GLM-5) put their chain-of-thought in
+ // reasoning_content while content stays null. Preserve it as a thinking
+ // block, but do not surface it as visible assistant text.
const reasoningText = choice?.message?.reasoning_content
if (typeof reasoningText === 'string' && reasoningText) {
content.push({ type: 'thinking', thinking: reasoningText })
@@ -1393,9 +1446,12 @@ class OpenAIShimMessages {
const rawContent =
choice?.message?.content !== '' && choice?.message?.content != null
? choice?.message?.content
- : choice?.message?.reasoning_content
+ : null
if (typeof rawContent === 'string' && rawContent) {
- content.push({ type: 'text', text: rawContent })
+ content.push({
+ type: 'text',
+ text: stripLeakedReasoningPreamble(rawContent),
+ })
} else if (Array.isArray(rawContent) && rawContent.length > 0) {
const parts: string[] = []
for (const part of rawContent) {
@@ -1410,7 +1466,10 @@ class OpenAIShimMessages {
}
const joined = parts.join('\n')
if (joined) {
- content.push({ type: 'text', text: joined })
+ content.push({
+ type: 'text',
+ text: stripLeakedReasoningPreamble(joined),
+ })
}
}
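
The same non-streaming rules, reduced to a small hypothetical helper; the choice shape follows the OpenAI chat completion objects used in the tests, and stripLeakedReasoningPreamble is the real export used above:

import { stripLeakedReasoningPreamble } from './reasoningLeakSanitizer.js'

// Reasoning stays a thinking block and is never promoted to visible text;
// plain string content is sanitized before it is surfaced.
function toAnthropicContent(choice?: {
  message?: { content?: string | null; reasoning_content?: string | null }
}): Array<Record<string, unknown>> {
  const content: Array<Record<string, unknown>> = []
  const reasoning = choice?.message?.reasoning_content
  if (typeof reasoning === 'string' && reasoning) {
    content.push({ type: 'thinking', thinking: reasoning })
  }
  const raw =
    choice?.message?.content !== '' && choice?.message?.content != null
      ? choice?.message?.content
      : null
  if (typeof raw === 'string' && raw) {
    content.push({ type: 'text', text: stripLeakedReasoningPreamble(raw) })
  }
  return content
}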

View File

@@ -0,0 +1,46 @@
import { describe, expect, test } from 'bun:test'
import {
looksLikeLeakedReasoningPrefix,
shouldBufferPotentialReasoningPrefix,
stripLeakedReasoningPreamble,
} from './reasoningLeakSanitizer.ts'
describe('reasoning leak sanitizer', () => {
test('strips explicit internal reasoning preambles', () => {
const text =
'The user just said "hey" - a simple greeting. I should respond briefly and friendly.\n\nHey! How can I help you today?'
expect(looksLikeLeakedReasoningPrefix(text)).toBe(true)
expect(stripLeakedReasoningPreamble(text)).toBe(
'Hey! How can I help you today?',
)
})
test('does not strip normal user-facing advice that mentions "the user should"', () => {
const text =
'The user should reset their password immediately.\n\nHere are the steps...'
expect(looksLikeLeakedReasoningPrefix(text)).toBe(false)
expect(shouldBufferPotentialReasoningPrefix(text)).toBe(false)
expect(stripLeakedReasoningPreamble(text)).toBe(text)
})
test('does not strip legitimate first-person advice about responding to an incident', () => {
const text =
'I need to respond to this security incident immediately. The system is compromised.\n\nHere are the remediation steps...'
expect(looksLikeLeakedReasoningPrefix(text)).toBe(false)
expect(shouldBufferPotentialReasoningPrefix(text)).toBe(false)
expect(stripLeakedReasoningPreamble(text)).toBe(text)
})
test('does not strip legitimate first-person advice about answering a support ticket', () => {
const text =
'I need to answer the support ticket before end of day. The customer is waiting.\n\nHere is the response I drafted...'
expect(looksLikeLeakedReasoningPrefix(text)).toBe(false)
expect(shouldBufferPotentialReasoningPrefix(text)).toBe(false)
expect(stripLeakedReasoningPreamble(text)).toBe(text)
})
})

View File

@@ -0,0 +1,54 @@
const EXPLICIT_REASONING_START_RE =
/^\s*(i should\b|i need to\b|let me think\b|the task\b|the request\b)/i
const EXPLICIT_REASONING_META_RE =
/\b(user|request|question|prompt|message|task|greeting|small talk|briefly|friendly|concise)\b/i
const USER_META_START_RE =
/^\s*the user\s+(just\s+)?(said|asked|is asking|wants|wanted|mentioned|seems|appears)\b/i
const USER_REASONING_RE =
/^\s*the user\s+(just\s+)?(said|asked|is asking|wants|wanted|mentioned|seems|appears)\b[\s\S]*\b(i should|i need to|let me think|respond|reply|answer|greeting|small talk|briefly|friendly|concise)\b/i
export function shouldBufferPotentialReasoningPrefix(text: string): boolean {
const normalized = text.trim()
if (!normalized) return false
if (looksLikeLeakedReasoningPrefix(normalized)) {
return true
}
const hasParagraphBoundary = /\n\s*\n/.test(normalized)
if (hasParagraphBoundary) {
return false
}
return (
EXPLICIT_REASONING_START_RE.test(normalized) ||
USER_META_START_RE.test(normalized)
)
}
export function looksLikeLeakedReasoningPrefix(text: string): boolean {
const normalized = text.trim()
if (!normalized) return false
return (
(EXPLICIT_REASONING_START_RE.test(normalized) &&
EXPLICIT_REASONING_META_RE.test(normalized)) ||
USER_REASONING_RE.test(normalized)
)
}
export function stripLeakedReasoningPreamble(text: string): string {
const normalized = text.replace(/\r\n/g, '\n')
const parts = normalized.split(/\n\s*\n/)
if (parts.length < 2) return text
const first = parts[0]?.trim() ?? ''
if (!looksLikeLeakedReasoningPrefix(first)) {
return text
}
const remainder = parts.slice(1).join('\n\n').trim()
return remainder || text
}
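
A quick check of how the three heuristics divide the work; the expected values for the second string come straight from the unit tests above, and the first call illustrates the prefix case USER_META_START_RE exists for:

import {
  looksLikeLeakedReasoningPrefix,
  shouldBufferPotentialReasoningPrefix,
  stripLeakedReasoningPreamble,
} from './reasoningLeakSanitizer.js'

// A bare reporting-verb prefix is held back until more text arrives.
console.log(shouldBufferPotentialReasoningPrefix('The user just said "hey"')) // true

// A paragraph boundary without reasoning markers releases the text untouched
// ("should" is not one of the reporting verbs the regexes look for).
const benign =
  'The user should reset their password immediately.\n\nHere are the steps...'
console.log(looksLikeLeakedReasoningPrefix(benign)) // false
console.log(shouldBufferPotentialReasoningPrefix(benign)) // false
console.log(stripLeakedReasoningPreamble(benign) === benign) // true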