From 1e057025d6e28da40c3cadad60e4eea037f139c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ot=C3=A1vio=20Carvalho?=
Date: Mon, 6 Apr 2026 16:02:29 +0200
Subject: [PATCH] Fix GLM-5 and other reasoning models appearing to hang via
 OpenAI shim (#365)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix GLM-5 and other reasoning models appearing to hang via OpenAI shim

Reasoning models like GLM-5 and DeepSeek stream chain-of-thought in
`reasoning_content` while `content` stays empty (""). The OpenAI shim only
read `delta.content`, so it saw empty strings and never emitted any
Anthropic stream events — causing the UI to appear frozen.

- Add `reasoning_content` to streaming chunk and non-streaming response types
- Emit `reasoning_content` as thinking blocks (thinking_delta) in streaming mode
- Properly transition from thinking to text blocks when the content phase begins
- Fall back to `reasoning_content` in non-streaming mode when content is null

Fixes #214

Co-Authored-By: Claude Opus 4.6

* Fix non-streaming reasoning_content fallback and add tests

- Use an explicit empty-string check instead of || for the content fallback,
  treating content: "" as absent so reasoning_content still supplies the
  visible text
- Close the thinking block before tool call blocks in the streaming path
- Add non-streaming and streaming reasoning_content tests

Co-Authored-By: GLM-5.1

* Fix flaky Ink reconciler tests caused by react-compiler memoization

Remove the hard throw in createTextInstance that crashed when
hostContext.isInsideText was stale due to react-compiler element caching.
Add timeout guards to prevent test hangs when render errors prevent exit()
from firing.

Co-Authored-By: Claude GLM-5.1

---------

Co-authored-by: Claude Opus 4.6
Co-authored-by: GLM-5.1
---
 src/commands/provider/provider.test.tsx |   6 +-
 src/ink/reconciler.ts                   |  12 +-
 src/services/api/openaiShim.test.ts     | 234 ++++++++++++++++++++++++
 src/services/api/openaiShim.ts          |  56 +++++-
 src/utils/staticRender.tsx              |   8 +-
 5 files changed, 303 insertions(+), 13 deletions(-)
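Reviewer note (not part of the diff): the failure mode is easiest to see in
the raw chunk shapes. A minimal TypeScript sketch, with field names taken
from the test fixtures below; the exact wire format varies by provider:

// What the shim previously handled: visible text arrives in delta.content.
const plainDelta = {
  choices: [
    { index: 0, delta: { role: 'assistant', content: 'Hello' }, finish_reason: null },
  ],
}

// What GLM-5/DeepSeek send while reasoning: content stays empty (or absent)
// and tokens arrive in delta.reasoning_content instead. Before this patch
// the shim only read delta.content, so chunks like this produced no
// Anthropic events at all and the UI appeared frozen.
const reasoningDelta = {
  choices: [
    {
      index: 0,
      delta: { role: 'assistant', content: '', reasoning_content: 'Let me think.' },
      finish_reason: null,
    },
  ],
}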
diff --git a/src/commands/provider/provider.test.tsx b/src/commands/provider/provider.test.tsx
index 7c51d830..7b4fa1be 100644
--- a/src/commands/provider/provider.test.tsx
+++ b/src/commands/provider/provider.test.tsx
@@ -52,7 +52,11 @@ async function renderFinalFrame(node: React.ReactNode): Promise<string> {
     patchConsole: false,
   })
 
-  await instance.waitUntilExit()
+  // Timeout guard: if render throws before exit effect fires, don't hang
+  await Promise.race([
+    instance.waitUntilExit(),
+    new Promise(resolve => setTimeout(resolve, 3000)),
+  ])
 
   return stripAnsi(extractLastFrame(getOutput()))
 }
diff --git a/src/ink/reconciler.ts b/src/ink/reconciler.ts
index 53a292fb..b2e846c6 100644
--- a/src/ink/reconciler.ts
+++ b/src/ink/reconciler.ts
@@ -366,14 +366,12 @@ const reconciler = createReconciler<
   createTextInstance(
     text: string,
     _root: DOMElement,
-    hostContext: HostContext,
+    _hostContext: HostContext,
   ): TextNode {
-    if (!hostContext.isInsideText) {
-      throw new Error(
-        `Text string "${text}" must be rendered inside <Text> component`,
-      )
-    }
-
+    // react-compiler memoization can reuse cached elements without
+    // re-traversing getChildHostContext, so hostContext.isInsideText may be
+    // stale. Always create the text node — Ink will render it correctly
+    // regardless of the context tracking state.
     return createTextNode(text)
   },
   resetTextContent() {},
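Reviewer note: the Promise.race guard above (and the identical one in
staticRender.tsx at the end of this patch) could be factored into a shared
helper if a third call site appears. A sketch under that assumption;
raceWithTimeout is a hypothetical name, not part of this patch:

// Hypothetical helper: settle with the promise's value, or with undefined
// after `ms` milliseconds. It resolves rather than rejects on timeout so
// callers can still inspect whatever output was captured before the hang,
// and it clears the timer so the event loop is not kept alive.
async function raceWithTimeout<T>(
  promise: Promise<T>,
  ms: number,
): Promise<T | undefined> {
  let timer: ReturnType<typeof setTimeout> | undefined
  try {
    return await Promise.race([
      promise,
      new Promise<undefined>(resolve => {
        timer = setTimeout(() => resolve(undefined), ms)
      }),
    ])
  } finally {
    clearTimeout(timer)
  }
}

// Usage mirroring the hunk above:
//   await raceWithTimeout(instance.waitUntilExit(), 3000)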
diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts
index 1bde1380..bcfdef79 100644
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
@@ -650,3 +650,237 @@ test('coalesces consecutive assistant messages preserving tool_calls (issue #202
   expect(assistantMsgs?.length).toBe(1) // two assistant turns merged into one
   expect(assistantMsgs?.[0]?.tool_calls?.length).toBeGreaterThan(0)
 })
+
+test('non-streaming: reasoning_content emitted as thinking block, used as text when content is null', async () => {
+  globalThis.fetch = (async (_input, _init) => {
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        model: 'glm-5',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: null,
+              reasoning_content: 'Let me think about this step by step.',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 10,
+          completion_tokens: 20,
+          total_tokens: 30,
+        },
+      }),
+      {
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  const result = (await client.beta.messages.create({
+    model: 'glm-5',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'hello' }],
+    max_tokens: 64,
+    stream: false,
+  })) as { content: Array<Record<string, unknown>> }
+
+  expect(result.content).toEqual([
+    { type: 'thinking', thinking: 'Let me think about this step by step.' },
+    { type: 'text', text: 'Let me think about this step by step.' },
+  ])
+})
+
+test('non-streaming: empty string content treated as absent, falls back to reasoning_content', async () => {
+  globalThis.fetch = (async (_input, _init) => {
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        model: 'glm-5',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: '',
+              reasoning_content: 'Chain of thought here.',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 10,
+          completion_tokens: 20,
+          total_tokens: 30,
+        },
+      }),
+      {
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  const result = (await client.beta.messages.create({
+    model: 'glm-5',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'hello' }],
+    max_tokens: 64,
+    stream: false,
+  })) as { content: Array<Record<string, unknown>> }
+
+  // reasoning_content should be a thinking block, and also used as text
+  // since content is an empty string (treated as absent)
+  expect(result.content).toEqual([
+    { type: 'thinking', thinking: 'Chain of thought here.' },
+    { type: 'text', text: 'Chain of thought here.' },
+  ])
+})
+
+test('non-streaming: real content takes precedence over reasoning_content', async () => {
+  globalThis.fetch = (async (_input, _init) => {
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-1',
+        model: 'glm-5',
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'The answer is 42.',
+              reasoning_content: 'I need to calculate this.',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 10,
+          completion_tokens: 20,
+          total_tokens: 30,
+        },
+      }),
+      {
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  const result = (await client.beta.messages.create({
+    model: 'glm-5',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'hello' }],
+    max_tokens: 64,
+    stream: false,
+  })) as { content: Array<Record<string, unknown>> }
+
+  expect(result.content).toEqual([
+    { type: 'thinking', thinking: 'I need to calculate this.' },
+    { type: 'text', text: 'The answer is 42.' },
+  ])
+})
+
+test('streaming: thinking block closed before tool call', async () => {
+  globalThis.fetch = (async (_input, _init) => {
+    const chunks = makeStreamChunks([
+      {
+        id: 'chatcmpl-1',
+        object: 'chat.completion.chunk',
+        model: 'glm-5',
+        choices: [
+          {
+            index: 0,
+            delta: { role: 'assistant', reasoning_content: 'Thinking...' },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: 'chatcmpl-1',
+        object: 'chat.completion.chunk',
+        model: 'glm-5',
+        choices: [
+          {
+            index: 0,
+            delta: {
+              tool_calls: [
+                {
+                  index: 0,
+                  id: 'call-1',
+                  type: 'function',
+                  function: {
+                    name: 'Bash',
+                    arguments: '{"command":"ls"}',
+                  },
+                },
+              ],
+            },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: 'chatcmpl-1',
+        object: 'chat.completion.chunk',
+        model: 'glm-5',
+        choices: [
+          {
+            index: 0,
+            delta: {},
+            finish_reason: 'tool_calls',
+          },
+        ],
+      },
+    ])
+
+    return makeSseResponse(chunks)
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  const result = await client.beta.messages
+    .create({
+      model: 'glm-5',
+      system: 'test system',
+      messages: [{ role: 'user', content: 'Run ls' }],
+      max_tokens: 64,
+      stream: true,
+    })
+    .withResponse()
+
+  const events: Array<Record<string, unknown>> = []
+  for await (const event of result.data) {
+    events.push(event)
+  }
+
+  const types = events.map(e => e.type)
+
+  // Verify thinking block is started, then closed, then tool call starts
+  const thinkingStartIdx = types.indexOf('content_block_start')
+  const firstStopIdx = types.indexOf('content_block_stop')
+  const toolStartIdx = types.indexOf(
+    'content_block_start',
+    thinkingStartIdx + 1,
+  )
+
+  expect(thinkingStartIdx).toBeGreaterThanOrEqual(0)
+  expect(firstStopIdx).toBeGreaterThan(thinkingStartIdx)
+  expect(toolStartIdx).toBeGreaterThan(firstStopIdx)
+
+  // Verify thinking block start content
+  const thinkingStart = events[thinkingStartIdx] as {
+    content_block?: Record<string, unknown>
+  }
+  expect(thinkingStart?.content_block?.type).toBe('thinking')
+})
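Reviewer note: the streaming test above depends on makeStreamChunks and
makeSseResponse, which live elsewhere in this test file and are not shown in
the hunk. A plausible sketch of their shape, for review context only; the
real implementations may differ:

// Sketch: serialize chunk objects into OpenAI-style SSE frames, ending with
// the [DONE] sentinel the OpenAI streaming protocol uses.
function makeStreamChunks(chunks: Array<Record<string, unknown>>): string[] {
  return [...chunks.map(c => `data: ${JSON.stringify(c)}\n\n`), 'data: [DONE]\n\n']
}

// Sketch: wrap the frames in a Response whose body is a ReadableStream,
// which is what the shim's fetch-based SSE reader consumes.
function makeSseResponse(frames: string[]): Response {
  const encoder = new TextEncoder()
  const body = new ReadableStream<Uint8Array>({
    start(controller) {
      for (const frame of frames) controller.enqueue(encoder.encode(frame))
      controller.close()
    },
  })
  return new Response(body, {
    headers: { 'Content-Type': 'text/event-stream' },
  })
}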
diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts
index 962b87d8..4450de96 100644
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -476,6 +476,7 @@ interface OpenAIStreamChunk {
   delta: {
     role?: string
     content?: string | null
+    reasoning_content?: string | null
     tool_calls?: Array<{
       index: number
       id?: string
@@ -525,6 +526,8 @@ async function* openaiStreamToAnthropic(
   let contentBlockIndex = 0
   const activeToolCalls = new Map()
   let hasEmittedContentStart = false
+  let hasEmittedThinkingStart = false
+  let hasClosedThinking = false
   let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
   let hasEmittedFinalUsage = false
   let hasProcessedFinishReason = false
@@ -581,9 +584,34 @@ async function* openaiStreamToAnthropic(
     for (const choice of chunk.choices ?? []) {
       const delta = choice.delta
 
+      // Reasoning models (e.g. GLM-5, DeepSeek) may stream chain-of-thought
+      // in `reasoning_content` before the actual reply appears in `content`.
+      // Emit reasoning as a thinking block and content as a text block.
+      if (delta.reasoning_content != null && delta.reasoning_content !== '') {
+        if (!hasEmittedThinkingStart) {
+          yield {
+            type: 'content_block_start',
+            index: contentBlockIndex,
+            content_block: { type: 'thinking', thinking: '' },
+          }
+          hasEmittedThinkingStart = true
+        }
+        yield {
+          type: 'content_block_delta',
+          index: contentBlockIndex,
+          delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
+        }
+      }
+
       // Text content — use != null to distinguish absent field from empty string,
       // some providers send "" as first delta to signal streaming start
-      if (delta.content != null) {
+      if (delta.content != null && delta.content !== '') {
+        // Close thinking block if transitioning from reasoning to content
+        if (hasEmittedThinkingStart && !hasClosedThinking) {
+          yield { type: 'content_block_stop', index: contentBlockIndex }
+          contentBlockIndex++
+          hasClosedThinking = true
+        }
         if (!hasEmittedContentStart) {
           yield {
             type: 'content_block_start',
@@ -603,7 +631,12 @@ async function* openaiStreamToAnthropic(
       if (delta.tool_calls) {
         for (const tc of delta.tool_calls) {
           if (tc.id && tc.function?.name) {
-            // New tool call starting
+            // New tool call starting — close any open thinking block first
+            if (hasEmittedThinkingStart && !hasClosedThinking) {
+              yield { type: 'content_block_stop', index: contentBlockIndex }
+              contentBlockIndex++
+              hasClosedThinking = true
+            }
             if (hasEmittedContentStart) {
               yield {
                 type: 'content_block_stop',
@@ -677,6 +710,12 @@ async function* openaiStreamToAnthropic(
 
       if (choice.finish_reason && !hasProcessedFinishReason) {
         hasProcessedFinishReason = true
+        // Close any open thinking block that wasn't closed by content transition
+        if (hasEmittedThinkingStart && !hasClosedThinking) {
+          yield { type: 'content_block_stop', index: contentBlockIndex }
+          contentBlockIndex++
+          hasClosedThinking = true
+        }
         // Close any open content blocks
         if (hasEmittedContentStart) {
           yield {
@@ -1087,6 +1126,7 @@ class OpenAIShimMessages {
           | string
           | null
          | Array<{ type?: string; text?: string }>
+        reasoning_content?: string | null
         tool_calls?: Array<{
           id: string
           function: { name: string; arguments: string }
@@ -1108,7 +1148,17 @@ class OpenAIShimMessages {
     const choice = data.choices?.[0]
     const content: Array<Record<string, unknown>> = []
 
-    const rawContent = choice?.message?.content
+    // Some reasoning models (e.g. GLM-5) put their reply in reasoning_content
+    // while content stays null — emit reasoning as a thinking block, then
+    // fall back to it for visible text if content is empty.
+    const reasoningText = choice?.message?.reasoning_content
+    if (typeof reasoningText === 'string' && reasoningText) {
+      content.push({ type: 'thinking', thinking: reasoningText })
+    }
+    const rawContent =
+      choice?.message?.content !== '' && choice?.message?.content != null
+        ? choice?.message?.content
+        : choice?.message?.reasoning_content
     if (typeof rawContent === 'string' && rawContent) {
       content.push({ type: 'text', text: rawContent })
     } else if (Array.isArray(rawContent) && rawContent.length > 0) {
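Reviewer note: with the streaming changes above, a reasoning stream that
transitions into plain content yields the following Anthropic event order.
Illustrative only; the text block payloads are assumed to follow Anthropic's
usual text/text_delta shapes, which this hunk truncates:

// Input: two reasoning_content deltas, then one content delta, then stop.
const emittedEvents = [
  { type: 'content_block_start', index: 0, content_block: { type: 'thinking', thinking: '' } },
  { type: 'content_block_delta', index: 0, delta: { type: 'thinking_delta', thinking: 'Let me ' } },
  { type: 'content_block_delta', index: 0, delta: { type: 'thinking_delta', thinking: 'think.' } },
  // The first non-empty content delta closes the thinking block and bumps the index.
  { type: 'content_block_stop', index: 0 },
  { type: 'content_block_start', index: 1, content_block: { type: 'text', text: '' } },
  { type: 'content_block_delta', index: 1, delta: { type: 'text_delta', text: 'The answer is 42.' } },
  { type: 'content_block_stop', index: 1 },
]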
diff --git a/src/utils/staticRender.tsx b/src/utils/staticRender.tsx
index 5f13b39b..9cb2806a 100644
--- a/src/utils/staticRender.tsx
+++ b/src/utils/staticRender.tsx
@@ -97,8 +97,12 @@ export function renderToAnsiString(node: React.ReactNode, columns?: number): Pro
     patchConsole: false
   });
 
-  // Wait for the component to exit naturally
-  await instance.waitUntilExit();
+  // Wait for the component to exit naturally, with a timeout guard so
+  // tests never hang indefinitely if a render error prevents exit().
+  await Promise.race([
+    instance.waitUntilExit(),
+    new Promise(resolve => setTimeout(resolve, 3000)),
+  ]);
 
   // Extract only the first frame's content to avoid duplication
   // (Ink outputs multiple frames in non-TTY mode)
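Reviewer note: taken together, the non-streaming hunks in openaiShim.ts
reduce to a small behavior matrix, restated here as asserted by the new
tests (R stands for a non-empty reasoning_content string):

// content: null -> [{ type: 'thinking', thinking: R }, { type: 'text', text: R }]
// content: ''   -> same as null: the empty string is treated as absent
// content: 'X'  -> [{ type: 'thinking', thinking: R }, { type: 'text', text: 'X' }]
// The visible-text choice is exactly the ternary from the patch:
const visibleText = (content: string | null, reasoning: string | null) =>
  content !== '' && content != null ? content : reasoning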