Fix GLM-5 and other reasoning models appearing to hang via OpenAI shim (#365)

* Fix GLM-5 and other reasoning models appearing to hang via OpenAI shim

Reasoning models like GLM-5 and DeepSeek stream chain-of-thought in
`reasoning_content` while `content` stays empty (""). The OpenAI shim
only read `delta.content`, so it saw empty strings and never emitted
any Anthropic stream events — causing the UI to appear frozen.

- Add `reasoning_content` to streaming chunk and non-streaming response types
- Emit `reasoning_content` as thinking blocks (thinking_delta) in streaming mode
- Properly transition from thinking to text blocks when content phase begins
- Fall back to `reasoning_content` in non-streaming mode when content is null

Fixes #214

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix non-streaming reasoning_content fallback and add tests

- Use explicit empty-string check instead of || for content fallback
  so content: "" doesn't leak reasoning_content as visible text
- Close thinking block before tool call blocks in streaming path
- Add non-streaming and streaming reasoning_content tests

Co-Authored-By: GLM-5.1 <noreply@openclaude.dev>

* Fix flaky Ink reconciler tests caused by react-compiler memoization

Remove hard throw in createTextInstance that crashed when hostContext.isInsideText
was stale due to react-compiler element caching. Add timeout guards to prevent
test hangs when render errors prevent exit() from firing.

Co-Authored-By: Claude GLM-5.1 <noreply@openclaude.dev>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: GLM-5.1 <noreply@openclaude.dev>
This commit is contained in:
Otávio Carvalho
2026-04-06 16:02:29 +02:00
committed by GitHub
parent aff2bd87e4
commit 1e057025d6
5 changed files with 303 additions and 13 deletions

View File

@@ -476,6 +476,7 @@ interface OpenAIStreamChunk {
delta: {
role?: string
content?: string | null
reasoning_content?: string | null
tool_calls?: Array<{
index: number
id?: string
@@ -525,6 +526,8 @@ async function* openaiStreamToAnthropic(
let contentBlockIndex = 0
const activeToolCalls = new Map<number, { id: string; name: string; index: number; jsonBuffer: string }>()
let hasEmittedContentStart = false
let hasEmittedThinkingStart = false
let hasClosedThinking = false
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
let hasEmittedFinalUsage = false
let hasProcessedFinishReason = false
@@ -581,9 +584,34 @@ async function* openaiStreamToAnthropic(
for (const choice of chunk.choices ?? []) {
const delta = choice.delta
// Reasoning models (e.g. GLM-5, DeepSeek) may stream chain-of-thought
// in `reasoning_content` before the actual reply appears in `content`.
// Emit reasoning as a thinking block and content as a text block.
if (delta.reasoning_content != null && delta.reasoning_content !== '') {
if (!hasEmittedThinkingStart) {
yield {
type: 'content_block_start',
index: contentBlockIndex,
content_block: { type: 'thinking', thinking: '' },
}
hasEmittedThinkingStart = true
}
yield {
type: 'content_block_delta',
index: contentBlockIndex,
delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
}
}
// Text content — skip deltas whose content is absent or an empty string.
// Some providers send "" as the first delta to signal streaming start, and
// that must not open a text block.
if (delta.content != null) {
if (delta.content != null && delta.content !== '') {
// Close thinking block if transitioning from reasoning to content
if (hasEmittedThinkingStart && !hasClosedThinking) {
yield { type: 'content_block_stop', index: contentBlockIndex }
contentBlockIndex++
hasClosedThinking = true
}
if (!hasEmittedContentStart) {
yield {
type: 'content_block_start',
@@ -603,7 +631,12 @@ async function* openaiStreamToAnthropic(
if (delta.tool_calls) {
for (const tc of delta.tool_calls) {
if (tc.id && tc.function?.name) {
// New tool call starting
// New tool call starting — close any open thinking block first
if (hasEmittedThinkingStart && !hasClosedThinking) {
yield { type: 'content_block_stop', index: contentBlockIndex }
contentBlockIndex++
hasClosedThinking = true
}
if (hasEmittedContentStart) {
yield {
type: 'content_block_stop',
@@ -677,6 +710,12 @@ async function* openaiStreamToAnthropic(
if (choice.finish_reason && !hasProcessedFinishReason) {
hasProcessedFinishReason = true
// Close any open thinking block that wasn't closed by content transition
if (hasEmittedThinkingStart && !hasClosedThinking) {
yield { type: 'content_block_stop', index: contentBlockIndex }
contentBlockIndex++
hasClosedThinking = true
}
// Close any open content blocks
if (hasEmittedContentStart) {
yield {
@@ -1087,6 +1126,7 @@ class OpenAIShimMessages {
| string
| null
| Array<{ type?: string; text?: string }>
reasoning_content?: string | null
tool_calls?: Array<{
id: string
function: { name: string; arguments: string }
@@ -1108,7 +1148,17 @@ class OpenAIShimMessages {
const choice = data.choices?.[0]
const content: Array<Record<string, unknown>> = []
const rawContent = choice?.message?.content
// Some reasoning models (e.g. GLM-5) put their reply in reasoning_content
// while content stays null. Emit the reasoning as a thinking block, then
// fall back to it as the visible text when content is null or an empty string.
const reasoningText = choice?.message?.reasoning_content
if (typeof reasoningText === 'string' && reasoningText) {
content.push({ type: 'thinking', thinking: reasoningText })
}
const rawContent =
choice?.message?.content !== '' && choice?.message?.content != null
? choice?.message?.content
: choice?.message?.reasoning_content
if (typeof rawContent === 'string' && rawContent) {
content.push({ type: 'text', text: rawContent })
} else if (Array.isArray(rawContent) && rawContent.length > 0) {