Fix GLM-5 and other reasoning models appearing to hang via OpenAI shim (#365)
* Fix GLM-5 and other reasoning models appearing to hang via OpenAI shim
Reasoning models like GLM-5 and DeepSeek stream chain-of-thought in
`reasoning_content` while `content` stays empty (""). The OpenAI shim
only read `delta.content`, so it saw empty strings and never emitted
any Anthropic stream events — causing the UI to appear frozen.
- Add `reasoning_content` to streaming chunk and non-streaming response types
- Emit `reasoning_content` as thinking blocks (thinking_delta) in streaming mode
- Properly transition from thinking to text blocks when content phase begins
- Fall back to `reasoning_content` in non-streaming mode when content is null
Fixes #214
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Fix non-streaming reasoning_content fallback and add tests
- Use explicit empty-string check instead of || for content fallback
so content: "" doesn't leak reasoning_content as visible text
- Close thinking block before tool call blocks in streaming path
- Add non-streaming and streaming reasoning_content tests
Co-Authored-By: GLM-5.1 <noreply@openclaude.dev>
* Fix flaky Ink reconciler tests caused by react-compiler memoization
Remove hard throw in createTextInstance that crashed when hostContext.isInsideText
was stale due to react-compiler element caching. Add timeout guards to prevent
test hangs when render errors prevent exit() from firing.
Co-Authored-By: Claude GLM-5.1 <noreply@openclaude.dev>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: GLM-5.1 <noreply@openclaude.dev>
This commit is contained in:
@@ -650,3 +650,237 @@ test('coalesces consecutive assistant messages preserving tool_calls (issue #202
|
||||
expect(assistantMsgs?.length).toBe(1) // two assistant turns merged into one
|
||||
expect(assistantMsgs?.[0]?.tool_calls?.length).toBeGreaterThan(0)
|
||||
})
|
||||
|
||||
test('non-streaming: reasoning_content emitted as thinking block, used as text when content is null', async () => {
|
||||
globalThis.fetch = (async (_input, _init) => {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
id: 'chatcmpl-1',
|
||||
model: 'glm-5',
|
||||
choices: [
|
||||
{
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: null,
|
||||
reasoning_content: 'Let me think about this step by step.',
|
||||
},
|
||||
finish_reason: 'stop',
|
||||
},
|
||||
],
|
||||
usage: {
|
||||
prompt_tokens: 10,
|
||||
completion_tokens: 20,
|
||||
total_tokens: 30,
|
||||
},
|
||||
}),
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
|
||||
const result = (await client.beta.messages.create({
|
||||
model: 'glm-5',
|
||||
system: 'test system',
|
||||
messages: [{ role: 'user', content: 'hello' }],
|
||||
max_tokens: 64,
|
||||
stream: false,
|
||||
})) as { content: Array<Record<string, unknown>> }
|
||||
|
||||
expect(result.content).toEqual([
|
||||
{ type: 'thinking', thinking: 'Let me think about this step by step.' },
|
||||
{ type: 'text', text: 'Let me think about this step by step.' },
|
||||
])
|
||||
})
|
||||
|
||||
test('non-streaming: empty string content does not fall through to reasoning_content as text', async () => {
|
||||
globalThis.fetch = (async (_input, _init) => {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
id: 'chatcmpl-1',
|
||||
model: 'glm-5',
|
||||
choices: [
|
||||
{
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
reasoning_content: 'Chain of thought here.',
|
||||
},
|
||||
finish_reason: 'stop',
|
||||
},
|
||||
],
|
||||
usage: {
|
||||
prompt_tokens: 10,
|
||||
completion_tokens: 20,
|
||||
total_tokens: 30,
|
||||
},
|
||||
}),
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
|
||||
const result = (await client.beta.messages.create({
|
||||
model: 'glm-5',
|
||||
system: 'test system',
|
||||
messages: [{ role: 'user', content: 'hello' }],
|
||||
max_tokens: 64,
|
||||
stream: false,
|
||||
})) as { content: Array<Record<string, unknown>> }
|
||||
|
||||
// reasoning_content should be a thinking block, and also used as text
|
||||
// since content is empty string (treated as absent)
|
||||
expect(result.content).toEqual([
|
||||
{ type: 'thinking', thinking: 'Chain of thought here.' },
|
||||
{ type: 'text', text: 'Chain of thought here.' },
|
||||
])
|
||||
})
|
||||
|
||||
test('non-streaming: real content takes precedence over reasoning_content', async () => {
|
||||
globalThis.fetch = (async (_input, _init) => {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
id: 'chatcmpl-1',
|
||||
model: 'glm-5',
|
||||
choices: [
|
||||
{
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: 'The answer is 42.',
|
||||
reasoning_content: 'I need to calculate this.',
|
||||
},
|
||||
finish_reason: 'stop',
|
||||
},
|
||||
],
|
||||
usage: {
|
||||
prompt_tokens: 10,
|
||||
completion_tokens: 20,
|
||||
total_tokens: 30,
|
||||
},
|
||||
}),
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
|
||||
const result = (await client.beta.messages.create({
|
||||
model: 'glm-5',
|
||||
system: 'test system',
|
||||
messages: [{ role: 'user', content: 'hello' }],
|
||||
max_tokens: 64,
|
||||
stream: false,
|
||||
})) as { content: Array<Record<string, unknown>> }
|
||||
|
||||
expect(result.content).toEqual([
|
||||
{ type: 'thinking', thinking: 'I need to calculate this.' },
|
||||
{ type: 'text', text: 'The answer is 42.' },
|
||||
])
|
||||
})
|
||||
|
||||
test('streaming: thinking block closed before tool call', async () => {
|
||||
globalThis.fetch = (async (_input, _init) => {
|
||||
const chunks = makeStreamChunks([
|
||||
{
|
||||
id: 'chatcmpl-1',
|
||||
object: 'chat.completion.chunk',
|
||||
model: 'glm-5',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: { role: 'assistant', reasoning_content: 'Thinking...' },
|
||||
finish_reason: null,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'chatcmpl-1',
|
||||
object: 'chat.completion.chunk',
|
||||
model: 'glm-5',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
tool_calls: [
|
||||
{
|
||||
index: 0,
|
||||
id: 'call-1',
|
||||
type: 'function',
|
||||
function: {
|
||||
name: 'Bash',
|
||||
arguments: '{"command":"ls"}',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
finish_reason: null,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'chatcmpl-1',
|
||||
object: 'chat.completion.chunk',
|
||||
model: 'glm-5',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {},
|
||||
finish_reason: 'tool_calls',
|
||||
},
|
||||
],
|
||||
},
|
||||
])
|
||||
|
||||
return makeSseResponse(chunks)
|
||||
}) as FetchType
|
||||
|
||||
const client = createOpenAIShimClient({}) as OpenAIShimClient
|
||||
|
||||
const result = await client.beta.messages
|
||||
.create({
|
||||
model: 'glm-5',
|
||||
system: 'test system',
|
||||
messages: [{ role: 'user', content: 'Run ls' }],
|
||||
max_tokens: 64,
|
||||
stream: true,
|
||||
})
|
||||
.withResponse()
|
||||
|
||||
const events: Array<Record<string, unknown>> = []
|
||||
for await (const event of result.data) {
|
||||
events.push(event)
|
||||
}
|
||||
|
||||
const types = events.map(e => e.type)
|
||||
|
||||
// Verify thinking block is started, then closed, then tool call starts
|
||||
const thinkingStartIdx = types.indexOf('content_block_start')
|
||||
const firstStopIdx = types.indexOf('content_block_stop')
|
||||
const toolStartIdx = types.indexOf(
|
||||
'content_block_start',
|
||||
thinkingStartIdx + 1,
|
||||
)
|
||||
|
||||
expect(thinkingStartIdx).toBeGreaterThanOrEqual(0)
|
||||
expect(firstStopIdx).toBeGreaterThan(thinkingStartIdx)
|
||||
expect(toolStartIdx).toBeGreaterThan(firstStopIdx)
|
||||
|
||||
// Verify thinking block start content
|
||||
const thinkingStart = events[thinkingStartIdx] as {
|
||||
content_block?: Record<string, unknown>
|
||||
}
|
||||
expect(thinkingStart?.content_block?.type).toBe('thinking')
|
||||
})
|
||||
|
||||
@@ -476,6 +476,7 @@ interface OpenAIStreamChunk {
|
||||
delta: {
|
||||
role?: string
|
||||
content?: string | null
|
||||
reasoning_content?: string | null
|
||||
tool_calls?: Array<{
|
||||
index: number
|
||||
id?: string
|
||||
@@ -525,6 +526,8 @@ async function* openaiStreamToAnthropic(
|
||||
let contentBlockIndex = 0
|
||||
const activeToolCalls = new Map<number, { id: string; name: string; index: number; jsonBuffer: string }>()
|
||||
let hasEmittedContentStart = false
|
||||
let hasEmittedThinkingStart = false
|
||||
let hasClosedThinking = false
|
||||
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
|
||||
let hasEmittedFinalUsage = false
|
||||
let hasProcessedFinishReason = false
|
||||
@@ -581,9 +584,34 @@ async function* openaiStreamToAnthropic(
|
||||
for (const choice of chunk.choices ?? []) {
|
||||
const delta = choice.delta
|
||||
|
||||
// Reasoning models (e.g. GLM-5, DeepSeek) may stream chain-of-thought
|
||||
// in `reasoning_content` before the actual reply appears in `content`.
|
||||
// Emit reasoning as a thinking block and content as a text block.
|
||||
if (delta.reasoning_content != null && delta.reasoning_content !== '') {
|
||||
if (!hasEmittedThinkingStart) {
|
||||
yield {
|
||||
type: 'content_block_start',
|
||||
index: contentBlockIndex,
|
||||
content_block: { type: 'thinking', thinking: '' },
|
||||
}
|
||||
hasEmittedThinkingStart = true
|
||||
}
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: contentBlockIndex,
|
||||
delta: { type: 'thinking_delta', thinking: delta.reasoning_content },
|
||||
}
|
||||
}
|
||||
|
||||
// Text content — use != null to distinguish absent field from empty string,
|
||||
// some providers send "" as first delta to signal streaming start
|
||||
if (delta.content != null) {
|
||||
if (delta.content != null && delta.content !== '') {
|
||||
// Close thinking block if transitioning from reasoning to content
|
||||
if (hasEmittedThinkingStart && !hasClosedThinking) {
|
||||
yield { type: 'content_block_stop', index: contentBlockIndex }
|
||||
contentBlockIndex++
|
||||
hasClosedThinking = true
|
||||
}
|
||||
if (!hasEmittedContentStart) {
|
||||
yield {
|
||||
type: 'content_block_start',
|
||||
@@ -603,7 +631,12 @@ async function* openaiStreamToAnthropic(
|
||||
if (delta.tool_calls) {
|
||||
for (const tc of delta.tool_calls) {
|
||||
if (tc.id && tc.function?.name) {
|
||||
// New tool call starting
|
||||
// New tool call starting — close any open thinking block first
|
||||
if (hasEmittedThinkingStart && !hasClosedThinking) {
|
||||
yield { type: 'content_block_stop', index: contentBlockIndex }
|
||||
contentBlockIndex++
|
||||
hasClosedThinking = true
|
||||
}
|
||||
if (hasEmittedContentStart) {
|
||||
yield {
|
||||
type: 'content_block_stop',
|
||||
@@ -677,6 +710,12 @@ async function* openaiStreamToAnthropic(
|
||||
if (choice.finish_reason && !hasProcessedFinishReason) {
|
||||
hasProcessedFinishReason = true
|
||||
|
||||
// Close any open thinking block that wasn't closed by content transition
|
||||
if (hasEmittedThinkingStart && !hasClosedThinking) {
|
||||
yield { type: 'content_block_stop', index: contentBlockIndex }
|
||||
contentBlockIndex++
|
||||
hasClosedThinking = true
|
||||
}
|
||||
// Close any open content blocks
|
||||
if (hasEmittedContentStart) {
|
||||
yield {
|
||||
@@ -1087,6 +1126,7 @@ class OpenAIShimMessages {
|
||||
| string
|
||||
| null
|
||||
| Array<{ type?: string; text?: string }>
|
||||
reasoning_content?: string | null
|
||||
tool_calls?: Array<{
|
||||
id: string
|
||||
function: { name: string; arguments: string }
|
||||
@@ -1108,7 +1148,17 @@ class OpenAIShimMessages {
|
||||
const choice = data.choices?.[0]
|
||||
const content: Array<Record<string, unknown>> = []
|
||||
|
||||
const rawContent = choice?.message?.content
|
||||
// Some reasoning models (e.g. GLM-5) put their reply in reasoning_content
|
||||
// while content stays null — emit reasoning as a thinking block, then
|
||||
// fall back to it for visible text if content is empty.
|
||||
const reasoningText = choice?.message?.reasoning_content
|
||||
if (typeof reasoningText === 'string' && reasoningText) {
|
||||
content.push({ type: 'thinking', thinking: reasoningText })
|
||||
}
|
||||
const rawContent =
|
||||
choice?.message?.content !== '' && choice?.message?.content != null
|
||||
? choice?.message?.content
|
||||
: choice?.message?.reasoning_content
|
||||
if (typeof rawContent === 'string' && rawContent) {
|
||||
content.push({ type: 'text', text: rawContent })
|
||||
} else if (Array.isArray(rawContent) && rawContent.length > 0) {
|
||||
|
||||
Reference in New Issue
Block a user