fix(openai-shim): preserve final streaming usage chunks
Handle OpenAI-compatible SSE responses that send usage in a trailing empty-choices chunk so token accounting and budget enforcement stay correct.
This commit is contained in:
@@ -267,6 +267,19 @@ function makeMessageId(): string {
|
||||
return `msg_${Math.random().toString(36).slice(2)}${Date.now().toString(36)}`
|
||||
}
|
||||
|
||||
/**
 * Converts the optional `usage` object of an OpenAI-style stream chunk into
 * Anthropic-style usage counters.
 *
 * @param usage - The chunk's `usage` field; may be missing on chunks that
 *   carry no accounting data.
 * @returns `undefined` when `usage` is falsy. Otherwise an object whose
 *   `input_tokens` / `output_tokens` are taken from `prompt_tokens` /
 *   `completion_tokens` (each falling back to 0 when null or undefined) and
 *   whose cache counters are always 0.
 */
function convertChunkUsage(
  usage: OpenAIStreamChunk['usage'] | undefined,
): Partial<AnthropicUsage> | undefined {
  // No usage on this chunk: signal "nothing to report" rather than zeros,
  // so callers can tell a missing usage apart from a genuine zero count.
  if (!usage) return undefined

  return {
    input_tokens: usage.prompt_tokens ?? 0,
    output_tokens: usage.completion_tokens ?? 0,
    // This conversion reads no cache fields from the chunk, so both cache
    // counters are reported as zero.
    cache_creation_input_tokens: 0,
    cache_read_input_tokens: 0,
  }
}
|
||||
|
||||
/**
|
||||
* Async generator that transforms an OpenAI SSE stream into
|
||||
* Anthropic-format BetaRawMessageStreamEvent objects.
|
||||
@@ -279,6 +292,8 @@ async function* openaiStreamToAnthropic(
|
||||
let contentBlockIndex = 0
|
||||
const activeToolCalls = new Map<number, { id: string; name: string; index: number }>()
|
||||
let hasEmittedContentStart = false
|
||||
let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
|
||||
let hasEmittedFinalUsage = false
|
||||
|
||||
// Emit message_start
|
||||
yield {
|
||||
@@ -326,6 +341,8 @@ async function* openaiStreamToAnthropic(
|
||||
continue
|
||||
}
|
||||
|
||||
const chunkUsage = convertChunkUsage(chunk.usage)
|
||||
|
||||
for (const choice of chunk.choices ?? []) {
|
||||
const delta = choice.delta
|
||||
|
||||
@@ -427,16 +444,31 @@ async function* openaiStreamToAnthropic(
|
||||
: choice.finish_reason === 'length'
|
||||
? 'max_tokens'
|
||||
: 'end_turn'
|
||||
lastStopReason = stopReason
|
||||
|
||||
yield {
|
||||
type: 'message_delta',
|
||||
delta: { stop_reason: stopReason, stop_sequence: null },
|
||||
usage: {
|
||||
output_tokens: chunk.usage?.completion_tokens ?? 0,
|
||||
},
|
||||
...(chunkUsage ? { usage: chunkUsage } : {}),
|
||||
}
|
||||
if (chunkUsage) {
|
||||
hasEmittedFinalUsage = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
!hasEmittedFinalUsage &&
|
||||
chunkUsage &&
|
||||
(chunk.choices?.length ?? 0) === 0
|
||||
) {
|
||||
yield {
|
||||
type: 'message_delta',
|
||||
delta: { stop_reason: lastStopReason, stop_sequence: null },
|
||||
usage: chunkUsage,
|
||||
}
|
||||
hasEmittedFinalUsage = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user