feat: add streaming optimizer and structured request logging (#703)
* Integrate request logging and streaming optimizer

  - Add logApiCallStart/End for API request tracking with correlation IDs
  - Add streaming state tracking with processStreamChunk
  - Flush buffer and log stream stats at stream end
  - Resolve merge conflict with main branch

* feat: add streaming optimizer and structured request logging

* fix: address PR review feedback

  - Remove buffering from streamingOptimizer - now purely observational
  - Use logForDebugging instead of console.log for structured logging
  - Remove dead code (streamResponse, bufferedStreamResponse, etc.)
  - Use existing logging infrastructure instead of raw console.log
  - Keep only used functions: createStreamState, processStreamChunk, getStreamStats

* test: add unit tests for requestLogging and streamingOptimizer

  - streamingOptimizer.test.ts: 6 tests for createStreamState, processStreamChunk, getStreamStats
  - requestLogging.test.ts: 6 tests for createCorrelationId, logApiCallStart, logApiCallEnd

* fix: correct durationMs test to be >= 0 instead of exactly 0

* fix: address PR #703 blockers and non-blockers

  1. BLOCKER FIX: Skip clone() for streaming responses
     - Only call response.clone() + .json() for non-streaming requests
     - For streaming, usage comes via stream chunks anyway
  2. NON-BLOCKER: Document dead code in flushStreamBuffer
     - Added comment explaining it's a no-op kept for API compat
  3. NON-BLOCKER: vi.mock in tests - left as-is (test framework issue)

* fix: address all remaining non-blockers for PR #703

  1. Remove dead code: flushStreamBuffer call and unused import
  2. Fix test for Bun: remove vi.mock, use simple no-throw tests
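The diff below shows only the call sites for the two new utilities. For orientation, here is a minimal sketch of what utils/streamingOptimizer.ts could look like, reconstructed from those call sites; the StreamState field names and timing details are assumptions, not the actual implementation:

// Hypothetical sketch of utils/streamingOptimizer.ts, inferred from the
// call sites in this diff. Field names and timing details are assumptions.
export interface StreamState {
  startTime: number              // ms timestamp when the stream state was created
  firstChunkTime: number | null  // ms timestamp of the first non-empty chunk
  totalChunks: number            // non-empty chunks observed so far
}

export function createStreamState(): StreamState {
  return { startTime: Date.now(), firstChunkTime: null, totalChunks: 0 }
}

// Purely observational (per the PR review feedback): records timing and
// counts, never buffers or transforms the chunk.
export function processStreamChunk(state: StreamState, chunk: string | undefined): void {
  if (!chunk) return
  if (state.firstChunkTime === null) state.firstChunkTime = Date.now()
  state.totalChunks += 1
}

export function getStreamStats(state: StreamState): {
  totalChunks: number
  firstTokenMs: number | null
  durationMs: number  // always >= 0, hence the durationMs test fix above
} {
  return {
    totalChunks: state.totalChunks,
    firstTokenMs:
      state.firstChunkTime === null ? null : state.firstChunkTime - state.startTime,
    durationMs: Date.now() - state.startTime,
  }
}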
@@ -67,6 +67,8 @@ import {
   normalizeToolArguments,
   hasToolFieldMapping,
 } from './toolArgumentNormalization.js'
+import { logApiCallStart, logApiCallEnd } from '../../utils/requestLogging.js'
+import { createStreamState, processStreamChunk, getStreamStats } from '../../utils/streamingOptimizer.js'
 
 type SecretValueSource = Partial<{
   OPENAI_API_KEY: string
@@ -857,6 +859,7 @@ async function* openaiStreamToAnthropic(
   let lastStopReason: 'tool_use' | 'max_tokens' | 'end_turn' | null = null
   let hasEmittedFinalUsage = false
   let hasProcessedFinishReason = false
+  const streamState = createStreamState()
 
   // Emit message_start
   yield {
@@ -1020,6 +1023,7 @@ async function* openaiStreamToAnthropic(
           delta: { type: 'text_delta', text: visible },
         }
       }
+      processStreamChunk(streamState, delta.content)
     }
 
     // Tool calls
@@ -1039,6 +1043,7 @@ async function* openaiStreamToAnthropic(
       const toolBlockIndex = contentBlockIndex
       const initialArguments = tc.function.arguments ?? ''
       const normalizeAtStop = hasToolFieldMapping(tc.function.name)
+      processStreamChunk(streamState, tc.function.arguments ?? '')
       activeToolCalls.set(tc.index, {
         id: tc.id,
         name: tc.function.name,
@@ -1236,6 +1241,20 @@ async function* openaiStreamToAnthropic(
     reader.releaseLock()
   }
 
+  const stats = getStreamStats(streamState)
+  if (stats.totalChunks > 0) {
+    logForDebugging(
+      JSON.stringify({
+        type: 'stream_stats',
+        model,
+        total_chunks: stats.totalChunks,
+        first_token_ms: stats.firstTokenMs,
+        duration_ms: stats.durationMs,
+      }),
+      { level: 'debug' },
+    )
+  }
+
   yield { type: 'message_stop' }
 }
 
@@ -1715,6 +1734,12 @@ class OpenAIShimMessages {
     }
 
     let response: Response | undefined
+    const provider = request.baseUrl.includes('nvidia') ? 'nvidia-nim'
+      : request.baseUrl.includes('minimax') ? 'minimax'
+      : request.baseUrl.includes('localhost:11434') || request.baseUrl.includes('localhost:11435') ? 'ollama'
+      : request.baseUrl.includes('anthropic') ? 'anthropic'
+      : 'openai'
+    const { correlationId, startTime } = logApiCallStart(provider, request.resolvedModel)
     for (let attempt = 0; attempt < maxAttempts; attempt++) {
       try {
         response = await fetchWithProxyRetry(
@@ -1752,6 +1777,20 @@ class OpenAIShimMessages {
       }
 
       if (response.ok) {
+        let tokensIn = 0
+        let tokensOut = 0
+        // Skip clone() for streaming responses - it blocks until full body is received,
+        // defeating the purpose of streaming. Usage data is already sent via
+        // stream_options: { include_usage: true } and can be extracted from the stream.
+        if (!params.stream) {
+          try {
+            const clone = response.clone()
+            const data = await clone.json()
+            tokensIn = data.usage?.prompt_tokens ?? 0
+            tokensOut = data.usage?.completion_tokens ?? 0
+          } catch { /* ignore */ }
+        }
+        logApiCallEnd(correlationId, startTime, request.resolvedModel, 'success', tokensIn, tokensOut, false)
         return response
       }
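For completeness, a sketch of how the requestLogging helpers used above could be shaped, again inferred only from their call sites. The logForDebugging import path, the log payload fields, the correlation-ID format, and the meaning of the trailing boolean argument are all assumptions:

// Hypothetical sketch of utils/requestLogging.ts, inferred from call sites.
// The logForDebugging import path and payload shape are assumptions.
import { logForDebugging } from './logging.js'

export function createCorrelationId(): string {
  // Short random URL-safe ID; the real format may differ.
  return Math.random().toString(36).slice(2, 10)
}

export function logApiCallStart(
  provider: string,
  model: string,
): { correlationId: string; startTime: number } {
  const correlationId = createCorrelationId()
  const startTime = Date.now()
  logForDebugging(
    JSON.stringify({ type: 'api_call_start', correlation_id: correlationId, provider, model }),
    { level: 'debug' },
  )
  return { correlationId, startTime }
}

export function logApiCallEnd(
  correlationId: string,
  startTime: number,
  model: string,
  status: 'success' | 'error',
  tokensIn: number,
  tokensOut: number,
  // Assumed meaning of the trailing `false` at the call site above.
  usageFromStream: boolean,
): void {
  logForDebugging(
    JSON.stringify({
      type: 'api_call_end',
      correlation_id: correlationId,
      model,
      status,
      tokens_in: tokensIn,
      tokens_out: tokensOut,
      usage_from_stream: usageFromStream,
      duration_ms: Date.now() - startTime,
    }),
    { level: 'debug' },
  )
}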