import { getSessionId } from '../../bootstrap/state.js'
import { resolveProviderRequest } from '../../services/api/providerConfig.js'
import type { LocalCommandCall } from '../../types/command.js'
import { logForDebugging } from '../../utils/debug.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { hydrateGithubModelsTokenFromSecureStorage } from '../../utils/githubModelsCredentials.js'
import { getMainLoopModel } from '../../utils/model/model.js'

const COPILOT_HEADERS: Record<string, string> = {
  'User-Agent': 'GitHubCopilotChat/0.26.7',
  'Editor-Version': 'vscode/1.99.3',
  'Editor-Plugin-Version': 'copilot-chat/0.26.7',
  'Copilot-Integration-Id': 'vscode-chat',
}

// Large system prompt (~15k chars, roughly 4k tokens) to cross the 1024-token cache threshold
const SYSTEM_PROMPT = [
  'You are a coding assistant. Answer concisely.',
  'CONTEXT: User is working on a TypeScript project with Bun runtime.',
  ...Array.from(
    { length: 80 },
    (_, i) =>
      `Rule ${i + 1}: Follow best practices for TypeScript including strict typing, error handling, testing, and clean code. Prefer explicit types over any. Use const assertions. Await all async operations.`,
  ),
].join('\n\n')

const USER_MESSAGE = 'Say "hello" and nothing else.'
const DELAY_MS = 3000

/**
 * Extract the model family from a versioned model string.
 * e.g. "gpt-5.4-0626" → "gpt-5.4", "codex-mini-latest" → "codex-mini"
 */
function getModelFamily(model: string | undefined): string {
  if (!model) return 'unknown'
  return model
    .replace(/-\d{4,}$/, '')
    .replace(/-latest$/, '')
    .replace(/-preview$/, '')
}

// Read a nested value by dot-separated path, e.g. 'input_tokens_details.cached_tokens'
function getField(obj: unknown, path: string): unknown {
  return path
    .split('.')
    .reduce((o: any, k: string) => (o != null ? o[k] : undefined), obj)
}

interface ProbeResult {
  label: string
  status: number
  elapsed: number
  headers: Record<string, string>
  usage: Record<string, unknown> | null
  responseId: string | null
  error: string | null
}

async function sendProbe(
  url: string,
  headers: Record<string, string>,
  body: Record<string, unknown>,
  label: string,
): Promise<ProbeResult> {
  const start = Date.now()
  let response: Response
  try {
    response = await fetch(url, {
      method: 'POST',
      headers,
      body: JSON.stringify(body),
    })
  } catch (err: any) {
    return {
      label,
      status: 0,
      elapsed: Date.now() - start,
      headers: {},
      usage: null,
      responseId: null,
      error: err.message,
    }
  }
  const elapsed = Date.now() - start

  const respHeaders: Record<string, string> = {}
  response.headers.forEach((value, key) => {
    respHeaders[key] = value
  })

  if (!response.ok) {
    const errorBody = await response.text().catch(() => '')
    return {
      label,
      status: response.status,
      elapsed,
      headers: respHeaders,
      usage: null,
      responseId: null,
      error: errorBody,
    }
  }

  // Parse the SSE stream for usage data
  const text = await response.text()
  let usage: Record<string, unknown> | null = null
  let responseId: string | null = null
  const isResponses = url.endsWith('/responses')

  for (const chunk of text.split('\n\n')) {
    const lines = chunk
      .split('\n')
      .map((l) => l.trim())
      .filter(Boolean)
    if (isResponses) {
      const eventLine = lines.find((l) => l.startsWith('event: '))
      const dataLines = lines.filter((l) => l.startsWith('data: '))
      if (!eventLine || !dataLines.length) continue
      const event = eventLine.slice(7).trim()
      if (
        event === 'response.completed' ||
        event === 'response.incomplete'
      ) {
        try {
          const data = JSON.parse(
            dataLines.map((l) => l.slice(6)).join('\n'),
          )
          usage = (data?.response?.usage as Record<string, unknown>) ?? null
          responseId = (data?.response?.id as string) ?? null
        } catch {}
      }
    } else {
      for (const line of lines) {
        if (!line.startsWith('data: ')) continue
        const raw = line.slice(6).trim()
        if (raw === '[DONE]') continue
        try {
          const data = JSON.parse(raw) as Record<string, unknown>
          if (data.usage) {
            usage = data.usage as Record<string, unknown>
            responseId = (data.id as string) ?? null
          }
        } catch {}
      }
    }
  }

  return {
    label,
    status: response.status,
    elapsed,
    headers: respHeaders,
    usage,
    responseId,
    error: null,
  }
}
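
// For reference, the two SSE shapes sendProbe() looks for. These are abridged
// sketches (field values invented, non-usage fields omitted), not a full spec
// of either stream:
//
// Responses API (/responses) — usage arrives on the terminal event:
//   event: response.completed
//   data: {"response":{"id":"resp_...","usage":{"input_tokens":3901,
//          "input_tokens_details":{"cached_tokens":3840},"output_tokens":5}}}
//
// Chat Completions (/chat/completions) with stream_options.include_usage —
// usage arrives on a late chunk, then the stream ends with [DONE]:
//   data: {"id":"chatcmpl-...","usage":{"prompt_tokens":3901,
//          "prompt_tokens_details":{"cached_tokens":3840},"completion_tokens":5}}
//   data: [DONE]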
function formatResult(r: ProbeResult): string {
  const lines: string[] = [`--- ${r.label} ---`]
  if (r.error) {
    lines.push(` ERROR (HTTP ${r.status}): ${r.error.slice(0, 200)}`)
    return lines.join('\n')
  }
  lines.push(` HTTP ${r.status} — ${r.elapsed}ms`)
  if (r.responseId) lines.push(` response.id: ${r.responseId}`)
  if (r.usage) {
    lines.push(' Usage:')
    lines.push(` ${JSON.stringify(r.usage, null, 2).replace(/\n/g, '\n ')}`)
  } else {
    lines.push(' Usage: null')
  }
  // Interesting headers
  for (const h of [
    'openai-processing-ms',
    'x-ratelimit-remaining',
    'x-ratelimit-limit',
    'x-ms-region',
    'x-github-request-id',
    'x-request-id',
  ]) {
    if (r.headers[h]) lines.push(` ${h}: ${r.headers[h]}`)
  }
  return lines.join('\n')
}
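
// Example of what formatResult() emits into the debug log (values illustrative):
//
//   --- CALL 2 — Warm (cache expected) ---
//    HTTP 200 — 842ms
//    response.id: resp_abc123
//    Usage:
//    { "input_tokens": 3901, ... }
//    openai-processing-ms: 650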
export const call: LocalCommandCall = async (args) => {
  const parts = (args ?? '').trim().split(/\s+/).filter(Boolean)
  const noKey = parts.includes('--no-key')
  const modelOverride = parts.find((p) => !p.startsWith('--')) || undefined

  const modelStr = modelOverride ?? getMainLoopModel()
  const request = resolveProviderRequest({ model: modelStr })
  const isGithub = isEnvTruthy(process.env.CLAUDE_CODE_USE_GITHUB)

  // Resolve the API key the same way the OpenAI shim does
  let apiKey = process.env.OPENAI_API_KEY ?? ''
  if (!apiKey && isGithub) {
    hydrateGithubModelsTokenFromSecureStorage()
    apiKey =
      process.env.OPENAI_API_KEY ??
      process.env.GITHUB_TOKEN ??
      process.env.GH_TOKEN ??
      ''
  }
  if (!apiKey) {
    return {
      type: 'text',
      value:
        'No API key found. Make sure you are in an active OpenAI-compatible or GitHub Copilot session.\n' +
        'For GitHub Copilot: run /onboard-github first.\n' +
        'For OpenAI-compatible: set OPENAI_API_KEY.',
    }
  }

  const useResponses = request.transport === 'codex_responses'
  const endpoint = useResponses ? '/responses' : '/chat/completions'
  const url = `${request.baseUrl}${endpoint}`
  const family = getModelFamily(request.resolvedModel)
  const cacheKey = `${getSessionId()}:${family}`

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${apiKey}`,
    originator: 'openclaude',
  }
  if (isGithub) {
    Object.assign(headers, COPILOT_HEADERS)
  }

  let body: Record<string, unknown>
  if (useResponses) {
    body = {
      model: request.resolvedModel,
      instructions: SYSTEM_PROMPT,
      input: [
        {
          type: 'message',
          role: 'user',
          content: [{ type: 'input_text', text: USER_MESSAGE }],
        },
      ],
      stream: true,
      ...(noKey
        ? {}
        : {
            store: false,
            prompt_cache_key: cacheKey,
            prompt_cache_retention: '24h',
          }),
    }
  } else {
    body = {
      model: request.resolvedModel,
      messages: [
        { role: 'system', content: SYSTEM_PROMPT },
        { role: 'user', content: USER_MESSAGE },
      ],
      stream: true,
      stream_options: { include_usage: true },
      max_tokens: 20,
      ...(noKey
        ? {}
        : {
            store: false,
            prompt_cache_key: cacheKey,
          }),
    }
  }

  // Log configuration
  const config = [
    `[cache-probe] Starting cache probe${noKey ? ' (--no-key: cache params OMITTED)' : ''}`,
    ` model: ${request.resolvedModel} (family: ${family})`,
    ` transport: ${request.transport}`,
    ` endpoint: ${url}`,
    ` prompt_cache_key: ${noKey ? 'NOT SENT' : cacheKey}`,
    ` store: ${noKey ? 'NOT SENT' : 'false'}`,
    ` system prompt: ~${Math.round(SYSTEM_PROMPT.length / 4)} tokens`,
    ` delay between calls: ${DELAY_MS}ms`,
  ].join('\n')
  logForDebugging(config)

  // Call 1 — Cold
  const r1 = await sendProbe(url, headers, body, 'CALL 1 — Cold (no cache)')
  logForDebugging(`[cache-probe]\n${formatResult(r1)}`)
  if (r1.error) {
    return {
      type: 'text',
      value: `Cache probe failed on first call: HTTP ${r1.status}\n${r1.error.slice(0, 300)}\n\nFull details in debug log.`,
    }
  }

  // Wait before the warm call
  await new Promise((r) => setTimeout(r, DELAY_MS))

  // Call 2 — Warm: an identical request, so a cache hit is expected
  const r2 = await sendProbe(url, headers, body, 'CALL 2 — Warm (cache expected)')
  logForDebugging(`[cache-probe]\n${formatResult(r2)}`)

  // --- Comparison ---
  const fields = [
    'input_tokens',
    'output_tokens',
    'total_tokens',
    'prompt_tokens',
    'completion_tokens',
    'input_tokens_details.cached_tokens',
    'prompt_tokens_details.cached_tokens',
    'output_tokens_details.reasoning_tokens',
  ]
  const comparison: string[] = ['[cache-probe] COMPARISON']
  comparison.push(
    ` ${'Field'.padEnd(42)} ${'Call 1'.padStart(8)} ${'Call 2'.padStart(8)} ${'Delta'.padStart(8)}`,
  )
  comparison.push(` ${'-'.repeat(72)}`)
  for (const f of fields) {
    const v1 = getField(r1.usage, f)
    const v2 = getField(r2.usage, f)
    if (v1 === undefined && v2 === undefined) continue
    const d = typeof v1 === 'number' && typeof v2 === 'number' ? v2 - v1 : ''
    comparison.push(
      ` ${f.padEnd(42)} ${String(v1 ?? '-').padStart(8)} ${String(v2 ?? '-').padStart(8)} ${String(d).padStart(8)}`,
    )
  }
  comparison.push('')
  comparison.push(
    ` Latency: ${r1.elapsed}ms → ${r2.elapsed}ms (${r2.elapsed - r1.elapsed > 0 ? '+' : ''}${r2.elapsed - r1.elapsed}ms)`,
  )

  // Header comparison
  for (const h of ['openai-processing-ms', 'x-ms-region', 'x-ratelimit-remaining']) {
    const v1 = r1.headers[h]
    const v2 = r2.headers[h]
    if (v1 || v2) {
      comparison.push(` ${h}: ${v1 ?? '-'} → ${v2 ?? '-'}`)
    }
  }

  // Verdict
  const cached2 =
    (getField(r2.usage, 'input_tokens_details.cached_tokens') as number) ??
    (getField(r2.usage, 'prompt_tokens_details.cached_tokens') as number) ??
    0
  const input1 = ((r1.usage?.input_tokens ?? r1.usage?.prompt_tokens) as number) ?? 0
  const input2 = ((r2.usage?.input_tokens ?? r2.usage?.prompt_tokens) as number) ?? 0

  let verdict: string
  if (cached2 > 0) {
    const rate = input2 > 0 ? Math.round((cached2 / input2) * 100) : '?'
    verdict = `CACHE HIT: ${cached2} cached tokens (${rate}% of input)`
  } else if (input1 === 0 && input2 === 0) {
    verdict = 'INCONCLUSIVE: Server returns 0 input_tokens — cannot measure'
  } else if (r2.elapsed < r1.elapsed * 0.6 && input1 > 100) {
    verdict = `POSSIBLE SILENT CACHING: Call 2 was ${Math.round((1 - r2.elapsed / r1.elapsed) * 100)}% faster but no cached_tokens reported`
  } else {
    verdict = 'NO CACHE DETECTED'
  }
  comparison.push(`\n Verdict: ${verdict}`)
  // --- Simulate what main's shim code does with this usage ---
  // codexShim.ts makeUsage() — used for the Responses API (GPT-5+/Codex)
  function mainMakeUsage(u: any) {
    return {
      input_tokens: u?.input_tokens ?? 0,
      output_tokens: u?.output_tokens ?? 0,
      cache_creation_input_tokens: 0,
      cache_read_input_tokens: 0, // ← main hardcodes this to 0
    }
  }
  // openaiShim.ts convertChunkUsage() — used for Chat Completions
  function mainConvertChunkUsage(u: any) {
    return {
      input_tokens: u?.prompt_tokens ?? 0,
      output_tokens: u?.completion_tokens ?? 0,
      cache_creation_input_tokens: 0,
      cache_read_input_tokens: u?.prompt_tokens_details?.cached_tokens ?? 0,
    }
  }
  const shimFn = useResponses ? mainMakeUsage : mainConvertChunkUsage
  const shim1 = shimFn(r1.usage)
  const shim2 = shimFn(r2.usage)

  comparison.push('')
  comparison.push(
    ` --- What main's shim reports (${useResponses ? 'codexShim.makeUsage' : 'openaiShim.convertChunkUsage'}) ---`,
  )
  comparison.push(` Call 1: cache_read_input_tokens=${shim1.cache_read_input_tokens}`)
  comparison.push(` Call 2: cache_read_input_tokens=${shim2.cache_read_input_tokens}`)
  if (useResponses && cached2 > 0) {
    comparison.push(
      ` BUG: Server returned ${cached2} cached tokens but main's makeUsage() drops it → reports 0`,
    )
  } else if (!useResponses && shim2.cache_read_input_tokens > 0) {
    comparison.push(` OK: Chat Completions path on main correctly reads cached_tokens`)
  }

  logForDebugging(comparison.join('\n'))

  // User-facing summary
  const mode = noKey ? ' (NO cache key sent)' : ''
  const shimLabel = useResponses ? 'codexShim.makeUsage()' : 'openaiShim.convertChunkUsage()'
  const summary = [
    `Cache Probe — ${request.resolvedModel} via ${useResponses ? 'Responses API' : 'Chat Completions'}${mode}`,
    '',
    `Call 1: ${r1.elapsed}ms, input=${input1}, cached=${
      (getField(r1.usage, 'input_tokens_details.cached_tokens') as number) ??
      (getField(r1.usage, 'prompt_tokens_details.cached_tokens') as number) ??
      0
    }`,
    `Call 2: ${r2.elapsed}ms, input=${input2}, cached=${cached2}`,
    '',
    verdict,
    '',
    `What main's ${shimLabel} reports:`,
    ` Call 2 cache_read_input_tokens = ${shim2.cache_read_input_tokens}${useResponses && cached2 > 0 ? ' ← BUG: server sent ' + cached2 + ' but main drops it' : ''}`,
    '',
    'Full details written to debug log.',
  ].join('\n')

  return { type: 'text', value: summary }
}
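
/*
 * Usage sketch — assumes this module is registered as the `/cache-probe`
 * local command (the `[cache-probe]` log prefix above suggests that name;
 * registration happens elsewhere):
 *
 *   /cache-probe                  probe the current main-loop model
 *   /cache-probe gpt-5.4-0626     probe a specific model (first non-flag arg)
 *   /cache-probe --no-key         omit store/prompt_cache_key/prompt_cache_retention
 *                                 to test for provider-side implicit caching
 */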