feat: add model caching and benchmarking utilities (#671)

* feat: add model caching and benchmarking utilities
  - Add modelCache.ts for disk caching of model lists
  - Add benchmark.ts for testing model speed/quality
* fix: address review feedback - async fs, multi-provider support, error handling
* feat: add /benchmark slash command and unit tests
* feat: add /benchmark slash command and unit tests
2026-04-21 11:36:16 +01:00
parent 6a62e3ff76
commit 2b15e16421
4 changed files with 456 additions and 0 deletions
--- a/src/commands/benchmark.ts
+++ b/src/commands/benchmark.ts
@@ -0,0 +1,56 @@
+import type { ToolUseContext } from '../Tool.js'
+import type { Command } from '../types/command.js'
+import {
+  benchmarkModel,
+  benchmarkMultipleModels,
+  formatBenchmarkResults,
+  isBenchmarkSupported,
+} from '../utils/model/benchmark.js'
+import { getOllamaModelOptions } from '../utils/model/ollamaModels.js'
+
+/**
+ * Runs the benchmark for one model, or for a default selection, and reports
+ * progress and a formatted summary through `context.stdout`.
+ *
+ * Output is best-effort: nothing is written when `context` or its `stdout`
+ * is absent, but the benchmark itself still runs.
+ *
+ * @param model - Model to benchmark. When omitted (or empty), the first three
+ *   entries returned by `getOllamaModelOptions()` are used instead.
+ * @param context - Optional tool context whose `stdout` receives the output.
+ */
+async function runBenchmark(
+  model?: string,
+  context?: ToolUseContext,
+): Promise<void> {
+  // Single place for the optional-chained write so every message behaves alike.
+  const write = (text: string): void => {
+    context?.stdout?.write(text)
+  }
+
+  if (!isBenchmarkSupported()) {
+    write(
+      'Benchmark not supported for this provider.\n' +
+        'Supported: OpenAI-compatible endpoints (Ollama, NVIDIA NIM, MiniMax)\n',
+    )
+    return
+  }
+
+  // An explicit model wins; otherwise fall back to the first three listed models.
+  const modelsToBenchmark: string[] = model
+    ? [model]
+    : getOllamaModelOptions()
+        .slice(0, 3)
+        .map((m) => m.value)
+
+  // Without this guard an empty model list would produce a confusing
+  // "Benchmarking 0 model(s)..." banner followed by an empty report.
+  if (modelsToBenchmark.length === 0) {
+    write('No models available to benchmark.\n')
+    return
+  }
+
+  write(`Benchmarking ${modelsToBenchmark.length} model(s)...\n`)
+
+  const results = await benchmarkMultipleModels(
+    modelsToBenchmark,
+    (completed, total, result) => {
+      // One progress line per finished model: throughput on success, FAILED otherwise.
+      const outcome = result.success
+        ? `${result.tokensPerSecond.toFixed(1)} tps`
+        : 'FAILED'
+      write(`[${completed}/${total}] ${result.model}: ${outcome}\n`)
+    },
+  )
+
+  write('\n' + formatBenchmarkResults(results) + '\n')
+}
+
+/**
+ * `/benchmark` command definition.
+ *
+ * Takes an optional `model` argument from `context.args`; when it is missing
+ * or not a string, `runBenchmark` chooses the models itself.
+ */
+export const benchmark: Command = {
+  name: 'benchmark',
+
+  /**
+   * Extracts the requested model (if any) and runs the benchmark, writing
+   * all output through the supplied context.
+   */
+  async onExecute(context: ToolUseContext): Promise<void> {
+    const args = context.args ?? {}
+    // Check the value at runtime instead of asserting its type, so a
+    // non-string argument is ignored rather than treated as a model name.
+    const model = typeof args.model === 'string' ? args.model : undefined
+
+    await runBenchmark(model, context)
+  },
+}