feat: add model caching and benchmarking utilities (#671)
* feat: add model caching and benchmarking utilities
  - Add modelCache.ts for disk caching of model lists
  - Add benchmark.ts for testing model speed/quality
* fix: address review feedback - async fs, multi-provider support, error handling
* feat: add /benchmark slash command and unit tests
* feat: add /benchmark slash command and unit tests
This commit is contained in:
committed by
GitHub
parent
6a62e3ff76
commit
2b15e16421
56
src/commands/benchmark.ts
Normal file
56
src/commands/benchmark.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import type { ToolUseContext } from '../Tool.js'
|
||||
import type { Command } from '../types/command.js'
|
||||
import {
|
||||
benchmarkModel,
|
||||
benchmarkMultipleModels,
|
||||
formatBenchmarkResults,
|
||||
isBenchmarkSupported,
|
||||
} from '../utils/model/benchmark.js'
|
||||
import { getOllamaModelOptions } from '../utils/model/ollamaModels.js'
|
||||
|
||||
/**
 * Benchmarks one model, or a default selection of Ollama models, and writes
 * per-model progress plus a formatted summary to `context.stdout`.
 *
 * Returns early with a notice when `isBenchmarkSupported()` is false, or when
 * there is nothing to benchmark.
 *
 * @param model - Model to benchmark. When omitted or empty, the first few
 *   entries returned by `getOllamaModelOptions()` are used instead.
 * @param context - Command context whose `stdout` receives all output. Every
 *   write is optional-chained, so a missing context or stdout only silences
 *   the output; the benchmark itself still runs.
 */
async function runBenchmark(
  model?: string,
  context?: ToolUseContext,
): Promise<void> {
  // Upper bound on how many discovered models are run when none is named.
  const defaultModelLimit = 3

  if (!isBenchmarkSupported()) {
    context?.stdout?.write(
      'Benchmark not supported for this provider.\n' +
        'Supported: OpenAI-compatible endpoints (Ollama, NVIDIA NIM, MiniMax)\n',
    )
    return
  }

  const modelsToBenchmark: string[] = model
    ? [model]
    : getOllamaModelOptions()
        .slice(0, defaultModelLimit)
        .map((m) => m.value)

  // Without this guard an empty model list would announce
  // "Benchmarking 0 model(s)..." and then print a summary of nothing.
  if (modelsToBenchmark.length === 0) {
    context?.stdout?.write(
      'No models available to benchmark. ' +
        'Specify a model name to benchmark it directly.\n',
    )
    return
  }

  context?.stdout?.write(
    `Benchmarking ${modelsToBenchmark.length} model(s)...\n`,
  )

  const results = await benchmarkMultipleModels(
    modelsToBenchmark,
    // Progress callback: one line per finished model.
    (completed, total, result) => {
      const outcome = result.success
        ? result.tokensPerSecond.toFixed(1) + ' tps'
        : 'FAILED'
      context?.stdout?.write(
        `[${completed}/${total}] ${result.model}: ${outcome}\n`,
      )
    },
  )

  context?.stdout?.write('\n' + formatBenchmarkResults(results) + '\n')
}
|
||||
|
||||
/**
 * Definition of the `benchmark` command.
 *
 * Reads an optional `model` argument from `context.args` and passes it,
 * together with the context, to `runBenchmark`.
 */
export const benchmark: Command = {
  name: 'benchmark',

  async onExecute(context: ToolUseContext): Promise<void> {
    const args = context.args ?? {}

    // Validate at runtime instead of asserting with `as`: only a real string
    // is a usable model name. Any other value is treated as "no model given",
    // which makes runBenchmark fall back to its default selection.
    const requested: unknown = args.model
    const model = typeof requested === 'string' ? requested : undefined

    await runBenchmark(model, context)
  },
}
|
||||
Reference in New Issue
Block a user