Add DeepSeek V4 flash/pro support and DeepSeek thinking compatibility (#877)

* Add DeepSeek V4 support and thinking compatibility

* Fix DeepSeek profile persistence regression

* Align multi-model handling with openai-multi-model
This commit is contained in:
JATMN
2026-04-24 11:29:46 -07:00
committed by GitHub
parent c4cb98a4f0
commit ff2a380723
15 changed files with 356 additions and 31 deletions

View File

@@ -96,7 +96,13 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
'o3-mini': 200_000,
'o4-mini': 200_000,
// DeepSeek (V3: 128k context per official docs)
// DeepSeek V4 coding-agent models. DeepSeek's official coding-agent guide
// publishes V4 Pro at 1,048,576 context / 262,144 output. Flash has no
// dedicated public model card yet, so for local budgeting it is assumed to
// share Pro's limits until one lands.
'deepseek-v4-flash': 1_048_576,
'deepseek-v4-pro': 1_048_576,
// Legacy DeepSeek API aliases documented in the public pricing/model pages.
'deepseek-chat': 128_000,
'deepseek-reasoner': 128_000,
@@ -316,9 +322,12 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
'o3-mini': 100_000,
'o4-mini': 100_000,
// DeepSeek
// DeepSeek V4 coding-agent models. See the DeepSeek V4 note in OPENAI_CONTEXT_WINDOWS.
'deepseek-v4-flash': 262_144,
'deepseek-v4-pro': 262_144,
// Legacy DeepSeek API aliases documented in the public pricing/model pages.
'deepseek-chat': 8_192,
'deepseek-reasoner': 32_768,
'deepseek-reasoner': 65_536,
// Groq
'llama-3.3-70b-versatile': 32_768,