Add DeepSeek V4 flash/pro support and DeepSeek thinking compatibility (#877)

* Add DeepSeek V4 support and thinking compatibility

* Fix DeepSeek profile persistence regression

* Align multi-model handling with openai-multi-model
This commit is contained in:
JATMN
2026-04-24 11:29:46 -07:00
committed by GitHub
parent c4cb98a4f0
commit ff2a380723
15 changed files with 356 additions and 31 deletions

View File

@@ -3415,10 +3415,7 @@ test('Moonshot: echoes reasoning_content on assistant tool-call messages', async
)
})
test('non-Moonshot providers do NOT receive reasoning_content on assistant messages', async () => {
// Guard: only Moonshot opts in. DeepSeek/OpenRouter/etc. receive the
// outgoing assistant message without reasoning_content to avoid
// unknown-field rejections from strict servers.
test('DeepSeek echoes reasoning_content on assistant tool-call messages', async () => {
process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1'
process.env.OPENAI_API_KEY = 'sk-deepseek'
@@ -3428,7 +3425,7 @@ test('non-Moonshot providers do NOT receive reasoning_content on assistant messa
return new Response(
JSON.stringify({
id: 'chatcmpl-1',
model: 'deepseek-chat',
model: 'deepseek-v4-flash',
choices: [
{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
],
@@ -3440,7 +3437,65 @@ test('non-Moonshot providers do NOT receive reasoning_content on assistant messa
const client = createOpenAIShimClient({}) as OpenAIShimClient
await client.beta.messages.create({
model: 'deepseek-chat',
model: 'deepseek-v4-flash',
system: 'test',
messages: [
{ role: 'user', content: 'hi' },
{
role: 'assistant',
content: [
{ type: 'thinking', thinking: 'thought' },
{ type: 'text', text: 'hello' },
{
type: 'tool_use',
id: 'call_1',
name: 'Bash',
input: { command: 'ls' },
},
],
},
{
role: 'user',
content: [
{ type: 'tool_result', tool_use_id: 'call_1', content: 'files' },
],
},
],
max_tokens: 32,
stream: false,
})
const messages = requestBody?.messages as Array<Record<string, unknown>>
const assistantWithToolCall = messages.find(
m => m.role === 'assistant' && Array.isArray(m.tool_calls),
)
expect(assistantWithToolCall).toBeDefined()
expect(assistantWithToolCall?.reasoning_content).toBe('thought')
})
test('generic OpenAI-compatible providers do not echo reasoning_content on assistant tool-call messages', async () => {
process.env.OPENAI_BASE_URL = 'https://api.openai.com/v1'
process.env.OPENAI_API_KEY = 'sk-openai-test'
let requestBody: Record<string, unknown> | undefined
globalThis.fetch = (async (_input, init) => {
requestBody = JSON.parse(String(init?.body))
return new Response(
JSON.stringify({
id: 'chatcmpl-1',
model: 'gpt-4o',
choices: [
{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
],
usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
}),
{ headers: { 'Content-Type': 'application/json' } },
)
}) as FetchType
const client = createOpenAIShimClient({}) as OpenAIShimClient
await client.beta.messages.create({
model: 'gpt-4o',
system: 'test',
messages: [
{ role: 'user', content: 'hi' },
@@ -3508,6 +3563,112 @@ test('Moonshot: cn host is also detected', async () => {
expect(requestBody?.store).toBeUndefined()
})
test('DeepSeek sends thinking toggle and normalized reasoning effort', async () => {
  process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1'
  process.env.OPENAI_API_KEY = 'sk-deepseek'
  // Capture the outbound chat-completions payload so we can inspect it below.
  let capturedBody: Record<string, unknown> | undefined
  globalThis.fetch = (async (_input, init) => {
    capturedBody = JSON.parse(String(init?.body))
    const payload = {
      id: 'chatcmpl-1',
      model: 'deepseek-v4-pro',
      choices: [
        { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
      ],
      usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
    }
    return new Response(JSON.stringify(payload), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType
  const client = createOpenAIShimClient({
    reasoningEffort: 'xhigh',
  }) as OpenAIShimClient
  await client.beta.messages.create({
    model: 'deepseek-v4-pro',
    system: 'test',
    messages: [{ role: 'user', content: 'hi' }],
    max_tokens: 64,
    stream: false,
    thinking: { type: 'enabled' },
  })
  // The thinking toggle passes through, and 'xhigh' is normalized to
  // DeepSeek's 'max' effort level; max_tokens stays untouched and the
  // OpenAI-only fields are stripped.
  expect(capturedBody?.thinking).toEqual({ type: 'enabled' })
  expect(capturedBody?.reasoning_effort).toBe('max')
  expect(capturedBody?.max_tokens).toBe(64)
  expect(capturedBody?.max_completion_tokens).toBeUndefined()
  expect(capturedBody?.store).toBeUndefined()
})
test('DeepSeek omits thinking controls when the Anthropic-side request does not set them', async () => {
  process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1'
  process.env.OPENAI_API_KEY = 'sk-deepseek'
  // Capture the outbound chat-completions payload so we can inspect it below.
  let capturedBody: Record<string, unknown> | undefined
  globalThis.fetch = (async (_input, init) => {
    capturedBody = JSON.parse(String(init?.body))
    const payload = {
      id: 'chatcmpl-1',
      model: 'deepseek-v4-flash',
      choices: [
        { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
      ],
      usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
    }
    return new Response(JSON.stringify(payload), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType
  const client = createOpenAIShimClient({}) as OpenAIShimClient
  await client.beta.messages.create({
    model: 'deepseek-v4-flash',
    system: 'test',
    messages: [{ role: 'user', content: 'hi' }],
    max_tokens: 32,
    stream: false,
  })
  // No thinking field on the inbound request, so neither DeepSeek thinking
  // control may appear in the outbound body.
  expect(capturedBody?.thinking).toBeUndefined()
  expect(capturedBody?.reasoning_effort).toBeUndefined()
})
test('DeepSeek forwards an explicit thinking disable toggle for V4 models', async () => {
  process.env.OPENAI_BASE_URL = 'https://api.deepseek.com/v1'
  process.env.OPENAI_API_KEY = 'sk-deepseek'
  // Capture the outbound chat-completions payload so we can inspect it below.
  let capturedBody: Record<string, unknown> | undefined
  globalThis.fetch = (async (_input, init) => {
    capturedBody = JSON.parse(String(init?.body))
    const payload = {
      id: 'chatcmpl-1',
      model: 'deepseek-v4-flash',
      choices: [
        { message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
      ],
      usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
    }
    return new Response(JSON.stringify(payload), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType
  const client = createOpenAIShimClient({}) as OpenAIShimClient
  await client.beta.messages.create({
    model: 'deepseek-v4-flash',
    system: 'test',
    messages: [{ role: 'user', content: 'hi' }],
    max_tokens: 32,
    stream: false,
    thinking: { type: 'disabled' },
  })
  // An explicit disable is forwarded verbatim, and no reasoning effort is
  // attached when thinking is off.
  expect(capturedBody?.thinking).toEqual({ type: 'disabled' })
  expect(capturedBody?.reasoning_effort).toBeUndefined()
})
test('collapses multiple text blocks in tool_result to string for DeepSeek compatibility (issue #774)', async () => {
let requestBody: Record<string, unknown> | undefined

View File

@@ -88,6 +88,9 @@ const MOONSHOT_API_HOSTS = new Set([
'api.moonshot.ai',
'api.moonshot.cn',
])
// Hostnames recognized as DeepSeek's official API endpoint; matched against
// the configured base URL (see isDeepSeekBaseUrl) to opt requests into
// DeepSeek-specific handling. Mirrors MOONSHOT_API_HOSTS above.
const DEEPSEEK_API_HOSTS = new Set([
  'api.deepseek.com',
])
const COPILOT_HEADERS: Record<string, string> = {
'User-Agent': 'GitHubCopilotChat/0.26.7',
@@ -162,6 +165,21 @@ function isMoonshotBaseUrl(baseUrl: string | undefined): boolean {
}
}
/**
 * Whether the configured base URL points at DeepSeek's official API host.
 * Missing values and strings that fail to parse as URLs are treated as
 * "not DeepSeek" rather than throwing.
 */
function isDeepSeekBaseUrl(baseUrl: string | undefined): boolean {
  if (!baseUrl) return false
  let hostname: string
  try {
    hostname = new URL(baseUrl).hostname
  } catch {
    return false
  }
  return DEEPSEEK_API_HOSTS.has(hostname.toLowerCase())
}
/**
 * Map the shim's reasoning-effort levels onto the two values forwarded to
 * DeepSeek: 'xhigh' becomes 'max'; every other level collapses to 'high'.
 */
function normalizeDeepSeekReasoningEffort(
  effort: 'low' | 'medium' | 'high' | 'xhigh',
): 'high' | 'max' {
  if (effort === 'xhigh') {
    return 'max'
  }
  return 'high'
}
function formatRetryAfterHint(response: Response): string {
const ra = response.headers.get('retry-after')
return ra ? ` (Retry-After: ${ra})` : ''
@@ -1487,9 +1505,11 @@ class OpenAIShimMessages {
)
const openaiMessages = convertMessages(compressedMessages, params.system, {
// Moonshot requires every assistant tool-call message to carry
// reasoning_content when its thinking feature is active. Echo it back
// from the thinking block we captured on the inbound response.
preserveReasoningContent: isMoonshotBaseUrl(request.baseUrl),
// reasoning_content when its thinking feature is active. DeepSeek does
// the same for tool-call turns in thinking mode. Echo it back from the
// thinking block we captured on the inbound response.
preserveReasoningContent:
isMoonshotBaseUrl(request.baseUrl) || isDeepSeekBaseUrl(request.baseUrl),
})
const body: Record<string, unknown> = {
@@ -1527,8 +1547,9 @@ class OpenAIShimMessages {
const isGithubModels = isGithub && (githubEndpointType === 'models' || githubEndpointType === 'custom')
const isMoonshot = isMoonshotBaseUrl(request.baseUrl)
const isDeepSeek = isDeepSeekBaseUrl(request.baseUrl)
if ((isGithub || isMistral || isLocal || isMoonshot) && body.max_completion_tokens !== undefined) {
if ((isGithub || isMistral || isLocal || isMoonshot || isDeepSeek) && body.max_completion_tokens !== undefined) {
body.max_tokens = body.max_completion_tokens
delete body.max_completion_tokens
}
@@ -1538,13 +1559,34 @@ class OpenAIShimMessages {
// Moonshot (api.moonshot.ai/.cn) has not published support for the
// parameter either; strip it preemptively to avoid the same class of
// error on strict-parse providers.
if (isMistral || isGeminiMode() || isMoonshot) {
if (isMistral || isGeminiMode() || isMoonshot || isDeepSeek) {
delete body.store
}
if (params.temperature !== undefined) body.temperature = params.temperature
if (params.top_p !== undefined) body.top_p = params.top_p
if (isDeepSeek) {
const requestedThinkingType = (params.thinking as { type?: string } | undefined)?.type
const deepSeekThinkingType =
requestedThinkingType === 'disabled'
? 'disabled'
: requestedThinkingType === 'enabled' || requestedThinkingType === 'adaptive'
? 'enabled'
: undefined
if (deepSeekThinkingType) {
body.thinking = { type: deepSeekThinkingType }
}
if (deepSeekThinkingType === 'enabled') {
const effort = request.reasoning?.effort
if (effort) {
body.reasoning_effort = normalizeDeepSeekReasoningEffort(effort)
}
}
}
if (params.tools && params.tools.length > 0) {
const converted = convertTools(
params.tools as Array<{