feat(zai): add Z.AI GLM Coding Plan provider preset (#896)

* feat(zai): add Z.AI GLM Coding Plan provider preset

Add a dedicated Z.AI provider preset for the GLM Coding Plan, enabling
the GLM-5.1, GLM-5-Turbo, GLM-4.7, and GLM-4.5-Air models through the
OpenAI-compatible shim, with thinking-mode support (reasoning_content),
correct max_tokens handling, and context-window sizing.
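As a rough sketch of the request shaping this enables (names here are
illustrative, not the shim's actual internals; the real logic is in the
OpenAI shim changes below):

    // Sketch only: what the shim effectively does to a Z.AI-bound body.
    type ThinkingParam = { type?: 'enabled' | 'disabled' }

    function shapeZaiRequestBody(
      body: Record<string, unknown>,
      thinking?: ThinkingParam,
    ): Record<string, unknown> {
      // Z.AI expects max_tokens rather than max_completion_tokens.
      if (body.max_completion_tokens !== undefined) {
        body.max_tokens = body.max_completion_tokens
        delete body.max_completion_tokens
      }
      // Z.AI has not published support for `store`; strip it preemptively.
      delete body.store
      // Same thinking format as DeepSeek: { type: 'enabled' | 'disabled' },
      // with reasoning_content coming back on responses.
      if (thinking?.type && thinking.type !== 'disabled') {
        body.thinking = { type: 'enabled' }
      } else if (thinking?.type === 'disabled') {
        body.thinking = { type: 'disabled' }
      }
      return body
    }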

* fix(zai): unify GLM max output token limits across casing variants

glm-5/glm-4.7 had a conservative 16K max-output cap while GLM-5/GLM-4.7
had 131K. Use consistent Z.AI Coding Plan limits for all GLM variants.

* fix(zai): restore DashScope GLM limits, enable GLM thinking support

- Restore lowercase glm-5/glm-4.7 to 16_384 max output (DashScope limits)
  while keeping Z.AI coding plan high limits on uppercase GLM-* keys only
- Add GLM model support to modelSupportsThinking() so reasoning_content
  is enabled when using GLM-5.x/GLM-4.7 models on Z.AI

* fix(zai): tighten GLM regexes, fix misleading context window comment

- Use precise regexes in thinking.ts: exact GLM model matches only
  (including glm-4.5-air), with no false positives on glm-50 or bare
  glm-4; see the sketch below
- Use an uppercase-only match in the StartupScreen rawModel fallback so
  DashScope lowercase glm-* models aren't mislabeled as Z.AI
- Clarify the context window comment: lowercase glm-5.1/glm-5-turbo/
  glm-4.5-air are Z.AI-specific aliases, not DashScope models
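To illustrate the tightened matching (a sketch only; the shipped code in
zaiProvider.ts below uses exact-ID sets rather than this regex):

    // Illustrative regex: matches the exact GLM IDs and nothing else.
    const GLM_EXACT = /^glm-(5(\.1)?|5-turbo|4\.7|4\.5-air)$/i

    GLM_EXACT.test('glm-5.1')     // true
    GLM_EXACT.test('glm-4.5-air') // true
    GLM_EXACT.test('glm-50')      // false, no prefix matching
    GLM_EXACT.test('glm-4')       // false, bare glm-4 is excluded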

* fix(zai): scope GLM detection to Z.AI

* improve readability of max_completion_tokens check

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
chioarub committed 2026-04-26 03:18:59 +03:00 (committed by GitHub)
parent 29f7579377
commit a0d657ee18
16 changed files with 342 additions and 6 deletions


@@ -150,6 +150,10 @@ ANTHROPIC_API_KEY=sk-ant-your-key-here
# OPENAI_MODEL=deepseek-v4-flash
# Optional: OPENAI_MODEL=deepseek-v4-pro
# Legacy aliases also work: deepseek-chat and deepseek-reasoner
# For Z.AI GLM Coding Plan, set:
# OPENAI_BASE_URL=https://api.z.ai/api/coding/paas/v4
# OPENAI_MODEL=GLM-5.1
# Optional: OPENAI_MODEL=GLM-5-Turbo, GLM-4.7, or GLM-4.5-Air
# Use a custom OpenAI-compatible endpoint (optional — defaults to api.openai.com)
# OPENAI_BASE_URL=https://api.openai.com/v1
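Once those env vars are set, something like the following should return a
completion (a sketch, e.g. as a one-off Bun script; it assumes the standard
OpenAI-style /chat/completions route on this base URL and reuses
OPENAI_API_KEY from the env above):

    // Sketch: smoke-test the Z.AI Coding Plan endpoint directly.
    const res = await fetch(
      'https://api.z.ai/api/coding/paas/v4/chat/completions',
      {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
        },
        body: JSON.stringify({
          model: 'GLM-5.1',
          messages: [{ role: 'user', content: 'hi' }],
          max_tokens: 64,
        }),
      },
    )
    const data = (await res.json()) as {
      choices?: Array<{ message?: { content?: string | null } }>
    }
    console.log(data.choices?.[0]?.message?.content)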


@@ -125,6 +125,7 @@ const PRESET_ORDER = [
'OpenAI',
'OpenRouter',
'Together AI',
'Z.AI - GLM Coding Plan',
'Custom',
] as const


@@ -1345,6 +1345,11 @@ export function ProviderManager({ mode, onDone }: Props): React.ReactNode {
label: 'Together AI',
description: 'Together chat/completions endpoint',
},
{
value: 'zai',
label: 'Z.AI - GLM Coding Plan',
description: 'Z.AI GLM coding subscription endpoint',
},
{
value: 'custom',
label: 'Custom',


@@ -116,6 +116,11 @@ describe('detectProvider — direct vendor endpoints', () => {
expect(detectProvider().name).toBe('Mistral')
})
test('api.z.ai labels as Z.AI GLM', () => {
setupOpenAIMode('https://api.z.ai/api/coding/paas/v4', 'GLM-5.1')
expect(detectProvider().name).toBe('Z.AI - GLM')
})
test('default OpenAI URL + gpt-4o labels as OpenAI', () => {
setupOpenAIMode('https://api.openai.com/v1', 'gpt-4o')
expect(detectProvider().name).toBe('OpenAI')
@@ -149,6 +154,21 @@ describe('detectProvider — rawModel fallback when URL is generic', () => {
setupOpenAIMode('https://my-proxy.internal/v1', 'mistral-large-latest')
expect(detectProvider().name).toBe('Mistral')
})
test('custom proxy + exact uppercase GLM ID falls back to Z.AI GLM', () => {
setupOpenAIMode('https://my-proxy.internal/v1', 'GLM-5.1')
expect(detectProvider().name).toBe('Z.AI - GLM')
})
test('custom proxy + lowercase glm ID stays generic OpenAI', () => {
setupOpenAIMode('https://my-proxy.internal/v1', 'glm-5.1')
expect(detectProvider().name).toBe('OpenAI')
})
test('DashScope lowercase glm ID is not mislabeled as Z.AI', () => {
setupOpenAIMode('https://dashscope.aliyuncs.com/compatible-mode/v1', 'glm-5.1')
expect(detectProvider().name).toBe('OpenAI')
})
})
// --- Explicit env flags win over URL heuristics ---


@@ -9,6 +9,7 @@ import { isLocalProviderUrl, resolveProviderRequest } from '../services/api/prov
import { getLocalOpenAICompatibleProviderLabel } from '../utils/providerDiscovery.js'
import { getSettings_DEPRECATED } from '../utils/settings/settings.js'
import { parseUserSpecifiedModel } from '../utils/model/model.js'
import { containsExactZaiGlmModelId, isZaiBaseUrl } from '../utils/zaiProvider.js'
declare const MACRO: { VERSION: string; DISPLAY_VERSION?: string }
@@ -137,6 +138,7 @@ export function detectProvider(): { name: string; model: string; baseUrl: string
else if (/api\.kimi\.com/i.test(baseUrl)) name = 'Moonshot AI - Kimi Code'
else if (/moonshot/i.test(baseUrl)) name = 'Moonshot AI - API'
else if (/deepseek/i.test(baseUrl)) name = 'DeepSeek'
else if (isZaiBaseUrl(baseUrl)) name = 'Z.AI - GLM'
else if (/mistral/i.test(baseUrl)) name = 'Mistral'
// rawModel fallback — fires only when base URL is generic/custom.
else if (/nvidia/i.test(rawModel)) name = 'NVIDIA NIM'
@@ -146,6 +148,7 @@ export function detectProvider(): { name: string; model: string; baseUrl: string
else if (/\bkimi-k/i.test(rawModel) || /moonshot/i.test(rawModel))
name = 'Moonshot AI - API'
else if (/deepseek/i.test(rawModel)) name = 'DeepSeek'
else if (containsExactZaiGlmModelId(rawModel)) name = 'Z.AI - GLM'
else if (/mistral/i.test(rawModel)) name = 'Mistral'
else if (/llama/i.test(rawModel)) name = 'Meta Llama'
else if (/bankr/i.test(baseUrl)) name = 'Bankr'


@@ -3991,3 +3991,79 @@ test('preserves mixed text and image tool results as multipart content', async (
expect(content[0].type).toBe('text')
expect(content[1].type).toBe('image_url')
})
test('Z.AI: uses max_tokens (not max_completion_tokens) and strips store', async () => {
process.env.OPENAI_BASE_URL = 'https://api.z.ai/api/coding/paas/v4'
process.env.OPENAI_API_KEY = 'sk-zai-test'
let requestBody: Record<string, unknown> | undefined
globalThis.fetch = (async (_input, init) => {
requestBody = JSON.parse(String(init?.body))
return new Response(
JSON.stringify({
id: 'chatcmpl-1',
model: 'GLM-5.1',
choices: [
{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
],
usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
}),
{ headers: { 'Content-Type': 'application/json' } },
)
}) as FetchType
const client = createOpenAIShimClient({}) as OpenAIShimClient
await client.beta.messages.create({
model: 'GLM-5.1',
system: 'you are glm',
messages: [{ role: 'user', content: 'hi' }],
max_tokens: 256,
stream: false,
})
expect(requestBody?.max_tokens).toBe(256)
expect(requestBody?.max_completion_tokens).toBeUndefined()
expect(requestBody?.store).toBeUndefined()
})
test('Z.AI: thinking mode enabled when requested', async () => {
process.env.OPENAI_BASE_URL = 'https://api.z.ai/api/coding/paas/v4'
process.env.OPENAI_API_KEY = 'sk-zai-test'
let requestBody: Record<string, unknown> | undefined
globalThis.fetch = (async (_input, init) => {
requestBody = JSON.parse(String(init?.body))
return new Response(
JSON.stringify({
id: 'chatcmpl-1',
model: 'GLM-5.1',
choices: [
{
message: {
role: 'assistant',
content: null,
reasoning_content: 'Let me think...',
},
finish_reason: 'stop',
},
],
usage: { prompt_tokens: 3, completion_tokens: 1, total_tokens: 4 },
}),
{ headers: { 'Content-Type': 'application/json' } },
)
}) as FetchType
const client = createOpenAIShimClient({}) as OpenAIShimClient
await client.beta.messages.create({
model: 'GLM-5.1',
system: 'you are glm',
messages: [{ role: 'user', content: 'think hard' }],
max_tokens: 1024,
stream: false,
thinking: { type: 'enabled', budget_tokens: 1024 },
})
expect((requestBody?.thinking as Record<string, string>)?.type).toBe('enabled')
expect(requestBody?.max_completion_tokens).toBeUndefined()
expect(requestBody?.max_tokens).toBe(1024)
})


@@ -64,6 +64,7 @@ import {
} from './openaiErrorClassification.js'
import { sanitizeSchemaForOpenAICompat } from '../../utils/schemaSanitizer.js'
import { redactSecretValueForDisplay } from '../../utils/providerProfile.js'
import { isZaiBaseUrl } from '../../utils/zaiProvider.js'
import {
normalizeToolArguments,
hasToolFieldMapping,
@@ -93,7 +94,6 @@ const KIMI_CODE_API_HOST = 'api.kimi.com'
const DEEPSEEK_API_HOSTS = new Set([
'api.deepseek.com',
])
const COPILOT_HEADERS: Record<string, string> = {
'User-Agent': 'GitHubCopilotChat/0.26.7',
'Editor-Version': 'vscode/1.99.3',
@@ -1514,7 +1514,8 @@ class OpenAIShimMessages {
// thinking block we captured on the inbound response.
preserveReasoningContent:
isMoonshotCompatibleBaseUrl(request.baseUrl) ||
-  isDeepSeekBaseUrl(request.baseUrl),
+  isDeepSeekBaseUrl(request.baseUrl) ||
+  isZaiBaseUrl(request.baseUrl),
})
const body: Record<string, unknown> = {
@@ -1553,8 +1554,19 @@ class OpenAIShimMessages {
const isMoonshot = isMoonshotCompatibleBaseUrl(request.baseUrl)
const isDeepSeek = isDeepSeekBaseUrl(request.baseUrl)
+const isZai = isZaiBaseUrl(request.baseUrl)
-if ((isGithub || isMistral || isLocal || isMoonshot || isDeepSeek) && body.max_completion_tokens !== undefined) {
+if (
+  (
+    isGithub ||
+    isMistral ||
+    isLocal ||
+    isMoonshot ||
+    isDeepSeek ||
+    isZai
+  ) &&
+  body.max_completion_tokens !== undefined
+) {
body.max_tokens = body.max_completion_tokens
delete body.max_completion_tokens
}
@@ -1562,10 +1574,10 @@ class OpenAIShimMessages {
// mistral and gemini don't recognize body.store — Gemini returns 400
// "Invalid JSON payload received. Unknown name 'store': Cannot find field."
// Moonshot direct API, Kimi Code's OpenAI-compatible coding endpoint,
-// and DeepSeek have not published support for the parameter either;
+// DeepSeek, and Z.AI have not published support for the parameter either;
// strip it preemptively to avoid the same class of error on strict-parse
// providers.
-if (isMistral || isGeminiMode() || isMoonshot || isDeepSeek) {
+if (isMistral || isGeminiMode() || isMoonshot || isDeepSeek || isZai) {
delete body.store
}
@@ -1593,6 +1605,17 @@ class OpenAIShimMessages {
}
}
// Z.AI uses the same thinking format as DeepSeek: { type: "enabled" | "disabled" }
// with reasoning_content in responses.
if (isZai) {
const requestedThinkingType = (params.thinking as { type?: string } | undefined)?.type
if (requestedThinkingType && requestedThinkingType !== 'disabled') {
body.thinking = { type: 'enabled' }
} else if (requestedThinkingType === 'disabled') {
body.thinking = { type: 'disabled' }
}
}
if (params.tools && params.tools.length > 0) {
const converted = convertTools(
params.tools as Array<{


@@ -1,4 +1,4 @@
-import { afterEach, expect, test } from 'bun:test'
+import { afterEach, beforeEach, expect, test } from 'bun:test'
import { getMaxOutputTokensForModel } from '../services/api/claude.ts'
import {
@@ -12,6 +12,12 @@ const originalEnv = {
OPENAI_MODEL: process.env.OPENAI_MODEL,
}
beforeEach(() => {
delete process.env.CLAUDE_CODE_USE_OPENAI
delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
delete process.env.OPENAI_MODEL
})
afterEach(() => {
if (originalEnv.CLAUDE_CODE_USE_OPENAI === undefined) {
delete process.env.CLAUDE_CODE_USE_OPENAI
@@ -265,6 +271,43 @@ test('DashScope glm-4.7 uses provider-specific context and output caps', () => {
})
})
test('Z.AI uppercase GLM models use Coding Plan output caps', () => {
process.env.CLAUDE_CODE_USE_OPENAI = '1'
delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
expect(getContextWindowForModel('GLM-5.1')).toBe(202_752)
expect(getModelMaxOutputTokens('GLM-5.1')).toEqual({
default: 131_072,
upperLimit: 131_072,
})
expect(getModelMaxOutputTokens('GLM-5-Turbo')).toEqual({
default: 131_072,
upperLimit: 131_072,
})
expect(getModelMaxOutputTokens('GLM-4.5-Air')).toEqual({
default: 65_536,
upperLimit: 65_536,
})
})
test('lowercase GLM aliases keep conservative output caps', () => {
process.env.CLAUDE_CODE_USE_OPENAI = '1'
delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
expect(getModelMaxOutputTokens('glm-5.1')).toEqual({
default: 16_384,
upperLimit: 16_384,
})
expect(getModelMaxOutputTokens('glm-5-turbo')).toEqual({
default: 16_384,
upperLimit: 16_384,
})
expect(getModelMaxOutputTokens('glm-4.5-air')).toEqual({
default: 16_384,
upperLimit: 16_384,
})
})
test('DashScope models clamp oversized max output overrides to the provider limit', () => {
process.env.CLAUDE_CODE_USE_OPENAI = '1'
process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = '100000'
@@ -275,4 +318,5 @@ test('DashScope models clamp oversized max output overrides to the provider limi
expect(getMaxOutputTokensForModel('qwen3-max')).toBe(32_768)
expect(getMaxOutputTokensForModel('kimi-k2.5')).toBe(32_768)
expect(getMaxOutputTokensForModel('glm-5')).toBe(16_384)
expect(getMaxOutputTokensForModel('glm-5.1')).toBe(16_384)
})


@@ -232,6 +232,15 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
'kimi-k2.5': 262_144,
'glm-5': 202_752,
'glm-4.7': 202_752,
'glm-5.1': 202_752,
'glm-5-turbo': 202_752,
'glm-4.5-air': 128_000,
// Z.AI Coding Plan models (uppercase variants)
'GLM-5.1': 202_752,
'GLM-5-Turbo': 202_752,
'GLM-5': 202_752,
'GLM-4.7': 202_752,
'GLM-4.5-Air': 128_000,
// Moonshot AI direct API (api.moonshot.ai/v1). Values from Moonshot's
// published model card — all K2 tier share 256K context. Prefix matching
@@ -420,8 +429,18 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
'qwen3-max': 32_768,
'qwen3-max-2026-01-23': 32_768,
'kimi-k2.5': 32_768,
// GLM models — lowercase aliases stay on conservative DashScope-style
// limits; Z.AI Coding Plan's exact uppercase GLM-* IDs use higher limits.
'glm-5': 16_384,
'glm-4.7': 16_384,
'glm-5.1': 16_384,
'glm-5-turbo': 16_384,
'glm-4.5-air': 16_384,
'GLM-5': 131_072,
'GLM-5.1': 131_072,
'GLM-5-Turbo': 131_072,
'GLM-4.7': 131_072,
'GLM-4.5-Air': 65_536,
// Moonshot AI direct API
'kimi-for-coding': 32_768,


@@ -1,5 +1,6 @@
import type { OllamaModelDescriptor } from './providerRecommendation.ts'
import { DEFAULT_OPENAI_BASE_URL } from '../services/api/providerConfig.js'
import { isZaiBaseUrl } from './zaiProvider.js'
export const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434'
export const DEFAULT_ATOMIC_CHAT_BASE_URL = 'http://127.0.0.1:1337'
@@ -205,6 +206,10 @@ export function getLocalOpenAICompatibleProviderLabel(baseUrl?: string): string
if (host.includes('bankr') || haystack.includes('bankr')) {
return 'Bankr'
}
// Z.AI GLM Coding Plan
if (isZaiBaseUrl(parsed.href)) {
return 'Z.AI - GLM'
}
// Moonshot AI direct API
if (
host.includes('moonshot') ||


@@ -15,6 +15,7 @@
export const VALID_PROVIDERS = [
'anthropic',
'bankr',
'zai',
'openai',
'gemini',
'mistral',
@@ -159,6 +160,13 @@ export function applyProviderFlag(
process.env.OPENAI_API_KEY = process.env.BNKR_API_KEY
}
break
case 'zai':
process.env.CLAUDE_CODE_USE_OPENAI = '1'
process.env.OPENAI_BASE_URL ??= 'https://api.z.ai/api/coding/paas/v4'
process.env.OPENAI_MODEL ??= 'GLM-5.1'
if (model) process.env.OPENAI_MODEL = model
break
}
return {}


@@ -625,6 +625,18 @@ describe('getProviderPresetDefaults', () => {
)
expect(defaults.requiresApiKey).toBe(true)
})
test('zai preset defaults to Z.AI GLM Coding Plan endpoint', async () => {
const { getProviderPresetDefaults } = await importFreshProviderProfileModules()
const defaults = getProviderPresetDefaults('zai')
expect(defaults.provider).toBe('openai')
expect(defaults.name).toBe('Z.AI - GLM Coding Plan')
expect(defaults.baseUrl).toBe('https://api.z.ai/api/coding/paas/v4')
expect(defaults.model).toBe('GLM-5.1, GLM-5-Turbo, GLM-4.7, GLM-4.5-Air')
expect(defaults.requiresApiKey).toBe(true)
})
})
describe('setActiveProviderProfile', () => {


@@ -36,6 +36,7 @@ export type ProviderPreset =
| 'custom'
| 'nvidia-nim'
| 'minimax'
| 'zai'
| 'bankr'
| 'atomic-chat'
@@ -317,6 +318,15 @@ export function getProviderPresetDefaults(
apiKey: process.env.BNKR_API_KEY ?? '',
requiresApiKey: true,
}
case 'zai':
return {
provider: 'openai',
name: 'Z.AI - GLM Coding Plan',
baseUrl: 'https://api.z.ai/api/coding/paas/v4',
model: 'GLM-5.1, GLM-5-Turbo, GLM-4.7, GLM-4.5-Air',
apiKey: '',
requiresApiKey: true,
}
case 'ollama':
default:
return {


@@ -0,0 +1,64 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { modelSupportsThinking } from './thinking.js'
const ENV_KEYS = [
'CLAUDE_CODE_USE_OPENAI',
'CLAUDE_CODE_USE_GEMINI',
'CLAUDE_CODE_USE_GITHUB',
'CLAUDE_CODE_USE_MISTRAL',
'CLAUDE_CODE_USE_BEDROCK',
'CLAUDE_CODE_USE_VERTEX',
'CLAUDE_CODE_USE_FOUNDRY',
'OPENAI_BASE_URL',
'OPENAI_API_BASE',
'OPENAI_MODEL',
'NVIDIA_NIM',
'MINIMAX_API_KEY',
'USER_TYPE',
]
const originalEnv: Record<string, string | undefined> = {}
beforeEach(() => {
for (const key of ENV_KEYS) {
originalEnv[key] = process.env[key]
delete process.env[key]
}
})
afterEach(() => {
for (const key of ENV_KEYS) {
if (originalEnv[key] === undefined) {
delete process.env[key]
} else {
process.env[key] = originalEnv[key]
}
}
})
describe('modelSupportsThinking — Z.AI GLM', () => {
test('enables thinking for exact GLM models on api.z.ai', () => {
process.env.CLAUDE_CODE_USE_OPENAI = '1'
process.env.OPENAI_BASE_URL = 'https://api.z.ai/api/coding/paas/v4'
expect(modelSupportsThinking('GLM-5.1')).toBe(true)
expect(modelSupportsThinking('GLM-5-Turbo')).toBe(true)
expect(modelSupportsThinking('GLM-4.7')).toBe(true)
expect(modelSupportsThinking('GLM-4.5-Air')).toBe(true)
})
test('does not enable GLM thinking on non-Z.AI OpenAI-compatible endpoints', () => {
process.env.CLAUDE_CODE_USE_OPENAI = '1'
process.env.OPENAI_BASE_URL = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
expect(modelSupportsThinking('glm-5.1')).toBe(false)
expect(modelSupportsThinking('GLM-5.1')).toBe(false)
})
test('does not match unrelated GLM-looking model names', () => {
process.env.CLAUDE_CODE_USE_OPENAI = '1'
process.env.OPENAI_BASE_URL = 'https://api.z.ai/api/coding/paas/v4'
expect(modelSupportsThinking('glm-50')).toBe(false)
})
})


@@ -6,6 +6,7 @@ import { getCanonicalName } from './model/model.js'
import { get3PModelCapabilityOverride } from './model/modelSupportOverrides.js'
import { getAPIProvider } from './model/providers.js'
import { getSettingsWithErrors } from './settings/settings.js'
import { isZaiBaseUrl, isZaiGlmModel } from './zaiProvider.js'
export type ThinkingConfig =
| { type: 'adaptive' }
@@ -111,6 +112,13 @@ export function modelSupportsThinking(model: string): boolean {
) {
return true
}
if (
provider === 'openai' &&
isZaiBaseUrl(process.env.OPENAI_BASE_URL ?? process.env.OPENAI_API_BASE) &&
isZaiGlmModel(canonical)
) {
return true
}
// 3P (Bedrock/Vertex): only Opus 4+ and Sonnet 4+
return canonical.includes('sonnet-4') || canonical.includes('opus-4')
}

src/utils/zaiProvider.ts (new file, 34 lines)

@@ -0,0 +1,34 @@
const ZAI_API_HOSTS = new Set([
  'api.z.ai',
])

// Exact model IDs offered on the Z.AI GLM Coding Plan (case-sensitive).
const ZAI_GLM_MODEL_IDS = new Set([
  'GLM-5.1',
  'GLM-5-Turbo',
  'GLM-5',
  'GLM-4.7',
  'GLM-4.5-Air',
])

const ZAI_GLM_MODEL_IDS_LOWER = new Set(
  [...ZAI_GLM_MODEL_IDS].map(model => model.toLowerCase()),
)

// True when the configured base URL points at a known Z.AI API host.
export function isZaiBaseUrl(baseUrl: string | undefined): boolean {
  if (!baseUrl) return false
  try {
    return ZAI_API_HOSTS.has(new URL(baseUrl).hostname.toLowerCase())
  } catch {
    return false
  }
}

// Case-insensitive exact match against the known GLM model IDs.
export function isZaiGlmModel(model: string): boolean {
  return ZAI_GLM_MODEL_IDS_LOWER.has(model.trim().toLowerCase())
}

// Case-sensitive match over comma-separated model lists, so lowercase
// DashScope-style glm-* IDs never trigger the Z.AI label.
export function containsExactZaiGlmModelId(model: string): boolean {
  return model
    .split(',')
    .some(entry => ZAI_GLM_MODEL_IDS.has(entry.trim()))
}
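For reference, the expected helper behavior (this follows from the sets
above and from the detection tests earlier in this commit):

    isZaiBaseUrl('https://api.z.ai/api/coding/paas/v4')               // true
    isZaiBaseUrl('https://dashscope.aliyuncs.com/compatible-mode/v1') // false
    isZaiGlmModel('glm-5.1')                           // true (case-insensitive)
    containsExactZaiGlmModelId('GLM-5.1, GLM-5-Turbo') // true
    containsExactZaiGlmModelId('glm-5.1')              // false (case-sensitive)
    containsExactZaiGlmModelId('GLM-50')               // false (exact IDs only)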