feat(llm): GLM dual-region split + catalog refresh

Zhipu serves GLM under two brands with separate accounts (Z.AI
international vs BigModel China); the CLI URL pointed at one while
the openai_client default pointed at the other. Split into glm +
glm-cn with a secondary region prompt (same UX as Qwen + MiniMax).
Catalog adds glm-5-turbo and glm-4.5-air per docs.z.ai.
This commit is contained in:
Yijia-Xiao
2026-05-11 04:19:50 +00:00
parent faaeebac70
commit d0dd0420ad
8 changed files with 72 additions and 17 deletions

View File

@@ -4,7 +4,9 @@ from .base_client import BaseLLMClient
# Providers that use the OpenAI-compatible chat completions API
_OPENAI_COMPATIBLE = (
"openai", "xai", "deepseek", "qwen", "glm",
"openai", "xai", "deepseek",
"qwen", "qwen-cn",
"glm", "glm-cn",
"minimax", "minimax-cn",
"ollama", "openrouter",
)

View File

@@ -8,6 +8,25 @@ ModelOption = Tuple[str, str]
ProviderModeOptions = Dict[str, Dict[str, List[ModelOption]]]
# Shared model list for GLM via Z.AI (international) and BigModel (China).
# Source: docs.z.ai (GLM Coding Plan supported models + LLM guides).
# All GLM 4.7+ entries support thinking mode via thinking={"type":"enabled"}.
# Maps a mode key ("quick" / "deep") to its list of ModelOption tuples.
# MODEL_OPTIONS references this same dict object for both the "glm" and
# "glm-cn" provider keys, so an edit here changes both regions at once.
# NOTE(review): each tuple looks like (display label, model ID) — confirm
# against the code that consumes ModelOption.
_GLM_MODELS: Dict[str, List[ModelOption]] = {
# "quick" mode choices.
"quick": [
("GLM-5-Turbo - Fast, switchable thinking modes", "glm-5-turbo"),
# glm-4.7 is deliberately listed under both modes.
("GLM-4.7 - Previous-gen flagship", "glm-4.7"),
("GLM-4.5-Air - Lightweight, cost-efficient", "glm-4.5-air"),
# NOTE(review): "custom" presumably acts as a sentinel for a
# user-entered model ID rather than a real model name — confirm.
("Custom model ID", "custom"),
],
# "deep" mode choices.
"deep": [
("GLM-5.1 - Latest flagship, 204K ctx", "glm-5.1"),
("GLM-5 - Flagship, 204K ctx", "glm-5"),
("GLM-4.7 - Previous-gen flagship", "glm-4.7"),
("Custom model ID", "custom"),
],
}
# Shared model list for Qwen's global (dashscope-intl) and CN (dashscope) endpoints.
# Source: modelstudio.console.alibabacloud.com (Featured Models — Flagship + Cost-optimized).
#
@@ -126,18 +145,10 @@ MODEL_OPTIONS: ProviderModeOptions = {
# (dashscope) endpoints, so the two provider keys share one model list.
"qwen": _QWEN_MODELS,
"qwen-cn": _QWEN_MODELS,
"glm": {
"quick": [
("GLM-4.7", "glm-4.7"),
("GLM-5", "glm-5"),
("Custom model ID", "custom"),
],
"deep": [
("GLM-5.1", "glm-5.1"),
("GLM-5", "glm-5"),
("Custom model ID", "custom"),
],
},
# GLM: Z.AI (international) and BigModel (China) host the same model
# IDs; the two provider keys share one model list.
"glm": _GLM_MODELS,
"glm-cn": _GLM_MODELS,
# MiniMax: same model IDs across global (.io) and China (.com) regions,
# so the two provider keys share one model list.
"minimax": _MINIMAX_MODELS,

View File

@@ -139,8 +139,16 @@ _PASSTHROUGH_KWARGS = (
_PROVIDER_CONFIG = {
"xai": ("https://api.x.ai/v1", "XAI_API_KEY"),
"deepseek": ("https://api.deepseek.com", "DEEPSEEK_API_KEY"),
# DashScope exposes two regional endpoints with separate accounts; an
# international key won't authenticate against the China endpoint and
# vice versa (fixes issue #758).
"qwen": ("https://dashscope-intl.aliyuncs.com/compatible-mode/v1", "DASHSCOPE_API_KEY"),
"qwen-cn": ("https://dashscope.aliyuncs.com/compatible-mode/v1", "DASHSCOPE_CN_API_KEY"),
# Zhipu exposes the same GLM models under two brands with separate
# accounts: Z.AI (international, api.z.ai) and BigModel
# (open.bigmodel.cn, China). Keys aren't interchangeable across them.
"glm": ("https://api.z.ai/api/paas/v4/", "ZHIPU_API_KEY"),
"glm-cn": ("https://open.bigmodel.cn/api/paas/v4/", "ZHIPU_CN_API_KEY"),
# MiniMax exposes two regional endpoints with separate keys; mainland
# Chinese users hit .com while global users hit .io.
"minimax": ("https://api.minimax.io/v1", "MINIMAX_API_KEY"),