feat(llm): unify OpenAI-compatible providers behind a registry + generic endpoint

The OpenAI-compatible family (openai, xAI, DeepSeek, Qwen, GLM, MiniMax,
OpenRouter, Ollama) all speak the same Chat Completions API and differ only by
base_url, key, and two narrow wire-format quirks already isolated in subclasses.
Replace the scattered base-URL dict, key handling, and client-class branches with
a single ProviderSpec registry that drives both get_llm and the factory; provider
quirks stay in their subclasses. Add a generic "openai_compatible" provider for any
OpenAI-compatible server (vLLM, LM Studio, llama.cpp, relays) via backend_url +
optional key — adding a provider is now one registry row. Native Anthropic/Google
keep their own clients (genuinely different APIs). Also fixes the env backend URL
being ignored when the provider was chosen interactively (#978).
This commit is contained in:
Yijia-Xiao
2026-06-14 03:22:24 +00:00
parent 4e7821d574
commit 20d3b0782f
11 changed files with 301 additions and 85 deletions

View File

@@ -32,6 +32,10 @@ PROVIDER_API_KEY_ENV: dict[str, Optional[str]] = {
"openrouter": "OPENROUTER_API_KEY",
# Local runtimes do not authenticate.
"ollama": None,
# Generic OpenAI-compatible endpoint: the client reads this when set (keyed
# relays), but it is marked key-optional in the provider registry so the CLI
# never forces a prompt and keyless local servers still work.
"openai_compatible": "OPENAI_COMPATIBLE_API_KEY",
}

View File

@@ -2,16 +2,6 @@ from typing import Optional
from .base_client import BaseLLMClient
# Providers that use the OpenAI-compatible chat completions API
_OPENAI_COMPATIBLE = (
"openai", "xai", "deepseek",
"qwen", "qwen-cn",
"glm", "glm-cn",
"minimax", "minimax-cn",
"ollama", "openrouter",
)
def create_llm_client(
provider: str,
model: str,
@@ -38,10 +28,9 @@ def create_llm_client(
"""
provider_lower = provider.lower()
if provider_lower in _OPENAI_COMPATIBLE:
from .openai_client import OpenAIClient
return OpenAIClient(model, base_url, provider=provider_lower, **kwargs)
# Native (non-OpenAI) APIs are matched first so their string check doesn't
# import the OpenAI client. Everything else is OpenAI-compatible and routes
# through the provider registry (single source of truth).
if provider_lower == "anthropic":
from .anthropic_client import AnthropicClient
return AnthropicClient(model, base_url, **kwargs)
@@ -54,4 +43,8 @@ def create_llm_client(
from .azure_client import AzureOpenAIClient
return AzureOpenAIClient(model, base_url, **kwargs)
from .openai_client import OpenAIClient, is_openai_compatible
if is_openai_compatible(provider_lower):
return OpenAIClient(model, base_url, provider=provider_lower, **kwargs)
raise ValueError(f"Unsupported LLM provider: {provider}")

View File

@@ -175,6 +175,12 @@ MODEL_OPTIONS: ProviderModeOptions = {
("Custom model ID", "custom"),
],
},
# Generic OpenAI-compatible endpoint: the model is whatever the user's
# server serves, so only "Custom model ID" is offered.
"openai_compatible": {
"quick": [("Custom model ID", "custom")],
"deep": [("Custom model ID", "custom")],
},
}

View File

@@ -1,4 +1,5 @@
import os
from dataclasses import dataclass
from typing import Any, Optional
from langchain_core.messages import AIMessage
@@ -148,39 +149,56 @@ _PASSTHROUGH_KWARGS = (
"api_key", "callbacks", "http_client", "http_async_client",
)
# Provider base URLs. API-key env vars live in api_key_env.PROVIDER_API_KEY_ENV
# (one canonical mapping consulted by both this client and the CLI's
# interactive key-prompt). Dual-region providers (qwen/glm/minimax) keep
# separate endpoints because international and China accounts cannot share
# credentials (#758).
_PROVIDER_BASE_URL = {
"xai": "https://api.x.ai/v1",
"deepseek": "https://api.deepseek.com",
"qwen": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
"qwen-cn": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"glm": "https://api.z.ai/api/paas/v4/",
"glm-cn": "https://open.bigmodel.cn/api/paas/v4/",
"minimax": "https://api.minimax.io/v1",
"minimax-cn": "https://api.minimaxi.com/v1",
"openrouter": "https://openrouter.ai/api/v1",
"ollama": "http://localhost:11434/v1",
@dataclass(frozen=True)
class ProviderSpec:
    """Immutable description of a single OpenAI-compatible provider.

    Every member of the OpenAI-compatible family (OpenAI, xAI, DeepSeek, Qwen,
    GLM, MiniMax, OpenRouter, Ollama, plus any user-supplied endpoint) talks
    the same Chat Completions API; the fields below capture the only ways in
    which they differ. One registry row therefore stands in for what used to
    be a per-provider base-URL dict, auth handling, and client-class branches.

    Native Anthropic / Google are deliberately NOT described by this class:
    their APIs are genuinely different and they keep their own clients.

    The name of the API-key env var is not stored here. It stays in
    ``api_key_env.PROVIDER_API_KEY_ENV``, the single mapping consulted by both
    this client and the CLI prompt.

    Attributes:
        chat_class: Chat model class to instantiate; provider-specific
            wire-format quirks live in the subclass.
        base_url: Default endpoint. ``None`` means the SDK default is used.
        base_url_env: Name of an env var that overrides ``base_url``
            (e.g. ``OLLAMA_BASE_URL``), or ``None`` if there is no override.
        key_optional: When true, a key is neither required nor prompted for;
            a placeholder is sent if none is set.
        placeholder_key: Value sent as the API key when no key is available
            (keyless local servers).
        require_base_url: When true, it is an error if no base URL can be
            resolved (used by the generic endpoint).
        use_responses_api: When true, the native OpenAI Responses API is used.
    """

    chat_class: type = NormalizedChatOpenAI
    base_url: Optional[str] = None
    base_url_env: Optional[str] = None
    key_optional: bool = False
    placeholder_key: str = "EMPTY"
    require_base_url: bool = False
    use_responses_api: bool = False
# Registry of the OpenAI-compatible provider family, keyed by provider name.
# This mapping is the single source of truth for the family. The dual-region
# providers (qwen/glm/minimax) each get a separate "-cn" row because
# international and China accounts cannot share credentials (#758).
OPENAI_COMPATIBLE_PROVIDERS: dict[str, ProviderSpec] = {
    "openai": ProviderSpec(use_responses_api=True),
    "xai": ProviderSpec(base_url="https://api.x.ai/v1"),
    "deepseek": ProviderSpec(
        chat_class=DeepSeekChatOpenAI,
        base_url="https://api.deepseek.com",
    ),
    "qwen": ProviderSpec(
        base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
    ),
    "qwen-cn": ProviderSpec(
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    ),
    "glm": ProviderSpec(base_url="https://api.z.ai/api/paas/v4/"),
    "glm-cn": ProviderSpec(base_url="https://open.bigmodel.cn/api/paas/v4/"),
    "minimax": ProviderSpec(
        chat_class=MinimaxChatOpenAI,
        base_url="https://api.minimax.io/v1",
    ),
    "minimax-cn": ProviderSpec(
        chat_class=MinimaxChatOpenAI,
        base_url="https://api.minimaxi.com/v1",
    ),
    "openrouter": ProviderSpec(base_url="https://openrouter.ai/api/v1"),
    "ollama": ProviderSpec(
        base_url="http://localhost:11434/v1",
        base_url_env="OLLAMA_BASE_URL",
        key_optional=True,
        placeholder_key="ollama",
    ),
    # Generic endpoint: the user must supply the base URL, and the key is
    # optional so keyless local servers work.
    "openai_compatible": ProviderSpec(
        require_base_url=True,
        key_optional=True,
    ),
}
def _resolve_provider_base_url(provider: str) -> Optional[str]:
    """Return the base URL to use for ``provider``, honoring env overrides.

    Only Ollama has an env-var override: a non-empty ``OLLAMA_BASE_URL`` wins
    over the built-in default, so a remote ollama-serve can be targeted
    without editing code. The environment is read on every call rather than
    at import time, so tests that monkeypatch it after import see the change.

    Returns:
        The override or the default from ``_PROVIDER_BASE_URL``; ``None`` when
        the provider has no entry there.
    """
    # Other providers never consult the environment.
    override = os.environ.get("OLLAMA_BASE_URL") if provider == "ollama" else None
    # An unset or empty override falls through to the static default.
    return override or _PROVIDER_BASE_URL.get(provider)
def is_openai_compatible(provider: str) -> bool:
    """Report whether ``provider`` has a row in the OpenAI-compatible registry.

    The match is case-insensitive: the name is lower-cased before the lookup
    in ``OPENAI_COMPATIBLE_PROVIDERS``.
    """
    normalized = provider.lower()
    return normalized in OPENAI_COMPATIBLE_PROVIDERS
class OpenAIClient(BaseLLMClient):
@@ -203,28 +221,47 @@ class OpenAIClient(BaseLLMClient):
self.provider = provider.lower()
def get_llm(self) -> Any:
"""Return configured ChatOpenAI instance."""
"""Return a configured ChatOpenAI instance, driven by the provider registry."""
self.warn_if_unknown_model()
llm_kwargs = {"model": self.model}
spec = OPENAI_COMPATIBLE_PROVIDERS.get(self.provider)
chat_cls = NormalizedChatOpenAI
# Provider-specific base URL and auth. An explicit base_url on the
# client (e.g. a corporate proxy) takes precedence over the
# provider default so users can route through their own gateway.
if self.provider in _PROVIDER_BASE_URL:
llm_kwargs["base_url"] = self.base_url or _resolve_provider_base_url(self.provider)
if spec is not None:
chat_cls = spec.chat_class
# base_url precedence: explicit client base_url (carries the config /
# TRADINGAGENTS_LLM_BACKEND_URL value) > provider env override (e.g.
# OLLAMA_BASE_URL) > provider default. None means use the SDK default.
env_base_url = os.environ.get(spec.base_url_env) if spec.base_url_env else None
base_url = self.base_url or env_base_url or spec.base_url
if spec.require_base_url and not base_url:
raise ValueError(
f"Provider '{self.provider}' requires a base_url. Set it via "
"backend_url / TRADINGAGENTS_LLM_BACKEND_URL to your endpoint, "
"e.g. http://localhost:8000/v1 (vLLM) or http://localhost:1234/v1 "
"(LM Studio)."
)
if base_url:
llm_kwargs["base_url"] = base_url
# API key: required unless key_optional; keyless local servers get a
# placeholder. The env-var name is the single source in api_key_env.
api_key_env = get_api_key_env(self.provider)
if api_key_env:
api_key = os.environ.get(api_key_env)
if api_key:
llm_kwargs["api_key"] = api_key
else:
raise ValueError(
f"API key for provider '{self.provider}' is not set. "
f"Please set the {api_key_env} environment variable "
f"(e.g. add {api_key_env}=your_key to your .env file)."
)
else:
llm_kwargs["api_key"] = "ollama"
api_key = os.environ.get(api_key_env) if api_key_env else None
if api_key:
llm_kwargs["api_key"] = api_key
elif spec.key_optional:
llm_kwargs["api_key"] = spec.placeholder_key
elif api_key_env:
raise ValueError(
f"API key for provider '{self.provider}' is not set. "
f"Please set the {api_key_env} environment variable "
f"(e.g. add {api_key_env}=your_key to your .env file)."
)
if spec.use_responses_api:
llm_kwargs["use_responses_api"] = True
elif self.base_url:
llm_kwargs["base_url"] = self.base_url
@@ -233,19 +270,7 @@ class OpenAIClient(BaseLLMClient):
if key in self.kwargs:
llm_kwargs[key] = self.kwargs[key]
# Native OpenAI: use Responses API for consistent behavior across
# all model families. Third-party providers use Chat Completions.
if self.provider == "openai":
llm_kwargs["use_responses_api"] = True
# Provider-specific quirks live in their own subclasses so the
# base NormalizedChatOpenAI stays free of provider branches.
if self.provider == "deepseek":
chat_cls = DeepSeekChatOpenAI
elif self.provider in ("minimax", "minimax-cn"):
chat_cls = MinimaxChatOpenAI
else:
chat_cls = NormalizedChatOpenAI
# The subclass (provider quirks) comes from the registry spec.
return chat_cls(**llm_kwargs)
def validate_model(self) -> bool:

View File

@@ -3,21 +3,25 @@
from .model_catalog import get_known_models
# Providers whose model names are user-defined (local servers, relays, any
# OpenAI-compatible endpoint), so any model string is accepted without warning.
_ANY_MODEL_PROVIDERS = ("ollama", "openrouter", "openai_compatible")
VALID_MODELS = {
provider: models
for provider, models in get_known_models().items()
if provider not in ("ollama", "openrouter")
if provider not in _ANY_MODEL_PROVIDERS
}
def validate_model(provider: str, model: str) -> bool:
"""Check if model name is valid for the given provider.
For ollama, openrouter - any model is accepted.
For ollama, openrouter, and openai_compatible - any model is accepted.
"""
provider_lower = provider.lower()
if provider_lower in ("ollama", "openrouter"):
if provider_lower in _ANY_MODEL_PROVIDERS:
return True
if provider_lower not in VALID_MODELS: