feat(llm): unify OpenAI-compatible providers behind a registry + generic endpoint

The OpenAI-compatible family (openai, xAI, DeepSeek, Qwen, GLM, MiniMax, OpenRouter, Ollama) all speak the same Chat Completions API and differ only by base_url, key, and two narrow wire-format quirks already isolated in subclasses. Replace the scattered base-URL dict, key handling, and client-class branches with one ProviderSpec registry that get_llm and the factory drive off; provider quirks stay in their subclasses. Add a generic "openai_compatible" provider for any OpenAI-compatible server (vLLM, LM Studio, llama.cpp, relays) via backend_url + optional key — adding a provider is now one registry row. Native Anthropic/Google keep their own clients (genuinely different APIs). Also fixes the env backend URL being ignored when the provider was chosen interactively (#978).
2026-06-16 21:06:15 +03:00 · 2026-06-14 03:22:24 +00:00
parent 4e7821d574
commit 20d3b0782f
11 changed files with 301 additions and 85 deletions
--- a/tradingagents/llm_clients/api_key_env.py
+++ b/tradingagents/llm_clients/api_key_env.py
@@ -32,6 +32,10 @@ PROVIDER_API_KEY_ENV: dict[str, Optional[str]] = {
    "openrouter": "OPENROUTER_API_KEY",
    # Local runtimes do not authenticate.
    "ollama":     None,
+    # Generic OpenAI-compatible endpoint: the client reads this when set (keyed
+    # relays), but it is marked key-optional in the provider registry so the CLI
+    # never forces a prompt and keyless local servers still work.
+    "openai_compatible": "OPENAI_COMPATIBLE_API_KEY",
 }


--- a/tradingagents/llm_clients/factory.py
+++ b/tradingagents/llm_clients/factory.py
@@ -2,16 +2,6 @@ from typing import Optional

 from .base_client import BaseLLMClient

-# Providers that use the OpenAI-compatible chat completions API
-_OPENAI_COMPATIBLE = (
-    "openai", "xai", "deepseek",
-    "qwen", "qwen-cn",
-    "glm", "glm-cn",
-    "minimax", "minimax-cn",
-    "ollama", "openrouter",
-)
-
-
 def create_llm_client(
    provider: str,
    model: str,
@@ -38,10 +28,9 @@ def create_llm_client(
    """
    provider_lower = provider.lower()

-    if provider_lower in _OPENAI_COMPATIBLE:
-        from .openai_client import OpenAIClient
-        return OpenAIClient(model, base_url, provider=provider_lower, **kwargs)
-
+    # Native (non-OpenAI) APIs are matched first so their string check doesn't
+    # import the OpenAI client. Everything else is OpenAI-compatible and routes
+    # through the provider registry (single source of truth).
    if provider_lower == "anthropic":
        from .anthropic_client import AnthropicClient
        return AnthropicClient(model, base_url, **kwargs)
@@ -54,4 +43,8 @@ def create_llm_client(
        from .azure_client import AzureOpenAIClient
        return AzureOpenAIClient(model, base_url, **kwargs)

+    from .openai_client import OpenAIClient, is_openai_compatible
+    if is_openai_compatible(provider_lower):
+        return OpenAIClient(model, base_url, provider=provider_lower, **kwargs)
+
    raise ValueError(f"Unsupported LLM provider: {provider}")
--- a/tradingagents/llm_clients/model_catalog.py
+++ b/tradingagents/llm_clients/model_catalog.py
@@ -175,6 +175,12 @@ MODEL_OPTIONS: ProviderModeOptions = {
            ("Custom model ID", "custom"),
        ],
    },
+    # Generic OpenAI-compatible endpoint: the model is whatever the user's
+    # server serves, so only "Custom model ID" is offered.
+    "openai_compatible": {
+        "quick": [("Custom model ID", "custom")],
+        "deep": [("Custom model ID", "custom")],
+    },
 }


--- a/tradingagents/llm_clients/openai_client.py
+++ b/tradingagents/llm_clients/openai_client.py
@@ -1,4 +1,5 @@
 import os
+from dataclasses import dataclass
 from typing import Any, Optional

 from langchain_core.messages import AIMessage
@@ -148,39 +149,56 @@ _PASSTHROUGH_KWARGS = (
    "api_key", "callbacks", "http_client", "http_async_client",
 )

-# Provider base URLs. API-key env vars live in api_key_env.PROVIDER_API_KEY_ENV
-# (one canonical mapping consulted by both this client and the CLI's
-# interactive key-prompt). Dual-region providers (qwen/glm/minimax) keep
-# separate endpoints because international and China accounts cannot share
-# credentials (#758).
-_PROVIDER_BASE_URL = {
-    "xai":        "https://api.x.ai/v1",
-    "deepseek":   "https://api.deepseek.com",
-    "qwen":       "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
-    "qwen-cn":    "https://dashscope.aliyuncs.com/compatible-mode/v1",
-    "glm":        "https://api.z.ai/api/paas/v4/",
-    "glm-cn":     "https://open.bigmodel.cn/api/paas/v4/",
-    "minimax":    "https://api.minimax.io/v1",
-    "minimax-cn": "https://api.minimaxi.com/v1",
-    "openrouter": "https://openrouter.ai/api/v1",
-    "ollama":     "http://localhost:11434/v1",
+@dataclass(frozen=True)
+class ProviderSpec:
+    """Declarative config for one OpenAI-compatible provider.
+
+    The OpenAI-compatible family (OpenAI, xAI, DeepSeek, Qwen, GLM, MiniMax,
+    OpenRouter, Ollama, and any user endpoint) all speak the same Chat
+    Completions API and differ only by these fields — so one row here replaces
+    the former per-provider base-URL dict, auth handling, and client-class
+    branches. Native Anthropic / Google use their own clients (genuinely
+    different APIs) and are intentionally NOT in this registry.
+
+    The API-key env var stays in ``api_key_env.PROVIDER_API_KEY_ENV`` (the single
+    source consulted by both this client and the CLI prompt); only behavior that
+    is provider-specific (base URL, key optionality, wire-format quirks via
+    ``chat_class``) lives here.
+    """
+
+    chat_class: type = NormalizedChatOpenAI   # provider quirks live in the subclass
+    base_url: Optional[str] = None            # default endpoint (None -> SDK default)
+    base_url_env: Optional[str] = None        # env var that overrides base_url (e.g. OLLAMA_BASE_URL)
+    key_optional: bool = False                # don't require/prompt; send a placeholder if unset
+    placeholder_key: str = "EMPTY"            # sent when no key is available (keyless local servers)
+    require_base_url: bool = False            # error if no base_url is resolved (generic endpoint)
+    use_responses_api: bool = False           # native OpenAI Responses API
+
+
+# Single source of truth for the OpenAI-compatible provider family. Dual-region
+# providers (qwen/glm/minimax) keep separate endpoints because international and
+# China accounts cannot share credentials (#758).
+OPENAI_COMPATIBLE_PROVIDERS: dict[str, ProviderSpec] = {
+    "openai":     ProviderSpec(use_responses_api=True),
+    "xai":        ProviderSpec(base_url="https://api.x.ai/v1"),
+    "deepseek":   ProviderSpec(base_url="https://api.deepseek.com", chat_class=DeepSeekChatOpenAI),
+    "qwen":       ProviderSpec(base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1"),
+    "qwen-cn":    ProviderSpec(base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"),
+    "glm":        ProviderSpec(base_url="https://api.z.ai/api/paas/v4/"),
+    "glm-cn":     ProviderSpec(base_url="https://open.bigmodel.cn/api/paas/v4/"),
+    "minimax":    ProviderSpec(base_url="https://api.minimax.io/v1", chat_class=MinimaxChatOpenAI),
+    "minimax-cn": ProviderSpec(base_url="https://api.minimaxi.com/v1", chat_class=MinimaxChatOpenAI),
+    "openrouter": ProviderSpec(base_url="https://openrouter.ai/api/v1"),
+    "ollama":     ProviderSpec(base_url="http://localhost:11434/v1", base_url_env="OLLAMA_BASE_URL",
+                               key_optional=True, placeholder_key="ollama"),
+    # Generic endpoint: user supplies base_url; key optional (keyless local).
+    "openai_compatible": ProviderSpec(require_base_url=True, key_optional=True),
 }


-def _resolve_provider_base_url(provider: str) -> Optional[str]:
-    """Default base URL for ``provider``, with env-var overrides where defined.
-
-    Currently only Ollama supports an env-var override (``OLLAMA_BASE_URL``),
-    matching the convention in the broader Ollama tooling ecosystem so users
-    can point at a remote ollama-serve without editing code. The check is
-    call-time, not import-time, so tests that monkeypatch the env after
-    import behave correctly.
-    """
-    if provider == "ollama":
-        env_url = os.environ.get("OLLAMA_BASE_URL")
-        if env_url:
-            return env_url
-    return _PROVIDER_BASE_URL.get(provider)
+def is_openai_compatible(provider: str) -> bool:
+    """Whether ``provider`` is served by the OpenAI-compatible registry."""
+    return provider.lower() in OPENAI_COMPATIBLE_PROVIDERS


 class OpenAIClient(BaseLLMClient):
@@ -203,28 +221,47 @@ class OpenAIClient(BaseLLMClient):
        self.provider = provider.lower()

    def get_llm(self) -> Any:
-        """Return configured ChatOpenAI instance."""
+        """Return a configured ChatOpenAI instance, driven by the provider registry."""
        self.warn_if_unknown_model()
        llm_kwargs = {"model": self.model}
+        spec = OPENAI_COMPATIBLE_PROVIDERS.get(self.provider)
+        chat_cls = NormalizedChatOpenAI

-        # Provider-specific base URL and auth. An explicit base_url on the
-        # client (e.g. a corporate proxy) takes precedence over the
-        # provider default so users can route through their own gateway.
-        if self.provider in _PROVIDER_BASE_URL:
-            llm_kwargs["base_url"] = self.base_url or _resolve_provider_base_url(self.provider)
+        if spec is not None:
+            chat_cls = spec.chat_class
+
+            # base_url precedence: explicit client base_url (carries the config /
+            # TRADINGAGENTS_LLM_BACKEND_URL value) > provider env override (e.g.
+            # OLLAMA_BASE_URL) > provider default. None means use the SDK default.
+            env_base_url = os.environ.get(spec.base_url_env) if spec.base_url_env else None
+            base_url = self.base_url or env_base_url or spec.base_url
+            if spec.require_base_url and not base_url:
+                raise ValueError(
+                    f"Provider '{self.provider}' requires a base_url. Set it via "
+                    "backend_url / TRADINGAGENTS_LLM_BACKEND_URL to your endpoint, "
+                    "e.g. http://localhost:8000/v1 (vLLM) or http://localhost:1234/v1 "
+                    "(LM Studio)."
+                )
+            if base_url:
+                llm_kwargs["base_url"] = base_url
+
+            # API key: required unless key_optional; keyless local servers get a
+            # placeholder. The env-var name is the single source in api_key_env.
            api_key_env = get_api_key_env(self.provider)
-            if api_key_env:
-                api_key = os.environ.get(api_key_env)
-                if api_key:
-                    llm_kwargs["api_key"] = api_key
-                else:
-                    raise ValueError(
-                        f"API key for provider '{self.provider}' is not set. "
-                        f"Please set the {api_key_env} environment variable "
-                        f"(e.g. add {api_key_env}=your_key to your .env file)."
-                    )
-            else:
-                llm_kwargs["api_key"] = "ollama"
+            api_key = os.environ.get(api_key_env) if api_key_env else None
+            if api_key:
+                llm_kwargs["api_key"] = api_key
+            elif spec.key_optional:
+                llm_kwargs["api_key"] = spec.placeholder_key
+            elif api_key_env:
+                raise ValueError(
+                    f"API key for provider '{self.provider}' is not set. "
+                    f"Please set the {api_key_env} environment variable "
+                    f"(e.g. add {api_key_env}=your_key to your .env file)."
+                )
+
+            if spec.use_responses_api:
+                llm_kwargs["use_responses_api"] = True
        elif self.base_url:
            llm_kwargs["base_url"] = self.base_url

@@ -233,19 +270,7 @@ class OpenAIClient(BaseLLMClient):
            if key in self.kwargs:
                llm_kwargs[key] = self.kwargs[key]

-        # Native OpenAI: use Responses API for consistent behavior across
-        # all model families. Third-party providers use Chat Completions.
-        if self.provider == "openai":
-            llm_kwargs["use_responses_api"] = True
-
-        # Provider-specific quirks live in their own subclasses so the
-        # base NormalizedChatOpenAI stays free of provider branches.
-        if self.provider == "deepseek":
-            chat_cls = DeepSeekChatOpenAI
-        elif self.provider in ("minimax", "minimax-cn"):
-            chat_cls = MinimaxChatOpenAI
-        else:
-            chat_cls = NormalizedChatOpenAI
+        # The subclass (provider quirks) comes from the registry spec.
        return chat_cls(**llm_kwargs)

    def validate_model(self) -> bool:
--- a/tradingagents/llm_clients/validators.py
+++ b/tradingagents/llm_clients/validators.py
@@ -3,21 +3,25 @@
 from .model_catalog import get_known_models


+# Providers whose model names are user-defined (local servers, relays, any
+# OpenAI-compatible endpoint), so any model string is accepted without warning.
+_ANY_MODEL_PROVIDERS = ("ollama", "openrouter", "openai_compatible")
+
 VALID_MODELS = {
    provider: models
    for provider, models in get_known_models().items()
-    if provider not in ("ollama", "openrouter")
+    if provider not in _ANY_MODEL_PROVIDERS
 }


 def validate_model(provider: str, model: str) -> bool:
    """Check if model name is valid for the given provider.

-    For ollama, openrouter - any model is accepted.
+    For ollama, openrouter, and openai_compatible - any model is accepted.
    """
    provider_lower = provider.lower()

-    if provider_lower in ("ollama", "openrouter"):
+    if provider_lower in _ANY_MODEL_PROVIDERS:
        return True

    if provider_lower not in VALID_MODELS: