feat(ollama): OLLAMA_BASE_URL end-to-end with endpoint confirmation

OLLAMA_BASE_URL is now honored by both the CLI dropdown and the programmatic client. The variable is read at call time rather than import time, so tests that monkeypatch the environment after import behave correctly. After provider selection, the CLI prints the resolved endpoint, marks when it came from the env var, and shows a soft warning when the URL is missing a scheme or non-default port. Also drops the stale "local" marker from Ollama model labels, since the endpoint is now configurable.
2026-06-16 21:06:15 +03:00 · 2026-05-11 08:46:21 +00:00
parent 879e2bb5da
commit f10daa2824
6 changed files with 230 additions and 8 deletions
--- a/tradingagents/llm_clients/model_catalog.py
+++ b/tradingagents/llm_clients/model_catalog.py
@@ -154,16 +154,21 @@ MODEL_OPTIONS: ProviderModeOptions = {
    "minimax": _MINIMAX_MODELS,
    "minimax-cn": _MINIMAX_MODELS,
    # OpenRouter: fetched dynamically. Azure: any deployed model name.
+    # Ollama display labels intentionally omit a "local" marker — the
+    # endpoint is now configurable via OLLAMA_BASE_URL, so the same labels
+    # apply whether the user runs ollama-serve on localhost or against a
+    # remote host. The actual resolved endpoint is surfaced separately by
+    # cli.utils.confirm_ollama_endpoint() right after provider selection.
    "ollama": {
        "quick": [
-            ("Qwen3:latest (8B, local)", "qwen3:latest"),
-            ("GPT-OSS:latest (20B, local)", "gpt-oss:latest"),
-            ("GLM-4.7-Flash:latest (30B, local)", "glm-4.7-flash:latest"),
+            ("Qwen3:latest (8B)", "qwen3:latest"),
+            ("GPT-OSS:latest (20B)", "gpt-oss:latest"),
+            ("GLM-4.7-Flash:latest (30B)", "glm-4.7-flash:latest"),
        ],
        "deep": [
-            ("GLM-4.7-Flash:latest (30B, local)", "glm-4.7-flash:latest"),
-            ("GPT-OSS:latest (20B, local)", "gpt-oss:latest"),
-            ("Qwen3:latest (8B, local)", "qwen3:latest"),
+            ("GLM-4.7-Flash:latest (30B)", "glm-4.7-flash:latest"),
+            ("GPT-OSS:latest (20B)", "gpt-oss:latest"),
+            ("Qwen3:latest (8B)", "qwen3:latest"),
        ],
    },
 }
--- a/tradingagents/llm_clients/openai_client.py
+++ b/tradingagents/llm_clients/openai_client.py
@@ -155,6 +155,22 @@ _PROVIDER_BASE_URL = {
 }


+def _resolve_provider_base_url(provider: str) -> Optional[str]:
+    """Return the base URL for ``provider``, honoring env-var overrides.
+
+    Only Ollama has an override today: a non-empty ``OLLAMA_BASE_URL`` is
+    returned as-is so users can target a remote ollama-serve without code
+    edits; otherwise the ``_PROVIDER_BASE_URL`` entry (or ``None``) is used.
+    The variable is read at call time, not import time, so tests that
+    monkeypatch the env after import behave correctly.
+    """
+    if provider == "ollama":
+        env_url = os.environ.get("OLLAMA_BASE_URL")
+        if env_url:  # unset or empty falls through to the static default
+            return env_url
+    return _PROVIDER_BASE_URL.get(provider)  # None when provider has no entry
+
+
 class OpenAIClient(BaseLLMClient):
    """Client for OpenAI, Ollama, OpenRouter, and xAI providers.

@@ -183,7 +199,7 @@ class OpenAIClient(BaseLLMClient):
        # client (e.g. a corporate proxy) takes precedence over the
        # provider default so users can route through their own gateway.
        if self.provider in _PROVIDER_BASE_URL:
-            llm_kwargs["base_url"] = self.base_url or _PROVIDER_BASE_URL[self.provider]
+            llm_kwargs["base_url"] = self.base_url or _resolve_provider_base_url(self.provider)
            api_key_env = get_api_key_env(self.provider)
            if api_key_env:
                api_key = os.environ.get(api_key_env)