feat(ollama): OLLAMA_BASE_URL end-to-end with endpoint confirmation

OLLAMA_BASE_URL now flows through both the CLI dropdown and the
programmatic client (the env var is read at call time, not import time,
so tests that monkeypatch it after import behave correctly). After
provider selection, the CLI prints the resolved endpoint and marks
when it came from the env var, plus a soft warning when the URL is
missing a scheme or non-default port. Drops the stale "(local)"
suffix from Ollama model labels since the endpoint is now dynamic.
This commit is contained in:
Yijia-Xiao
2026-05-11 08:46:21 +00:00
parent 879e2bb5da
commit f10daa2824
6 changed files with 230 additions and 8 deletions

View File

@@ -154,16 +154,21 @@ MODEL_OPTIONS: ProviderModeOptions = {
"minimax": _MINIMAX_MODELS,
"minimax-cn": _MINIMAX_MODELS,
# OpenRouter: fetched dynamically. Azure: any deployed model name.
# Ollama display labels intentionally omit a "local" marker — the
# endpoint is now configurable via OLLAMA_BASE_URL, so the same labels
# apply whether the user runs ollama-serve on localhost or against a
# remote host. The actual resolved endpoint is surfaced separately by
# cli.utils.confirm_ollama_endpoint() right after provider selection.
"ollama": {
"quick": [
("Qwen3:latest (8B, local)", "qwen3:latest"),
("GPT-OSS:latest (20B, local)", "gpt-oss:latest"),
("GLM-4.7-Flash:latest (30B, local)", "glm-4.7-flash:latest"),
("Qwen3:latest (8B)", "qwen3:latest"),
("GPT-OSS:latest (20B)", "gpt-oss:latest"),
("GLM-4.7-Flash:latest (30B)", "glm-4.7-flash:latest"),
],
"deep": [
("GLM-4.7-Flash:latest (30B, local)", "glm-4.7-flash:latest"),
("GPT-OSS:latest (20B, local)", "gpt-oss:latest"),
("Qwen3:latest (8B, local)", "qwen3:latest"),
("GLM-4.7-Flash:latest (30B)", "glm-4.7-flash:latest"),
("GPT-OSS:latest (20B)", "gpt-oss:latest"),
("Qwen3:latest (8B)", "qwen3:latest"),
],
},
}

View File

@@ -155,6 +155,22 @@ _PROVIDER_BASE_URL = {
}
def _resolve_provider_base_url(provider: str) -> Optional[str]:
    """Return the default base URL for ``provider``, honoring env overrides.

    Only Ollama has an env-var override here: a non-blank ``OLLAMA_BASE_URL``
    wins over the built-in default, so users can point at a remote Ollama
    server without editing code. The environment is read on every call
    rather than at import time, so tests that monkeypatch it after import
    see the change.

    Args:
        provider: Provider key, as used in ``_PROVIDER_BASE_URL``.

    Returns:
        The override URL (surrounding whitespace removed) for Ollama when
        one is set; otherwise the ``_PROVIDER_BASE_URL`` entry for
        ``provider``, or ``None`` when the provider has no entry.
    """
    if provider == "ollama":
        # Strip before the truthiness check: a whitespace-only or padded
        # value (easy to produce from a .env file or a quoted export) must
        # not be handed to the HTTP client as if it were a usable URL.
        env_url = os.environ.get("OLLAMA_BASE_URL", "").strip()
        if env_url:
            return env_url
    return _PROVIDER_BASE_URL.get(provider)
class OpenAIClient(BaseLLMClient):
"""Client for OpenAI, Ollama, OpenRouter, and xAI providers.
@@ -183,7 +199,7 @@ class OpenAIClient(BaseLLMClient):
# client (e.g. a corporate proxy) takes precedence over the
# provider default so users can route through their own gateway.
if self.provider in _PROVIDER_BASE_URL:
llm_kwargs["base_url"] = self.base_url or _PROVIDER_BASE_URL[self.provider]
llm_kwargs["base_url"] = self.base_url or _resolve_provider_base_url(self.provider)
api_key_env = get_api_key_env(self.provider)
if api_key_env:
api_key = os.environ.get(api_key_env)