feat(llm): unify OpenAI-compatible providers behind a registry + generic endpoint

The OpenAI-compatible providers (openai, xAI, DeepSeek, Qwen, GLM, MiniMax,
OpenRouter, Ollama) all speak the same Chat Completions API and differ only in
base_url, key, and two narrow wire-format quirks already isolated in subclasses.
Replace the scattered base-URL dict, key handling, and client-class branches with
one ProviderSpec registry that drives both get_llm and the factory; provider quirks
stay in their subclasses. Add a generic "openai_compatible" provider for any
OpenAI-compatible server (vLLM, LM Studio, llama.cpp, relays) via backend_url +
optional key — adding a provider is now one registry row. Native Anthropic/Google
keep their own clients (genuinely different APIs). Also fixes the env backend URL
being ignored when the provider was chosen interactively (#978).
This commit is contained in:
Yijia-Xiao
2026-06-14 03:22:24 +00:00
parent 4e7821d574
commit 20d3b0782f
11 changed files with 301 additions and 85 deletions

View File

@@ -24,7 +24,7 @@ def _resync_reloaded_modules():
importlib.reload(cli.main)
# ---- openai_client side: _resolve_provider_base_url -----------------------
# ---- openai_client side: registry-driven base_url resolution --------------
def _reload_client():
@@ -32,16 +32,20 @@ def _reload_client():
return importlib.reload(mod)
def _base_url(mod, provider, **kwargs):
return str(mod.OpenAIClient(model="m", provider=provider, **kwargs).get_llm().openai_api_base)
def test_resolver_returns_default_when_env_unset(monkeypatch):
monkeypatch.delenv("OLLAMA_BASE_URL", raising=False)
mod = _reload_client()
assert mod._resolve_provider_base_url("ollama") == "http://localhost:11434/v1"
assert _base_url(mod, "ollama") == "http://localhost:11434/v1"
def test_resolver_returns_env_when_set(monkeypatch):
monkeypatch.setenv("OLLAMA_BASE_URL", "http://remote-ollama:11434/v1")
mod = _reload_client()
assert mod._resolve_provider_base_url("ollama") == "http://remote-ollama:11434/v1"
assert _base_url(mod, "ollama") == "http://remote-ollama:11434/v1"
def test_resolver_evaluation_is_call_time(monkeypatch):
@@ -49,15 +53,15 @@ def test_resolver_evaluation_is_call_time(monkeypatch):
monkeypatch.delenv("OLLAMA_BASE_URL", raising=False)
mod = _reload_client()
monkeypatch.setenv("OLLAMA_BASE_URL", "http://late-set:11434/v1")
assert mod._resolve_provider_base_url("ollama") == "http://late-set:11434/v1"
assert _base_url(mod, "ollama") == "http://late-set:11434/v1"
def test_resolver_does_not_affect_other_providers(monkeypatch):
"""OLLAMA_BASE_URL should NOT leak into xai/deepseek/etc."""
monkeypatch.setenv("OLLAMA_BASE_URL", "http://elsewhere/v1")
mod = _reload_client()
assert mod._resolve_provider_base_url("xai") == "https://api.x.ai/v1"
assert mod._resolve_provider_base_url("deepseek") == "https://api.deepseek.com"
assert _base_url(mod, "xai") == "https://api.x.ai/v1"
assert _base_url(mod, "deepseek") == "https://api.deepseek.com"
def test_client_get_llm_picks_up_env(monkeypatch):