From 7bb16c5daa31eac602723526865d46b7cd35f04c Mon Sep 17 00:00:00 2001 From: Yijia-Xiao Date: Sun, 21 Jun 2026 21:03:05 +0000 Subject: [PATCH] chore(models): retire deprecated models, simplify thinking config Trim each provider to current-generation models and drop the special-casing they required: - OpenAI: remove gpt-4.1 (deprecated; the only non-reasoning model). - Anthropic: remove Claude Sonnet 4.5 (legacy; the only Sonnet that 400s on effort). - Google: remove the Gemini 2.5 line (superseded by 3.x). - Gemini client: drop the integer thinking_budget mapping; 3.x takes the string thinking_level directly. Effort/reasoning gates stay as defense in depth for custom model IDs. All kept IDs verified against live APIs. --- tests/test_google_api_key.py | 2 +- tests/test_google_thinking_level.py | 46 ++++++++++++++++++++++ tradingagents/llm_clients/google_client.py | 20 ++++------ tradingagents/llm_clients/model_catalog.py | 8 +--- 4 files changed, 55 insertions(+), 21 deletions(-) create mode 100644 tests/test_google_thinking_level.py diff --git a/tests/test_google_api_key.py b/tests/test_google_api_key.py index 53376ab10..9bad87cea 100644 --- a/tests/test_google_api_key.py +++ b/tests/test_google_api_key.py @@ -21,7 +21,7 @@ class TestGoogleApiKeyStandardization(unittest.TestCase): for msg, kwargs, expected_key in test_cases: with self.subTest(msg=msg): mock_chat.reset_mock() - client = GoogleClient("gemini-2.5-flash", **kwargs) + client = GoogleClient("gemini-3.5-flash", **kwargs) client.get_llm() call_kwargs = mock_chat.call_args[1] self.assertEqual(call_kwargs.get("google_api_key"), expected_key) diff --git a/tests/test_google_thinking_level.py b/tests/test_google_thinking_level.py new file mode 100644 index 000000000..f24ceff2f --- /dev/null +++ b/tests/test_google_thinking_level.py @@ -0,0 +1,46 @@ +"""Gemini thinking_level forwarding (Gemini 3.x). + +The catalog is Gemini 3.x only, which takes the string ``thinking_level`` +directly. Pro accepts low/high; Flash also accepts minimal/medium — an +unsupported "minimal" on Pro is mapped to "low". +""" + +from unittest import mock + +import pytest + +from tradingagents.llm_clients.google_client import GoogleClient + + +def _captured_kwargs(model, **kwargs): + captured = {} + with mock.patch.object( + __import__("tradingagents.llm_clients.google_client", fromlist=["x"]), + "NormalizedChatGoogleGenerativeAI", + lambda **kw: captured.setdefault("kw", kw), + ): + GoogleClient(model, api_key="x", **kwargs).get_llm() + return captured["kw"] + + +@pytest.mark.parametrize("level", ["minimal", "low", "medium", "high"]) +def test_flash_passes_thinking_level_through(level): + kw = _captured_kwargs("gemini-3.5-flash", thinking_level=level) + assert kw["thinking_level"] == level + assert "thinking_budget" not in kw # the 2.5-era param is gone + + +def test_pro_remaps_minimal_to_low(): + kw = _captured_kwargs("gemini-3.1-pro-preview", thinking_level="minimal") + assert kw["thinking_level"] == "low" # Pro doesn't accept "minimal" + + +def test_pro_keeps_high(): + kw = _captured_kwargs("gemini-3.1-pro-preview", thinking_level="high") + assert kw["thinking_level"] == "high" + + +def test_no_thinking_level_is_omitted(): + kw = _captured_kwargs("gemini-3.5-flash") + assert "thinking_level" not in kw + assert "thinking_budget" not in kw diff --git a/tradingagents/llm_clients/google_client.py b/tradingagents/llm_clients/google_client.py index df83b6cc8..93bb1d11a 100644 --- a/tradingagents/llm_clients/google_client.py +++ b/tradingagents/llm_clients/google_client.py @@ -40,21 +40,15 @@ class GoogleClient(BaseLLMClient): if google_api_key: llm_kwargs["google_api_key"] = google_api_key - # Map thinking_level to appropriate API param based on model - # Gemini 3 Pro: low, high - # Gemini 3 Flash: minimal, low, medium, high - # Gemini 2.5: thinking_budget (0=disable, -1=dynamic) + # Gemini 3.x takes the string ``thinking_level`` (the integer + # ``thinking_budget`` was for the now-retired 2.5 line). Pro accepts + # low/high; Flash also accepts minimal/medium — so map an unsupported + # "minimal" on Pro to the nearest level it does accept. thinking_level = self.kwargs.get("thinking_level") if thinking_level: - model_lower = self.model.lower() - if "gemini-3" in model_lower: - # Gemini 3 Pro doesn't support "minimal", use "low" instead - if "pro" in model_lower and thinking_level == "minimal": - thinking_level = "low" - llm_kwargs["thinking_level"] = thinking_level - else: - # Gemini 2.5: map to thinking_budget - llm_kwargs["thinking_budget"] = -1 if thinking_level == "high" else 0 + if "pro" in self.model.lower() and thinking_level == "minimal": + thinking_level = "low" + llm_kwargs["thinking_level"] = thinking_level return NormalizedChatGoogleGenerativeAI(**llm_kwargs) diff --git a/tradingagents/llm_clients/model_catalog.py b/tradingagents/llm_clients/model_catalog.py index 5dff0aa09..bcec44add 100644 --- a/tradingagents/llm_clients/model_catalog.py +++ b/tradingagents/llm_clients/model_catalog.py @@ -84,7 +84,6 @@ MODEL_OPTIONS: ProviderModeOptions = { ("GPT-5.4 Mini - Fast, strong coding and tool use", "gpt-5.4-mini"), ("GPT-5.4 Nano - Cheapest, high-volume tasks", "gpt-5.4-nano"), ("GPT-5.5 - Latest frontier, 1M context", "gpt-5.5"), - ("GPT-4.1 - Smartest non-reasoning model", "gpt-4.1"), ], "deep": [ ("GPT-5.5 - Latest frontier, 1M context", "gpt-5.5"), @@ -97,7 +96,6 @@ MODEL_OPTIONS: ProviderModeOptions = { "quick": [ ("Claude Sonnet 4.6 - Best speed and intelligence balance", "claude-sonnet-4-6"), ("Claude Haiku 4.5 - Fastest with near-frontier intelligence", "claude-haiku-4-5"), - ("Claude Sonnet 4.5 - High-performance for agents and coding", "claude-sonnet-4-5"), ], "deep": [ ("Claude Opus 4.8 - Latest frontier, agentic coding and reasoning", "claude-opus-4-8"), @@ -109,15 +107,11 @@ MODEL_OPTIONS: ProviderModeOptions = { "google": { "quick": [ ("Gemini 3.5 Flash - Latest, frontier agentic + coding (GA)", "gemini-3.5-flash"), - ("Gemini 3.1 Flash Lite - Most cost-efficient (GA)", "gemini-3.1-flash-lite"), - ("Gemini 2.5 Flash - Balanced, stable", "gemini-2.5-flash"), - ("Gemini 2.5 Flash Lite - Fast, low-cost", "gemini-2.5-flash-lite"), + ("Gemini 3.1 Flash Lite - Most cost-efficient", "gemini-3.1-flash-lite"), ], "deep": [ ("Gemini 3.1 Pro - Reasoning-first, complex workflows (preview)", "gemini-3.1-pro-preview"), ("Gemini 3.5 Flash - Latest GA, strong agentic + coding", "gemini-3.5-flash"), - ("Gemini 2.5 Pro - Stable pro model", "gemini-2.5-pro"), - ("Gemini 2.5 Flash - Balanced, stable", "gemini-2.5-flash"), ], }, "xai": {