From a420ad0f3b7d8f2ebb48afd85eb1632b62f7cbb0 Mon Sep 17 00:00:00 2001 From: Yijia-Xiao Date: Sun, 21 Jun 2026 21:03:05 +0000 Subject: [PATCH] fix(cli): honor env precedence for LLM and run config Interactive selections and flag defaults overrode TRADINGAGENTS_* env vars. Rule: an explicit env value or CLI flag wins; otherwise the env-applied default is kept. - Research depth: skip the prompt when both round-count env vars are set, and stop overwriting them (#977). - Checkpoint: --checkpoint/--no-checkpoint is tri-state; omitting it keeps TRADINGAGENTS_CHECKPOINT_ENABLED (#976). - Docker ollama: use TRADINGAGENTS_LLM_PROVIDER + OLLAMA_BASE_URL, not a bare LLM_PROVIDER the overlay never reads (#975). - Reasoning/thinking knobs: settable via env; the prompt is skipped when set. - Effort gating: forward effort only to models that accept it (Anthropic Opus 4.5+/Sonnet 4.6+, OpenAI reasoning models); drop it elsewhere. - Boolean env values: raise a named error on invalid input instead of silently becoming False. --- .env.example | 5 + cli/main.py | 119 ++++++++++++------ docker-compose.yml | 3 +- tests/test_anthropic_effort.py | 18 +-- tests/test_cli_config_precedence.py | 69 ++++++++++ tests/test_cli_env_skip.py | 63 ++++++++++ tests/test_env_overrides.py | 33 ++++- tests/test_openai_reasoning_effort.py | 42 +++++++ tradingagents/default_config.py | 31 ++++- tradingagents/llm_clients/anthropic_client.py | 19 ++- tradingagents/llm_clients/openai_client.py | 20 ++- 11 files changed, 363 insertions(+), 59 deletions(-) create mode 100644 tests/test_cli_config_precedence.py create mode 100644 tests/test_openai_reasoning_effort.py diff --git a/.env.example b/.env.example index 669b8962d..2174afb8e 100644 --- a/.env.example +++ b/.env.example @@ -55,3 +55,8 @@ NVIDIA_API_KEY= # honor it). Unset leaves each provider at its default. See the README # "Reproducibility" note — no setting makes LLM output fully deterministic. #TRADINGAGENTS_TEMPERATURE=0.0 +# Provider-specific reasoning/thinking depth (optional; unset = provider +# default). Setting one also skips the matching interactive prompt. +#TRADINGAGENTS_OPENAI_REASONING_EFFORT=medium +#TRADINGAGENTS_GOOGLE_THINKING_LEVEL=high +#TRADINGAGENTS_ANTHROPIC_EFFORT=high diff --git a/cli/main.py b/cli/main.py index df3d69554..047e31057 100644 --- a/cli/main.py +++ b/cli/main.py @@ -517,6 +517,20 @@ def get_user_selections(): box_content += f"\n[dim]Default: {default}[/dim]" return Panel(box_content, border_style="blue", padding=(1, 2)) + def thinking_value_or_prompt(env_var, config_key, label, box_title, box_body, prompt_fn): + """Return the env-configured reasoning/thinking value, or prompt for it. + + When ``env_var`` is set the interactive choice is skipped and the value + the env overlay placed on DEFAULT_CONFIG is used — mirroring the + env-precedence rule applied to the other selection steps. + """ + if os.environ.get(env_var): + value = DEFAULT_CONFIG[config_key] + console.print(f"[green]✓ {label} from environment:[/green] {value}") + return value + console.print(create_question_box(box_title, box_body)) + return prompt_fn() + # Step 1: Ticker symbol console.print( create_question_box( @@ -571,13 +585,27 @@ def get_user_selections(): f"[green]Selected analysts:[/green] {', '.join(analyst.value for analyst in selected_analysts)}" ) - # Step 5: Research depth - console.print( - create_question_box( - "Step 5: Research Depth", "Select your research depth level" - ) + # Step 5: Research depth (skipped when both round counts are set via env). + # Research depth maps to the debate + risk round counts; when both are + # supplied through TRADINGAGENTS_MAX_DEBATE_ROUNDS / _MAX_RISK_ROUNDS we keep + # the run non-interactive and honor the env values (#977). + depth_from_env = bool(os.environ.get("TRADINGAGENTS_MAX_DEBATE_ROUNDS")) and bool( + os.environ.get("TRADINGAGENTS_MAX_RISK_ROUNDS") ) - selected_research_depth = select_research_depth() + if depth_from_env: + selected_research_depth = DEFAULT_CONFIG["max_debate_rounds"] + console.print( + f"[green]✓ Research depth from environment:[/green] " + f"{DEFAULT_CONFIG['max_debate_rounds']} debate / " + f"{DEFAULT_CONFIG['max_risk_discuss_rounds']} risk rounds" + ) + else: + console.print( + create_question_box( + "Step 5: Research Depth", "Select your research depth level" + ) + ) + selected_research_depth = select_research_depth() # Step 6: LLM Provider (skipped when set via TRADINGAGENTS_LLM_PROVIDER). # The backend URL comes from TRADINGAGENTS_LLM_BACKEND_URL when set, @@ -649,43 +677,38 @@ def get_user_selections(): selected_shallow_thinker = select_shallow_thinking_agent(selected_llm_provider) selected_deep_thinker = select_deep_thinking_agent(selected_llm_provider) - # Step 8: Provider-specific thinking configuration + # Step 8: Provider-specific reasoning/thinking configuration. Each knob is + # settable via its TRADINGAGENTS_* env var; when that var is set (or the + # provider itself came from env) the prompt is skipped and the configured + # value is used — same env-precedence rule as the steps above. None = each + # provider's own default. thinking_level = None reasoning_effort = None anthropic_effort = None provider_lower = selected_llm_provider.lower() - # When the provider is configured via environment we keep the run fully - # non-interactive and use the config defaults (None = each provider's own - # default reasoning/thinking behavior) instead of prompting. if provider_from_env: thinking_level = DEFAULT_CONFIG["google_thinking_level"] reasoning_effort = DEFAULT_CONFIG["openai_reasoning_effort"] anthropic_effort = DEFAULT_CONFIG["anthropic_effort"] elif provider_lower == "google": - console.print( - create_question_box( - "Step 8: Thinking Mode", - "Configure Gemini thinking mode" - ) + thinking_level = thinking_value_or_prompt( + "TRADINGAGENTS_GOOGLE_THINKING_LEVEL", "google_thinking_level", + "Gemini thinking mode", "Step 8: Thinking Mode", + "Configure Gemini thinking mode", ask_gemini_thinking_config, ) - thinking_level = ask_gemini_thinking_config() elif provider_lower == "openai": - console.print( - create_question_box( - "Step 8: Reasoning Effort", - "Configure OpenAI reasoning effort level" - ) + reasoning_effort = thinking_value_or_prompt( + "TRADINGAGENTS_OPENAI_REASONING_EFFORT", "openai_reasoning_effort", + "Reasoning effort", "Step 8: Reasoning Effort", + "Configure OpenAI reasoning effort level", ask_openai_reasoning_effort, ) - reasoning_effort = ask_openai_reasoning_effort() elif provider_lower == "anthropic": - console.print( - create_question_box( - "Step 8: Effort Level", - "Configure Claude effort level" - ) + anthropic_effort = thinking_value_or_prompt( + "TRADINGAGENTS_ANTHROPIC_EFFORT", "anthropic_effort", + "Claude effort", "Step 8: Effort Level", + "Configure Claude effort level", ask_anthropic_effort, ) - anthropic_effort = ask_anthropic_effort() return { "ticker": selected_ticker, @@ -1019,14 +1042,20 @@ def format_tool_args(args, max_length=80) -> str: return result[:max_length - 3] + "..." return result -def run_analysis(checkpoint: bool = False): - # First get all user selections - selections = get_user_selections() +def _build_run_config(selections: dict, checkpoint: bool | None) -> dict: + """Assemble the run config from interactive selections, honoring env precedence. - # Create config with selected research depth + Round counts and checkpoint follow "explicit env/flag wins": an env-applied + value on DEFAULT_CONFIG is preserved unless the user overrode it on the CLI. + """ config = DEFAULT_CONFIG.copy() - config["max_debate_rounds"] = selections["research_depth"] - config["max_risk_discuss_rounds"] = selections["research_depth"] + # Research depth sets both round counts, but an explicit env override + # (TRADINGAGENTS_MAX_DEBATE_ROUNDS / _MAX_RISK_ROUNDS) wins over the + # interactive selection — leave the env-applied value in place (#977). + if not os.environ.get("TRADINGAGENTS_MAX_DEBATE_ROUNDS"): + config["max_debate_rounds"] = selections["research_depth"] + if not os.environ.get("TRADINGAGENTS_MAX_RISK_ROUNDS"): + config["max_risk_discuss_rounds"] = selections["research_depth"] config["quick_think_llm"] = selections["shallow_thinker"] config["deep_think_llm"] = selections["deep_thinker"] config["backend_url"] = selections["backend_url"] @@ -1036,7 +1065,18 @@ def run_analysis(checkpoint: bool = False): config["openai_reasoning_effort"] = selections.get("openai_reasoning_effort") config["anthropic_effort"] = selections.get("anthropic_effort") config["output_language"] = selections.get("output_language", "English") - config["checkpoint_enabled"] = checkpoint + # --checkpoint/--no-checkpoint overrides only when explicitly given; omitting + # the flag preserves TRADINGAGENTS_CHECKPOINT_ENABLED / the default (#976). + if checkpoint is not None: + config["checkpoint_enabled"] = checkpoint + return config + + +def run_analysis(checkpoint: bool | None = None): + # First get all user selections + selections = get_user_selections() + + config = _build_run_config(selections, checkpoint) # Create stats callback handler for tracking LLM/tool calls stats_handler = StatsCallbackHandler() @@ -1316,10 +1356,11 @@ def run_analysis(checkpoint: bool = False): @app.command() def analyze( - checkpoint: bool = typer.Option( - False, - "--checkpoint", - help="Enable checkpoint/resume: save state after each node so a crashed run can resume.", + checkpoint: bool | None = typer.Option( + None, + "--checkpoint/--no-checkpoint", + help="Enable/disable checkpoint-resume (save state after each node so a " + "crashed run can resume). Omit to honor TRADINGAGENTS_CHECKPOINT_ENABLED.", ), clear_checkpoints: bool = typer.Option( False, diff --git a/docker-compose.yml b/docker-compose.yml index d28135b3c..411c989c8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,7 +20,8 @@ services: env_file: - .env environment: - - LLM_PROVIDER=ollama + - TRADINGAGENTS_LLM_PROVIDER=ollama + - OLLAMA_BASE_URL=http://ollama:11434/v1 volumes: - tradingagents_data:/home/appuser/.tradingagents depends_on: diff --git a/tests/test_anthropic_effort.py b/tests/test_anthropic_effort.py index bdef7317f..7c4b1e5bf 100644 --- a/tests/test_anthropic_effort.py +++ b/tests/test_anthropic_effort.py @@ -1,9 +1,9 @@ """Tests for Anthropic effort-parameter gating (#831). -Haiku 4.5 (and current Haiku versions) reject the ``effort`` parameter -with a 400. Opus 4.5+ and Sonnet 4.5+ accept it. The gate uses a -forward-compat regex so future ``claude-{opus,sonnet}-X-Y`` releases -inherit support automatically. +Haiku (any version) and Sonnet 4.5 reject the ``effort`` parameter with a +400. Only Opus 4.5+ and Sonnet 4.6+ accept it. The gate uses a per-family +minimum version so future ``claude-{opus,sonnet}-X-Y`` releases inherit +support automatically. """ import pytest @@ -24,9 +24,13 @@ def _capture_kwargs(monkeypatch): class TestEffortGate: @pytest.mark.parametrize( "model", - ["claude-haiku-4-5", "claude-haiku-5-0", "claude-haiku-4-7-preview"], + [ + "claude-haiku-4-5", "claude-haiku-5-0", "claude-haiku-4-7-preview", + # Sonnet 4.5 (and earlier) 400 on effort — only Sonnet 4.6+ supports it. + "claude-sonnet-4-5", "claude-sonnet-4-0", + ], ) - def test_haiku_does_not_receive_effort(self, monkeypatch, model): + def test_unsupported_models_do_not_receive_effort(self, monkeypatch, model): captured = _capture_kwargs(monkeypatch) mod.AnthropicClient(model=model, effort="medium", api_key="x").get_llm() assert "effort" not in captured["kwargs"] @@ -35,7 +39,7 @@ class TestEffortGate: "model", [ "claude-opus-4-5", "claude-opus-4-6", "claude-opus-4-7", - "claude-sonnet-4-5", "claude-sonnet-4-6", + "claude-sonnet-4-6", ], ) def test_current_opus_and_sonnet_receive_effort(self, monkeypatch, model): diff --git a/tests/test_cli_config_precedence.py b/tests/test_cli_config_precedence.py new file mode 100644 index 000000000..e2ba90d14 --- /dev/null +++ b/tests/test_cli_config_precedence.py @@ -0,0 +1,69 @@ +"""CLI config precedence (#976, #977). + +An explicit environment override for the debate/risk round counts, or the +checkpoint flag, must win over the interactive research-depth selection — the CLI +must not clobber an env-configured value back to a prompt/flag default. +""" + +from unittest import mock + +import pytest + +import cli.main as m + +# Minimal selections dict shaped like get_user_selections()'s return value. +SELECTIONS = { + "research_depth": 5, + "shallow_thinker": "gpt-5.4-mini", + "deep_thinker": "gpt-5.5", + "backend_url": None, + "llm_provider": "openai", + "google_thinking_level": None, + "openai_reasoning_effort": None, + "anthropic_effort": None, + "output_language": "English", +} + + +def test_research_depth_sets_both_rounds_without_env(monkeypatch): + for var in ("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "TRADINGAGENTS_MAX_RISK_ROUNDS"): + monkeypatch.delenv(var, raising=False) + cfg = m._build_run_config(SELECTIONS, checkpoint=None) + assert cfg["max_debate_rounds"] == 5 + assert cfg["max_risk_discuss_rounds"] == 5 + + +def test_env_round_counts_win_over_selection(monkeypatch): + monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "2") + monkeypatch.setenv("TRADINGAGENTS_MAX_RISK_ROUNDS", "4") + # DEFAULT_CONFIG already reflects the env (applied at import); emulate that. + patched = dict(m.DEFAULT_CONFIG, max_debate_rounds=2, max_risk_discuss_rounds=4) + with mock.patch.object(m, "DEFAULT_CONFIG", patched): + cfg = m._build_run_config(SELECTIONS, checkpoint=None) + assert cfg["max_debate_rounds"] == 2 # env value, not research_depth=5 + assert cfg["max_risk_discuss_rounds"] == 4 + + +def test_partial_env_only_overrides_that_count(monkeypatch): + monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "2") + monkeypatch.delenv("TRADINGAGENTS_MAX_RISK_ROUNDS", raising=False) + patched = dict(m.DEFAULT_CONFIG, max_debate_rounds=2) + with mock.patch.object(m, "DEFAULT_CONFIG", patched): + cfg = m._build_run_config(SELECTIONS, checkpoint=None) + assert cfg["max_debate_rounds"] == 2 # env wins + assert cfg["max_risk_discuss_rounds"] == 5 # falls through to research_depth + + +def test_checkpoint_none_preserves_env_default(): + patched = dict(m.DEFAULT_CONFIG, checkpoint_enabled=True) # e.g. env-enabled + with mock.patch.object(m, "DEFAULT_CONFIG", patched): + cfg = m._build_run_config(SELECTIONS, checkpoint=None) + assert cfg["checkpoint_enabled"] is True # not clobbered back to False + + +@pytest.mark.parametrize("flag", [True, False]) +def test_checkpoint_flag_overrides_env(flag): + patched = dict(m.DEFAULT_CONFIG, checkpoint_enabled=not flag) + with mock.patch.object(m, "DEFAULT_CONFIG", patched): + cfg = m._build_run_config(SELECTIONS, checkpoint=flag) + assert cfg["checkpoint_enabled"] is flag diff --git a/tests/test_cli_env_skip.py b/tests/test_cli_env_skip.py index b9497d74d..c98c24549 100644 --- a/tests/test_cli_env_skip.py +++ b/tests/test_cli_env_skip.py @@ -82,5 +82,68 @@ class TestCliSkipsPromptsFromEnv(unittest.TestCase): self.assertEqual(sel["output_language"], "Japanese") +@pytest.mark.unit +class TestResearchDepthSkippedFromEnv(unittest.TestCase): + def test_both_round_envs_skip_depth_prompt(self): + import cli.main as m + + env = { + "TRADINGAGENTS_MAX_DEBATE_ROUNDS": "2", + "TRADINGAGENTS_MAX_RISK_ROUNDS": "4", + } + fake_cfg = dict(m.DEFAULT_CONFIG) + fake_cfg.update({"max_debate_rounds": 2, "max_risk_discuss_rounds": 4}) + + with mock.patch.dict(os.environ, env, clear=False), \ + mock.patch.object(m, "DEFAULT_CONFIG", fake_cfg), \ + mock.patch.object(m, "fetch_announcements", return_value=None), \ + mock.patch.object(m, "display_announcements"), \ + mock.patch.object(m, "get_ticker", return_value="AAPL"), \ + mock.patch.object(m, "get_analysis_date", return_value="2026-05-29"), \ + mock.patch.object(m, "select_analysts", return_value=[]), \ + mock.patch.object(m, "select_research_depth") as prompt_depth, \ + mock.patch.object(m, "ensure_api_key"), \ + mock.patch.object(m, "select_llm_provider", return_value=("openai", None)), \ + mock.patch.object(m, "ask_output_language", return_value="English"), \ + mock.patch.object(m, "select_shallow_thinking_agent", return_value="gpt-5.4-mini"), \ + mock.patch.object(m, "select_deep_thinking_agent", return_value="gpt-5.5"), \ + mock.patch.object(m, "ask_openai_reasoning_effort", return_value=None): + sel = m.get_user_selections() + + # The research-depth prompt is skipped; the value comes from the env config. + prompt_depth.assert_not_called() + self.assertEqual(sel["research_depth"], 2) + + +@pytest.mark.unit +class TestReasoningEffortSkippedFromEnv(unittest.TestCase): + def test_effort_env_skips_step8_prompt(self): + import cli.main as m + + env = {"TRADINGAGENTS_OPENAI_REASONING_EFFORT": "high"} + fake_cfg = dict(m.DEFAULT_CONFIG) + fake_cfg.update({"openai_reasoning_effort": "high"}) + + with mock.patch.dict(os.environ, env, clear=False), \ + mock.patch.object(m, "DEFAULT_CONFIG", fake_cfg), \ + mock.patch.object(m, "fetch_announcements", return_value=None), \ + mock.patch.object(m, "display_announcements"), \ + mock.patch.object(m, "get_ticker", return_value="AAPL"), \ + mock.patch.object(m, "get_analysis_date", return_value="2026-05-29"), \ + mock.patch.object(m, "select_analysts", return_value=[]), \ + mock.patch.object(m, "select_research_depth", return_value=1), \ + mock.patch.object(m, "ensure_api_key"), \ + mock.patch.object(m, "select_llm_provider", return_value=("openai", None)), \ + mock.patch.object(m, "ask_output_language", return_value="English"), \ + mock.patch.object(m, "select_shallow_thinking_agent", return_value="gpt-5.4-mini"), \ + mock.patch.object(m, "select_deep_thinking_agent", return_value="gpt-5.5"), \ + mock.patch.object(m, "ask_openai_reasoning_effort") as prompt_effort: + sel = m.get_user_selections() + + # The reasoning-effort prompt is skipped; the value comes from env config. + prompt_effort.assert_not_called() + self.assertEqual(sel["openai_reasoning_effort"], "high") + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_env_overrides.py b/tests/test_env_overrides.py index f02661ebd..b9fd59cbf 100644 --- a/tests/test_env_overrides.py +++ b/tests/test_env_overrides.py @@ -68,6 +68,27 @@ def test_bool_coercion(monkeypatch, raw, expected): assert dc.DEFAULT_CONFIG["checkpoint_enabled"] is expected +def test_reasoning_thinking_overrides(monkeypatch): + """The provider reasoning/thinking knobs are env-configurable (non-interactive runs).""" + dc = _reload_with_env( + monkeypatch, + TRADINGAGENTS_OPENAI_REASONING_EFFORT="high", + TRADINGAGENTS_GOOGLE_THINKING_LEVEL="minimal", + TRADINGAGENTS_ANTHROPIC_EFFORT="low", + ) + assert dc.DEFAULT_CONFIG["openai_reasoning_effort"] == "high" + assert dc.DEFAULT_CONFIG["google_thinking_level"] == "minimal" + assert dc.DEFAULT_CONFIG["anthropic_effort"] == "low" + + +def test_reasoning_effort_defaults_to_none(monkeypatch): + """Unset reasoning/thinking knobs stay None so each provider uses its own default.""" + dc = _reload_with_env(monkeypatch) + assert dc.DEFAULT_CONFIG["openai_reasoning_effort"] is None + assert dc.DEFAULT_CONFIG["google_thinking_level"] is None + assert dc.DEFAULT_CONFIG["anthropic_effort"] is None + + def test_empty_env_value_is_passthrough(monkeypatch): """Empty TRADINGAGENTS_* values must not clobber the built-in default.""" dc = _reload_with_env( @@ -82,13 +103,23 @@ def test_empty_env_value_is_passthrough(monkeypatch): def test_invalid_int_raises(monkeypatch): """Garbage int values should surface a ValueError at import, not silently misconfigure.""" monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "not-a-number") - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="TRADINGAGENTS_MAX_DEBATE_ROUNDS"): importlib.reload(default_config_module) # Restore module state for subsequent tests in this process monkeypatch.delenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", raising=False) importlib.reload(default_config_module) +@pytest.mark.parametrize("bad", ["treu", "flase", "maybe", "2", "enabled"]) +def test_invalid_bool_raises(monkeypatch, bad): + """A misspelled boolean must fail loudly (like ints) instead of silently False.""" + monkeypatch.setenv("TRADINGAGENTS_CHECKPOINT_ENABLED", bad) + with pytest.raises(ValueError, match="TRADINGAGENTS_CHECKPOINT_ENABLED"): + importlib.reload(default_config_module) + monkeypatch.delenv("TRADINGAGENTS_CHECKPOINT_ENABLED", raising=False) + importlib.reload(default_config_module) + + def test_unknown_env_var_is_ignored(monkeypatch): """Env vars outside _ENV_OVERRIDES must not bleed into DEFAULT_CONFIG.""" dc = _reload_with_env( diff --git a/tests/test_openai_reasoning_effort.py b/tests/test_openai_reasoning_effort.py new file mode 100644 index 000000000..f58a4e20c --- /dev/null +++ b/tests/test_openai_reasoning_effort.py @@ -0,0 +1,42 @@ +"""OpenAI ``reasoning_effort`` is gated to reasoning models. + +Non-reasoning OpenAI models (gpt-4.1, gpt-4o, ...) 400 with "Unsupported +parameter: 'reasoning.effort'". The client must drop the kwarg for those rather +than forward it and crash the run. The GPT-5 family and the o-series accept it. +""" + +import pytest + +from tradingagents.llm_clients.openai_client import ( + OpenAIClient, + _supports_reasoning_effort, +) + + +@pytest.mark.parametrize( + "model,expected", + [ + ("gpt-5.5", True), ("gpt-5.4", True), ("gpt-5.4-mini", True), + ("gpt-5.5-pro", True), ("o1", True), ("o3-mini", True), + ("gpt-4.1", False), ("gpt-4o", False), ("gpt-4o-mini", False), + ("gpt-3.5-turbo", False), + ], +) +def test_supports_reasoning_effort(model, expected): + assert _supports_reasoning_effort(model) is expected + + +def _effort_on(model, monkeypatch): + # A fake key lets get_llm() construct the client without a network call. + monkeypatch.setenv("OPENAI_API_KEY", "test-key") + llm = OpenAIClient(model, provider="openai", reasoning_effort="low").get_llm() + return getattr(llm, "reasoning_effort", None) + + +def test_reasoning_model_receives_effort(monkeypatch): + assert _effort_on("gpt-5.4-mini", monkeypatch) == "low" + + +def test_non_reasoning_model_drops_effort(monkeypatch): + # gpt-4.1 would 400 with reasoning_effort — it must be dropped. + assert _effort_on("gpt-4.1", monkeypatch) is None diff --git a/tradingagents/default_config.py b/tradingagents/default_config.py index 6601a38ac..3c5b09c1c 100644 --- a/tradingagents/default_config.py +++ b/tradingagents/default_config.py @@ -18,13 +18,35 @@ _ENV_OVERRIDES = { "TRADINGAGENTS_CHECKPOINT_ENABLED": "checkpoint_enabled", "TRADINGAGENTS_BENCHMARK_TICKER": "benchmark_ticker", "TRADINGAGENTS_TEMPERATURE": "temperature", + # Provider-specific reasoning/thinking knobs (None = each provider's own + # default). Settable here for non-interactive runs; the CLI also offers an + # interactive choice, which is skipped when the matching var is set. + "TRADINGAGENTS_GOOGLE_THINKING_LEVEL": "google_thinking_level", + "TRADINGAGENTS_OPENAI_REASONING_EFFORT": "openai_reasoning_effort", + "TRADINGAGENTS_ANTHROPIC_EFFORT": "anthropic_effort", } +_BOOL_TRUE = ("true", "1", "yes", "on") +_BOOL_FALSE = ("false", "0", "no", "off") + + def _coerce(value: str, reference): - """Coerce env-var string to the type of the existing default value.""" + """Coerce env-var string to the type of the existing default value. + + Invalid values raise ``ValueError`` rather than silently falling back to a + default — a misspelled boolean (e.g. ``treu``) or non-numeric int should fail + loudly at startup, not quietly misconfigure an unattended run. + """ if isinstance(reference, bool): - return value.strip().lower() in ("true", "1", "yes", "on") + normalized = value.strip().lower() + if normalized in _BOOL_TRUE: + return True + if normalized in _BOOL_FALSE: + return False + raise ValueError( + f"expected a boolean ({'/'.join(_BOOL_TRUE + _BOOL_FALSE)}), got {value!r}" + ) if isinstance(reference, int) and not isinstance(reference, bool): return int(value) if isinstance(reference, float): @@ -38,7 +60,10 @@ def _apply_env_overrides(config: dict) -> dict: raw = os.environ.get(env_var) if raw is None or raw == "": continue - config[key] = _coerce(raw, config.get(key)) + try: + config[key] = _coerce(raw, config.get(key)) + except ValueError as exc: + raise ValueError(f"Invalid value for {env_var}: {exc}") from exc return config diff --git a/tradingagents/llm_clients/anthropic_client.py b/tradingagents/llm_clients/anthropic_client.py index 8449ba680..880381e4f 100644 --- a/tradingagents/llm_clients/anthropic_client.py +++ b/tradingagents/llm_clients/anthropic_client.py @@ -12,20 +12,27 @@ _PASSTHROUGH_KWARGS = ( ) # Anthropic's extended-thinking ``effort`` parameter is accepted by Opus 4.5+ -# and Sonnet 4.5+ only. Haiku (any version shipped to date) 400s with -# ``"This model does not support the effort parameter"`` (#831). Future -# ``claude-{opus,sonnet}-X-Y`` releases inherit effort support via the -# forward-compat pattern below; future Haiku stays excluded by default. +# and Sonnet 4.6+ only. Sonnet 4.5 and any Haiku version 400 with +# ``"This model does not support the effort parameter"`` (#831). The per-family +# minimum version below is forward-compatible: future ``claude-{opus,sonnet}-X-Y`` +# releases inherit support automatically, while Sonnet 4.5 and Haiku stay excluded. _EFFORT_EXACT = { "claude-mythos-preview", # non-standard preview name; effort-capable } -_EFFORT_PATTERN = re.compile(r"^claude-(opus|sonnet)-\d+-\d+$") +_EFFORT_MODEL = re.compile(r"^claude-(opus|sonnet)-(\d+)-(\d+)$") +_EFFORT_MIN_VERSION = {"opus": (4, 5), "sonnet": (4, 6)} def _supports_effort(model: str) -> bool: """Whether Anthropic accepts the ``effort`` parameter for this model.""" model_lc = model.lower() - return model_lc in _EFFORT_EXACT or bool(_EFFORT_PATTERN.match(model_lc)) + if model_lc in _EFFORT_EXACT: + return True + match = _EFFORT_MODEL.match(model_lc) + if not match: + return False + family, major, minor = match.group(1), int(match.group(2)), int(match.group(3)) + return (major, minor) >= _EFFORT_MIN_VERSION[family] class NormalizedChatAnthropic(ChatAnthropic): diff --git a/tradingagents/llm_clients/openai_client.py b/tradingagents/llm_clients/openai_client.py index c818a9101..da65f58f9 100644 --- a/tradingagents/llm_clients/openai_client.py +++ b/tradingagents/llm_clients/openai_client.py @@ -1,4 +1,5 @@ import os +import re from dataclasses import dataclass from typing import Any from urllib.parse import urlparse @@ -150,6 +151,18 @@ _PASSTHROUGH_KWARGS = ( "api_key", "callbacks", "http_client", "http_async_client", ) +# OpenAI's ``reasoning_effort`` is only accepted by reasoning models — the GPT-5 +# family and the o-series. Non-reasoning models (gpt-4.1, gpt-4o, ...) 400 with +# "Unsupported parameter: 'reasoning.effort' is not supported with this model". +# Drop the kwarg for those rather than crash the run. +_OPENAI_REASONING_MODEL = re.compile(r"^(gpt-5|o[1-9])") + + +def _supports_reasoning_effort(model: str) -> bool: + """Whether the (native OpenAI) model accepts ``reasoning_effort``.""" + return bool(_OPENAI_REASONING_MODEL.match(model.lower().strip())) + + @dataclass(frozen=True) class ProviderSpec: """Declarative config for one OpenAI-compatible provider. @@ -291,8 +304,11 @@ class OpenAIClient(BaseLLMClient): # Forward user-provided kwargs for key in _PASSTHROUGH_KWARGS: - if key in self.kwargs: - llm_kwargs[key] = self.kwargs[key] + if key not in self.kwargs: + continue + if key == "reasoning_effort" and not _supports_reasoning_effort(self.model): + continue + llm_kwargs[key] = self.kwargs[key] # The subclass (provider quirks) comes from the registry spec. return chat_cls(**llm_kwargs)