mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-29 19:26:24 +03:00
fix(cli): honor env precedence for LLM and run config
Interactive selections and flag defaults overrode TRADINGAGENTS_* env vars. Rule: an explicit env value or CLI flag wins; otherwise the env-applied default is kept.
- Research depth: skip the prompt when both round-count env vars are set, and stop overwriting them (#977).
- Checkpoint: --checkpoint/--no-checkpoint is tri-state; omitting it keeps TRADINGAGENTS_CHECKPOINT_ENABLED (#976).
- Docker ollama: use TRADINGAGENTS_LLM_PROVIDER + OLLAMA_BASE_URL, not a bare LLM_PROVIDER the overlay never reads (#975).
- Reasoning/thinking knobs: settable via env; the prompt is skipped when set.
- Effort gating: forward effort only to models that accept it (Anthropic Opus 4.5+/Sonnet 4.6+, OpenAI reasoning models); drop it elsewhere.
- Boolean env values: raise a named error on invalid input instead of silently becoming False.
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
"""Tests for Anthropic effort-parameter gating (#831).
|
||||
|
||||
Haiku 4.5 (and current Haiku versions) reject the ``effort`` parameter
|
||||
with a 400. Opus 4.5+ and Sonnet 4.5+ accept it. The gate uses a
|
||||
forward-compat regex so future ``claude-{opus,sonnet}-X-Y`` releases
|
||||
inherit support automatically.
|
||||
Haiku (any version) and Sonnet 4.5 reject the ``effort`` parameter with a
|
||||
400. Only Opus 4.5+ and Sonnet 4.6+ accept it. The gate uses a per-family
|
||||
minimum version so future ``claude-{opus,sonnet}-X-Y`` releases inherit
|
||||
support automatically.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
@@ -24,9 +24,13 @@ def _capture_kwargs(monkeypatch):
|
||||
class TestEffortGate:
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
["claude-haiku-4-5", "claude-haiku-5-0", "claude-haiku-4-7-preview"],
|
||||
[
|
||||
"claude-haiku-4-5", "claude-haiku-5-0", "claude-haiku-4-7-preview",
|
||||
# Sonnet 4.5 (and earlier) 400 on effort — only Sonnet 4.6+ supports it.
|
||||
"claude-sonnet-4-5", "claude-sonnet-4-0",
|
||||
],
|
||||
)
|
||||
def test_haiku_does_not_receive_effort(self, monkeypatch, model):
|
||||
def test_unsupported_models_do_not_receive_effort(self, monkeypatch, model):
|
||||
captured = _capture_kwargs(monkeypatch)
|
||||
mod.AnthropicClient(model=model, effort="medium", api_key="x").get_llm()
|
||||
assert "effort" not in captured["kwargs"]
|
||||
@@ -35,7 +39,7 @@ class TestEffortGate:
|
||||
"model",
|
||||
[
|
||||
"claude-opus-4-5", "claude-opus-4-6", "claude-opus-4-7",
|
||||
"claude-sonnet-4-5", "claude-sonnet-4-6",
|
||||
"claude-sonnet-4-6",
|
||||
],
|
||||
)
|
||||
def test_current_opus_and_sonnet_receive_effort(self, monkeypatch, model):
|
||||
|
||||
69
tests/test_cli_config_precedence.py
Normal file
69
tests/test_cli_config_precedence.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""CLI config precedence (#976, #977).
|
||||
|
||||
An explicit environment override for the debate/risk round counts, or the
|
||||
checkpoint flag, must win over the interactive research-depth selection — the CLI
|
||||
must not clobber an env-configured value back to a prompt/flag default.
|
||||
"""
|
||||
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
import cli.main as m
|
||||
|
||||
# Minimal selections dict shaped like get_user_selections()'s return value.
# research_depth is deliberately 5 so it is distinguishable from the
# env-provided round counts (2 / 4) used by the tests in this module.
SELECTIONS = {
    "research_depth": 5,
    "shallow_thinker": "gpt-5.4-mini",
    "deep_thinker": "gpt-5.5",
    "backend_url": None,
    "llm_provider": "openai",
    "google_thinking_level": None,
    "openai_reasoning_effort": None,
    "anthropic_effort": None,
    "output_language": "English",
}
|
||||
|
||||
|
||||
def test_research_depth_sets_both_rounds_without_env(monkeypatch):
    """With neither round-count env var set, research_depth fills both counts."""
    for var in ("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "TRADINGAGENTS_MAX_RISK_ROUNDS"):
        monkeypatch.delenv(var, raising=False)
    cfg = m._build_run_config(SELECTIONS, checkpoint=None)
    assert cfg["max_debate_rounds"] == 5
    assert cfg["max_risk_discuss_rounds"] == 5
|
||||
|
||||
|
||||
def test_env_round_counts_win_over_selection(monkeypatch):
    """Both round-count env vars set: the env values beat research_depth."""
    monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "2")
    monkeypatch.setenv("TRADINGAGENTS_MAX_RISK_ROUNDS", "4")
    # DEFAULT_CONFIG already reflects the env (applied at import); emulate that.
    patched = dict(m.DEFAULT_CONFIG, max_debate_rounds=2, max_risk_discuss_rounds=4)
    with mock.patch.object(m, "DEFAULT_CONFIG", patched):
        cfg = m._build_run_config(SELECTIONS, checkpoint=None)
    assert cfg["max_debate_rounds"] == 2  # env value, not research_depth=5
    assert cfg["max_risk_discuss_rounds"] == 4
|
||||
|
||||
|
||||
def test_partial_env_only_overrides_that_count(monkeypatch):
    """Only the debate env var is set: it wins, the risk count uses research_depth."""
    monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "2")
    monkeypatch.delenv("TRADINGAGENTS_MAX_RISK_ROUNDS", raising=False)
    # Emulate the import-time env application for the one variable that is set.
    patched = dict(m.DEFAULT_CONFIG, max_debate_rounds=2)
    with mock.patch.object(m, "DEFAULT_CONFIG", patched):
        cfg = m._build_run_config(SELECTIONS, checkpoint=None)
    assert cfg["max_debate_rounds"] == 2  # env wins
    assert cfg["max_risk_discuss_rounds"] == 5  # falls through to research_depth
|
||||
|
||||
|
||||
def test_checkpoint_none_preserves_env_default():
    """checkpoint=None (flag omitted) keeps the value already in DEFAULT_CONFIG."""
    patched = dict(m.DEFAULT_CONFIG, checkpoint_enabled=True)  # e.g. env-enabled
    with mock.patch.object(m, "DEFAULT_CONFIG", patched):
        cfg = m._build_run_config(SELECTIONS, checkpoint=None)
    assert cfg["checkpoint_enabled"] is True  # not clobbered back to False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("flag", [True, False])
def test_checkpoint_flag_overrides_env(flag):
    """An explicit checkpoint flag wins over the opposite configured default."""
    # Seed the config with the opposite value so a pass proves the flag was applied.
    patched = dict(m.DEFAULT_CONFIG, checkpoint_enabled=not flag)
    with mock.patch.object(m, "DEFAULT_CONFIG", patched):
        cfg = m._build_run_config(SELECTIONS, checkpoint=flag)
    assert cfg["checkpoint_enabled"] is flag
|
||||
@@ -82,5 +82,68 @@ class TestCliSkipsPromptsFromEnv(unittest.TestCase):
|
||||
self.assertEqual(sel["output_language"], "Japanese")
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestResearchDepthSkippedFromEnv(unittest.TestCase):
    """The research-depth prompt is bypassed when both round-count env vars are set."""

    def test_both_round_envs_skip_depth_prompt(self):
        """get_user_selections() must not call select_research_depth here."""
        import cli.main as m

        env = {
            "TRADINGAGENTS_MAX_DEBATE_ROUNDS": "2",
            "TRADINGAGENTS_MAX_RISK_ROUNDS": "4",
        }
        # Mirror the env values in the config the CLI reads.
        fake_cfg = dict(m.DEFAULT_CONFIG)
        fake_cfg.update({"max_debate_rounds": 2, "max_risk_discuss_rounds": 4})

        # Every interactive step is stubbed so the flow runs without a terminal.
        with mock.patch.dict(os.environ, env, clear=False), \
                mock.patch.object(m, "DEFAULT_CONFIG", fake_cfg), \
                mock.patch.object(m, "fetch_announcements", return_value=None), \
                mock.patch.object(m, "display_announcements"), \
                mock.patch.object(m, "get_ticker", return_value="AAPL"), \
                mock.patch.object(m, "get_analysis_date", return_value="2026-05-29"), \
                mock.patch.object(m, "select_analysts", return_value=[]), \
                mock.patch.object(m, "select_research_depth") as prompt_depth, \
                mock.patch.object(m, "ensure_api_key"), \
                mock.patch.object(m, "select_llm_provider", return_value=("openai", None)), \
                mock.patch.object(m, "ask_output_language", return_value="English"), \
                mock.patch.object(m, "select_shallow_thinking_agent", return_value="gpt-5.4-mini"), \
                mock.patch.object(m, "select_deep_thinking_agent", return_value="gpt-5.5"), \
                mock.patch.object(m, "ask_openai_reasoning_effort", return_value=None):
            sel = m.get_user_selections()

        # The research-depth prompt is skipped; the value comes from the env config.
        prompt_depth.assert_not_called()
        self.assertEqual(sel["research_depth"], 2)
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestReasoningEffortSkippedFromEnv(unittest.TestCase):
    """The reasoning-effort prompt is bypassed when the effort env var is set."""

    def test_effort_env_skips_step8_prompt(self):
        """get_user_selections() must not call ask_openai_reasoning_effort here."""
        import cli.main as m

        env = {"TRADINGAGENTS_OPENAI_REASONING_EFFORT": "high"}
        # Mirror the env value in the config the CLI reads.
        fake_cfg = dict(m.DEFAULT_CONFIG)
        fake_cfg.update({"openai_reasoning_effort": "high"})

        # Every interactive step is stubbed so the flow runs without a terminal.
        with mock.patch.dict(os.environ, env, clear=False), \
                mock.patch.object(m, "DEFAULT_CONFIG", fake_cfg), \
                mock.patch.object(m, "fetch_announcements", return_value=None), \
                mock.patch.object(m, "display_announcements"), \
                mock.patch.object(m, "get_ticker", return_value="AAPL"), \
                mock.patch.object(m, "get_analysis_date", return_value="2026-05-29"), \
                mock.patch.object(m, "select_analysts", return_value=[]), \
                mock.patch.object(m, "select_research_depth", return_value=1), \
                mock.patch.object(m, "ensure_api_key"), \
                mock.patch.object(m, "select_llm_provider", return_value=("openai", None)), \
                mock.patch.object(m, "ask_output_language", return_value="English"), \
                mock.patch.object(m, "select_shallow_thinking_agent", return_value="gpt-5.4-mini"), \
                mock.patch.object(m, "select_deep_thinking_agent", return_value="gpt-5.5"), \
                mock.patch.object(m, "ask_openai_reasoning_effort") as prompt_effort:
            sel = m.get_user_selections()

        # The reasoning-effort prompt is skipped; the value comes from env config.
        prompt_effort.assert_not_called()
        self.assertEqual(sel["openai_reasoning_effort"], "high")
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
@@ -68,6 +68,27 @@ def test_bool_coercion(monkeypatch, raw, expected):
|
||||
assert dc.DEFAULT_CONFIG["checkpoint_enabled"] is expected
|
||||
|
||||
|
||||
def test_reasoning_thinking_overrides(monkeypatch):
    """The provider reasoning/thinking knobs are env-configurable (non-interactive runs)."""
    dc = _reload_with_env(
        monkeypatch,
        TRADINGAGENTS_OPENAI_REASONING_EFFORT="high",
        TRADINGAGENTS_GOOGLE_THINKING_LEVEL="minimal",
        TRADINGAGENTS_ANTHROPIC_EFFORT="low",
    )
    assert dc.DEFAULT_CONFIG["openai_reasoning_effort"] == "high"
    assert dc.DEFAULT_CONFIG["google_thinking_level"] == "minimal"
    assert dc.DEFAULT_CONFIG["anthropic_effort"] == "low"
|
||||
|
||||
|
||||
def test_reasoning_effort_defaults_to_none(monkeypatch):
    """Unset reasoning/thinking knobs stay None so each provider uses its own default."""
    dc = _reload_with_env(monkeypatch)
    assert dc.DEFAULT_CONFIG["openai_reasoning_effort"] is None
    assert dc.DEFAULT_CONFIG["google_thinking_level"] is None
    assert dc.DEFAULT_CONFIG["anthropic_effort"] is None
|
||||
|
||||
|
||||
def test_empty_env_value_is_passthrough(monkeypatch):
|
||||
"""Empty TRADINGAGENTS_* values must not clobber the built-in default."""
|
||||
dc = _reload_with_env(
|
||||
@@ -82,13 +103,23 @@ def test_empty_env_value_is_passthrough(monkeypatch):
|
||||
def test_invalid_int_raises(monkeypatch):
    """Garbage int values should surface a ValueError at import, not silently misconfigure."""
    monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "not-a-number")
    try:
        # The error message must name the offending variable.
        with pytest.raises(ValueError, match="TRADINGAGENTS_MAX_DEBATE_ROUNDS"):
            importlib.reload(default_config_module)
    finally:
        # Restore module state for subsequent tests in this process, even when
        # the assertion above fails (otherwise the bad reload would leak).
        monkeypatch.delenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", raising=False)
        importlib.reload(default_config_module)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bad", ["treu", "flase", "maybe", "2", "enabled"])
def test_invalid_bool_raises(monkeypatch, bad):
    """A misspelled boolean must fail loudly (like ints) instead of silently False."""
    monkeypatch.setenv("TRADINGAGENTS_CHECKPOINT_ENABLED", bad)
    try:
        # The error message must name the offending variable.
        with pytest.raises(ValueError, match="TRADINGAGENTS_CHECKPOINT_ENABLED"):
            importlib.reload(default_config_module)
    finally:
        # Restore module state for subsequent tests in this process, even when
        # the assertion above fails.
        monkeypatch.delenv("TRADINGAGENTS_CHECKPOINT_ENABLED", raising=False)
        importlib.reload(default_config_module)
|
||||
|
||||
|
||||
def test_unknown_env_var_is_ignored(monkeypatch):
|
||||
"""Env vars outside _ENV_OVERRIDES must not bleed into DEFAULT_CONFIG."""
|
||||
dc = _reload_with_env(
|
||||
|
||||
42
tests/test_openai_reasoning_effort.py
Normal file
42
tests/test_openai_reasoning_effort.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""OpenAI ``reasoning_effort`` is gated to reasoning models.
|
||||
|
||||
Non-reasoning OpenAI models (gpt-4.1, gpt-4o, ...) 400 with "Unsupported
|
||||
parameter: 'reasoning.effort'". The client must drop the kwarg for those rather
|
||||
than forward it and crash the run. The GPT-5 family and the o-series accept it.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from tradingagents.llm_clients.openai_client import (
|
||||
OpenAIClient,
|
||||
_supports_reasoning_effort,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "model,expected",
    [
        ("gpt-5.5", True), ("gpt-5.4", True), ("gpt-5.4-mini", True),
        ("gpt-5.5-pro", True), ("o1", True), ("o3-mini", True),
        ("gpt-4.1", False), ("gpt-4o", False), ("gpt-4o-mini", False),
        ("gpt-3.5-turbo", False),
    ],
)
def test_supports_reasoning_effort(model, expected):
    """_supports_reasoning_effort returns exactly True/False for each listed model."""
    assert _supports_reasoning_effort(model) is expected
|
||||
|
||||
|
||||
def _effort_on(model, monkeypatch):
    """Build an OpenAI LLM for *model* with reasoning_effort="low" and report it.

    Returns the ``reasoning_effort`` attribute of the constructed LLM, or
    ``None`` when the attribute is absent.
    """
    # A fake key lets get_llm() construct the client without a network call.
    monkeypatch.setenv("OPENAI_API_KEY", "test-key")
    llm = OpenAIClient(model, provider="openai", reasoning_effort="low").get_llm()
    return getattr(llm, "reasoning_effort", None)
|
||||
|
||||
|
||||
def test_reasoning_model_receives_effort(monkeypatch):
    """A reasoning model (gpt-5.4-mini) keeps the requested effort."""
    assert _effort_on("gpt-5.4-mini", monkeypatch) == "low"
|
||||
|
||||
|
||||
def test_non_reasoning_model_drops_effort(monkeypatch):
    """A non-reasoning model must not carry reasoning_effort on the built LLM."""
    # gpt-4.1 would 400 with reasoning_effort — it must be dropped.
    assert _effort_on("gpt-4.1", monkeypatch) is None
|
||||
Reference in New Issue
Block a user