mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-30 11:44:18 +03:00
fix(cli): honor env precedence for LLM and run config
Interactive selections and flag defaults overrode TRADINGAGENTS_* env vars. Rule: an explicit env value or CLI flag wins; otherwise the env-applied default is kept. - Research depth: skip the prompt when both round-count env vars are set, and stop overwriting them (#977). - Checkpoint: --checkpoint/--no-checkpoint is tri-state; omitting it keeps TRADINGAGENTS_CHECKPOINT_ENABLED (#976). - Docker ollama: use TRADINGAGENTS_LLM_PROVIDER + OLLAMA_BASE_URL, not a bare LLM_PROVIDER the overlay never reads (#975). - Reasoning/thinking knobs: settable via env; the prompt is skipped when set. - Effort gating: forward effort only to models that accept it (Anthropic Opus 4.5+/Sonnet 4.6+, OpenAI reasoning models); drop it elsewhere. - Boolean env values: raise a named error on invalid input instead of silently becoming False.
This commit is contained in:
@@ -55,3 +55,8 @@ NVIDIA_API_KEY=
|
|||||||
# honor it). Unset leaves each provider at its default. See the README
|
# honor it). Unset leaves each provider at its default. See the README
|
||||||
# "Reproducibility" note — no setting makes LLM output fully deterministic.
|
# "Reproducibility" note — no setting makes LLM output fully deterministic.
|
||||||
#TRADINGAGENTS_TEMPERATURE=0.0
|
#TRADINGAGENTS_TEMPERATURE=0.0
|
||||||
|
# Provider-specific reasoning/thinking depth (optional; unset = provider
|
||||||
|
# default). Setting one also skips the matching interactive prompt.
|
||||||
|
#TRADINGAGENTS_OPENAI_REASONING_EFFORT=medium
|
||||||
|
#TRADINGAGENTS_GOOGLE_THINKING_LEVEL=high
|
||||||
|
#TRADINGAGENTS_ANTHROPIC_EFFORT=high
|
||||||
|
|||||||
119
cli/main.py
119
cli/main.py
@@ -517,6 +517,20 @@ def get_user_selections():
|
|||||||
box_content += f"\n[dim]Default: {default}[/dim]"
|
box_content += f"\n[dim]Default: {default}[/dim]"
|
||||||
return Panel(box_content, border_style="blue", padding=(1, 2))
|
return Panel(box_content, border_style="blue", padding=(1, 2))
|
||||||
|
|
||||||
|
def thinking_value_or_prompt(env_var, config_key, label, box_title, box_body, prompt_fn):
|
||||||
|
"""Return the env-configured reasoning/thinking value, or prompt for it.
|
||||||
|
|
||||||
|
When ``env_var`` is set the interactive choice is skipped and the value
|
||||||
|
the env overlay placed on DEFAULT_CONFIG is used — mirroring the
|
||||||
|
env-precedence rule applied to the other selection steps.
|
||||||
|
"""
|
||||||
|
if os.environ.get(env_var):
|
||||||
|
value = DEFAULT_CONFIG[config_key]
|
||||||
|
console.print(f"[green]✓ {label} from environment:[/green] {value}")
|
||||||
|
return value
|
||||||
|
console.print(create_question_box(box_title, box_body))
|
||||||
|
return prompt_fn()
|
||||||
|
|
||||||
# Step 1: Ticker symbol
|
# Step 1: Ticker symbol
|
||||||
console.print(
|
console.print(
|
||||||
create_question_box(
|
create_question_box(
|
||||||
@@ -571,13 +585,27 @@ def get_user_selections():
|
|||||||
f"[green]Selected analysts:[/green] {', '.join(analyst.value for analyst in selected_analysts)}"
|
f"[green]Selected analysts:[/green] {', '.join(analyst.value for analyst in selected_analysts)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Step 5: Research depth
|
# Step 5: Research depth (skipped when both round counts are set via env).
|
||||||
console.print(
|
# Research depth maps to the debate + risk round counts; when both are
|
||||||
create_question_box(
|
# supplied through TRADINGAGENTS_MAX_DEBATE_ROUNDS / _MAX_RISK_ROUNDS we keep
|
||||||
"Step 5: Research Depth", "Select your research depth level"
|
# the run non-interactive and honor the env values (#977).
|
||||||
)
|
depth_from_env = bool(os.environ.get("TRADINGAGENTS_MAX_DEBATE_ROUNDS")) and bool(
|
||||||
|
os.environ.get("TRADINGAGENTS_MAX_RISK_ROUNDS")
|
||||||
)
|
)
|
||||||
selected_research_depth = select_research_depth()
|
if depth_from_env:
|
||||||
|
selected_research_depth = DEFAULT_CONFIG["max_debate_rounds"]
|
||||||
|
console.print(
|
||||||
|
f"[green]✓ Research depth from environment:[/green] "
|
||||||
|
f"{DEFAULT_CONFIG['max_debate_rounds']} debate / "
|
||||||
|
f"{DEFAULT_CONFIG['max_risk_discuss_rounds']} risk rounds"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
console.print(
|
||||||
|
create_question_box(
|
||||||
|
"Step 5: Research Depth", "Select your research depth level"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
selected_research_depth = select_research_depth()
|
||||||
|
|
||||||
# Step 6: LLM Provider (skipped when set via TRADINGAGENTS_LLM_PROVIDER).
|
# Step 6: LLM Provider (skipped when set via TRADINGAGENTS_LLM_PROVIDER).
|
||||||
# The backend URL comes from TRADINGAGENTS_LLM_BACKEND_URL when set,
|
# The backend URL comes from TRADINGAGENTS_LLM_BACKEND_URL when set,
|
||||||
@@ -649,43 +677,38 @@ def get_user_selections():
|
|||||||
selected_shallow_thinker = select_shallow_thinking_agent(selected_llm_provider)
|
selected_shallow_thinker = select_shallow_thinking_agent(selected_llm_provider)
|
||||||
selected_deep_thinker = select_deep_thinking_agent(selected_llm_provider)
|
selected_deep_thinker = select_deep_thinking_agent(selected_llm_provider)
|
||||||
|
|
||||||
# Step 8: Provider-specific thinking configuration
|
# Step 8: Provider-specific reasoning/thinking configuration. Each knob is
|
||||||
|
# settable via its TRADINGAGENTS_* env var; when that var is set (or the
|
||||||
|
# provider itself came from env) the prompt is skipped and the configured
|
||||||
|
# value is used — same env-precedence rule as the steps above. None = each
|
||||||
|
# provider's own default.
|
||||||
thinking_level = None
|
thinking_level = None
|
||||||
reasoning_effort = None
|
reasoning_effort = None
|
||||||
anthropic_effort = None
|
anthropic_effort = None
|
||||||
|
|
||||||
provider_lower = selected_llm_provider.lower()
|
provider_lower = selected_llm_provider.lower()
|
||||||
# When the provider is configured via environment we keep the run fully
|
|
||||||
# non-interactive and use the config defaults (None = each provider's own
|
|
||||||
# default reasoning/thinking behavior) instead of prompting.
|
|
||||||
if provider_from_env:
|
if provider_from_env:
|
||||||
thinking_level = DEFAULT_CONFIG["google_thinking_level"]
|
thinking_level = DEFAULT_CONFIG["google_thinking_level"]
|
||||||
reasoning_effort = DEFAULT_CONFIG["openai_reasoning_effort"]
|
reasoning_effort = DEFAULT_CONFIG["openai_reasoning_effort"]
|
||||||
anthropic_effort = DEFAULT_CONFIG["anthropic_effort"]
|
anthropic_effort = DEFAULT_CONFIG["anthropic_effort"]
|
||||||
elif provider_lower == "google":
|
elif provider_lower == "google":
|
||||||
console.print(
|
thinking_level = thinking_value_or_prompt(
|
||||||
create_question_box(
|
"TRADINGAGENTS_GOOGLE_THINKING_LEVEL", "google_thinking_level",
|
||||||
"Step 8: Thinking Mode",
|
"Gemini thinking mode", "Step 8: Thinking Mode",
|
||||||
"Configure Gemini thinking mode"
|
"Configure Gemini thinking mode", ask_gemini_thinking_config,
|
||||||
)
|
|
||||||
)
|
)
|
||||||
thinking_level = ask_gemini_thinking_config()
|
|
||||||
elif provider_lower == "openai":
|
elif provider_lower == "openai":
|
||||||
console.print(
|
reasoning_effort = thinking_value_or_prompt(
|
||||||
create_question_box(
|
"TRADINGAGENTS_OPENAI_REASONING_EFFORT", "openai_reasoning_effort",
|
||||||
"Step 8: Reasoning Effort",
|
"Reasoning effort", "Step 8: Reasoning Effort",
|
||||||
"Configure OpenAI reasoning effort level"
|
"Configure OpenAI reasoning effort level", ask_openai_reasoning_effort,
|
||||||
)
|
|
||||||
)
|
)
|
||||||
reasoning_effort = ask_openai_reasoning_effort()
|
|
||||||
elif provider_lower == "anthropic":
|
elif provider_lower == "anthropic":
|
||||||
console.print(
|
anthropic_effort = thinking_value_or_prompt(
|
||||||
create_question_box(
|
"TRADINGAGENTS_ANTHROPIC_EFFORT", "anthropic_effort",
|
||||||
"Step 8: Effort Level",
|
"Claude effort", "Step 8: Effort Level",
|
||||||
"Configure Claude effort level"
|
"Configure Claude effort level", ask_anthropic_effort,
|
||||||
)
|
|
||||||
)
|
)
|
||||||
anthropic_effort = ask_anthropic_effort()
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"ticker": selected_ticker,
|
"ticker": selected_ticker,
|
||||||
@@ -1019,14 +1042,20 @@ def format_tool_args(args, max_length=80) -> str:
|
|||||||
return result[:max_length - 3] + "..."
|
return result[:max_length - 3] + "..."
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def run_analysis(checkpoint: bool = False):
|
def _build_run_config(selections: dict, checkpoint: bool | None) -> dict:
|
||||||
# First get all user selections
|
"""Assemble the run config from interactive selections, honoring env precedence.
|
||||||
selections = get_user_selections()
|
|
||||||
|
|
||||||
# Create config with selected research depth
|
Round counts and checkpoint follow "explicit env/flag wins": an env-applied
|
||||||
|
value on DEFAULT_CONFIG is preserved unless the user overrode it on the CLI.
|
||||||
|
"""
|
||||||
config = DEFAULT_CONFIG.copy()
|
config = DEFAULT_CONFIG.copy()
|
||||||
config["max_debate_rounds"] = selections["research_depth"]
|
# Research depth sets both round counts, but an explicit env override
|
||||||
config["max_risk_discuss_rounds"] = selections["research_depth"]
|
# (TRADINGAGENTS_MAX_DEBATE_ROUNDS / _MAX_RISK_ROUNDS) wins over the
|
||||||
|
# interactive selection — leave the env-applied value in place (#977).
|
||||||
|
if not os.environ.get("TRADINGAGENTS_MAX_DEBATE_ROUNDS"):
|
||||||
|
config["max_debate_rounds"] = selections["research_depth"]
|
||||||
|
if not os.environ.get("TRADINGAGENTS_MAX_RISK_ROUNDS"):
|
||||||
|
config["max_risk_discuss_rounds"] = selections["research_depth"]
|
||||||
config["quick_think_llm"] = selections["shallow_thinker"]
|
config["quick_think_llm"] = selections["shallow_thinker"]
|
||||||
config["deep_think_llm"] = selections["deep_thinker"]
|
config["deep_think_llm"] = selections["deep_thinker"]
|
||||||
config["backend_url"] = selections["backend_url"]
|
config["backend_url"] = selections["backend_url"]
|
||||||
@@ -1036,7 +1065,18 @@ def run_analysis(checkpoint: bool = False):
|
|||||||
config["openai_reasoning_effort"] = selections.get("openai_reasoning_effort")
|
config["openai_reasoning_effort"] = selections.get("openai_reasoning_effort")
|
||||||
config["anthropic_effort"] = selections.get("anthropic_effort")
|
config["anthropic_effort"] = selections.get("anthropic_effort")
|
||||||
config["output_language"] = selections.get("output_language", "English")
|
config["output_language"] = selections.get("output_language", "English")
|
||||||
config["checkpoint_enabled"] = checkpoint
|
# --checkpoint/--no-checkpoint overrides only when explicitly given; omitting
|
||||||
|
# the flag preserves TRADINGAGENTS_CHECKPOINT_ENABLED / the default (#976).
|
||||||
|
if checkpoint is not None:
|
||||||
|
config["checkpoint_enabled"] = checkpoint
|
||||||
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
def run_analysis(checkpoint: bool | None = None):
|
||||||
|
# First get all user selections
|
||||||
|
selections = get_user_selections()
|
||||||
|
|
||||||
|
config = _build_run_config(selections, checkpoint)
|
||||||
|
|
||||||
# Create stats callback handler for tracking LLM/tool calls
|
# Create stats callback handler for tracking LLM/tool calls
|
||||||
stats_handler = StatsCallbackHandler()
|
stats_handler = StatsCallbackHandler()
|
||||||
@@ -1316,10 +1356,11 @@ def run_analysis(checkpoint: bool = False):
|
|||||||
|
|
||||||
@app.command()
|
@app.command()
|
||||||
def analyze(
|
def analyze(
|
||||||
checkpoint: bool = typer.Option(
|
checkpoint: bool | None = typer.Option(
|
||||||
False,
|
None,
|
||||||
"--checkpoint",
|
"--checkpoint/--no-checkpoint",
|
||||||
help="Enable checkpoint/resume: save state after each node so a crashed run can resume.",
|
help="Enable/disable checkpoint-resume (save state after each node so a "
|
||||||
|
"crashed run can resume). Omit to honor TRADINGAGENTS_CHECKPOINT_ENABLED.",
|
||||||
),
|
),
|
||||||
clear_checkpoints: bool = typer.Option(
|
clear_checkpoints: bool = typer.Option(
|
||||||
False,
|
False,
|
||||||
|
|||||||
@@ -20,7 +20,8 @@ services:
|
|||||||
env_file:
|
env_file:
|
||||||
- .env
|
- .env
|
||||||
environment:
|
environment:
|
||||||
- LLM_PROVIDER=ollama
|
- TRADINGAGENTS_LLM_PROVIDER=ollama
|
||||||
|
- OLLAMA_BASE_URL=http://ollama:11434/v1
|
||||||
volumes:
|
volumes:
|
||||||
- tradingagents_data:/home/appuser/.tradingagents
|
- tradingagents_data:/home/appuser/.tradingagents
|
||||||
depends_on:
|
depends_on:
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
"""Tests for Anthropic effort-parameter gating (#831).
|
"""Tests for Anthropic effort-parameter gating (#831).
|
||||||
|
|
||||||
Haiku 4.5 (and current Haiku versions) reject the ``effort`` parameter
|
Haiku (any version) and Sonnet 4.5 reject the ``effort`` parameter with a
|
||||||
with a 400. Opus 4.5+ and Sonnet 4.5+ accept it. The gate uses a
|
400. Only Opus 4.5+ and Sonnet 4.6+ accept it. The gate uses a per-family
|
||||||
forward-compat regex so future ``claude-{opus,sonnet}-X-Y`` releases
|
minimum version so future ``claude-{opus,sonnet}-X-Y`` releases inherit
|
||||||
inherit support automatically.
|
support automatically.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -24,9 +24,13 @@ def _capture_kwargs(monkeypatch):
|
|||||||
class TestEffortGate:
|
class TestEffortGate:
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"model",
|
"model",
|
||||||
["claude-haiku-4-5", "claude-haiku-5-0", "claude-haiku-4-7-preview"],
|
[
|
||||||
|
"claude-haiku-4-5", "claude-haiku-5-0", "claude-haiku-4-7-preview",
|
||||||
|
# Sonnet 4.5 (and earlier) 400 on effort — only Sonnet 4.6+ supports it.
|
||||||
|
"claude-sonnet-4-5", "claude-sonnet-4-0",
|
||||||
|
],
|
||||||
)
|
)
|
||||||
def test_haiku_does_not_receive_effort(self, monkeypatch, model):
|
def test_unsupported_models_do_not_receive_effort(self, monkeypatch, model):
|
||||||
captured = _capture_kwargs(monkeypatch)
|
captured = _capture_kwargs(monkeypatch)
|
||||||
mod.AnthropicClient(model=model, effort="medium", api_key="x").get_llm()
|
mod.AnthropicClient(model=model, effort="medium", api_key="x").get_llm()
|
||||||
assert "effort" not in captured["kwargs"]
|
assert "effort" not in captured["kwargs"]
|
||||||
@@ -35,7 +39,7 @@ class TestEffortGate:
|
|||||||
"model",
|
"model",
|
||||||
[
|
[
|
||||||
"claude-opus-4-5", "claude-opus-4-6", "claude-opus-4-7",
|
"claude-opus-4-5", "claude-opus-4-6", "claude-opus-4-7",
|
||||||
"claude-sonnet-4-5", "claude-sonnet-4-6",
|
"claude-sonnet-4-6",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_current_opus_and_sonnet_receive_effort(self, monkeypatch, model):
|
def test_current_opus_and_sonnet_receive_effort(self, monkeypatch, model):
|
||||||
|
|||||||
69
tests/test_cli_config_precedence.py
Normal file
69
tests/test_cli_config_precedence.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
"""CLI config precedence (#976, #977).
|
||||||
|
|
||||||
|
An explicit environment override for the debate/risk round counts, or the
|
||||||
|
checkpoint flag, must win over the interactive research-depth selection — the CLI
|
||||||
|
must not clobber an env-configured value back to a prompt/flag default.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import cli.main as m
|
||||||
|
|
||||||
|
# Minimal selections dict shaped like get_user_selections()'s return value.
|
||||||
|
SELECTIONS = {
|
||||||
|
"research_depth": 5,
|
||||||
|
"shallow_thinker": "gpt-5.4-mini",
|
||||||
|
"deep_thinker": "gpt-5.5",
|
||||||
|
"backend_url": None,
|
||||||
|
"llm_provider": "openai",
|
||||||
|
"google_thinking_level": None,
|
||||||
|
"openai_reasoning_effort": None,
|
||||||
|
"anthropic_effort": None,
|
||||||
|
"output_language": "English",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_research_depth_sets_both_rounds_without_env(monkeypatch):
|
||||||
|
for var in ("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "TRADINGAGENTS_MAX_RISK_ROUNDS"):
|
||||||
|
monkeypatch.delenv(var, raising=False)
|
||||||
|
cfg = m._build_run_config(SELECTIONS, checkpoint=None)
|
||||||
|
assert cfg["max_debate_rounds"] == 5
|
||||||
|
assert cfg["max_risk_discuss_rounds"] == 5
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_round_counts_win_over_selection(monkeypatch):
|
||||||
|
monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "2")
|
||||||
|
monkeypatch.setenv("TRADINGAGENTS_MAX_RISK_ROUNDS", "4")
|
||||||
|
# DEFAULT_CONFIG already reflects the env (applied at import); emulate that.
|
||||||
|
patched = dict(m.DEFAULT_CONFIG, max_debate_rounds=2, max_risk_discuss_rounds=4)
|
||||||
|
with mock.patch.object(m, "DEFAULT_CONFIG", patched):
|
||||||
|
cfg = m._build_run_config(SELECTIONS, checkpoint=None)
|
||||||
|
assert cfg["max_debate_rounds"] == 2 # env value, not research_depth=5
|
||||||
|
assert cfg["max_risk_discuss_rounds"] == 4
|
||||||
|
|
||||||
|
|
||||||
|
def test_partial_env_only_overrides_that_count(monkeypatch):
|
||||||
|
monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "2")
|
||||||
|
monkeypatch.delenv("TRADINGAGENTS_MAX_RISK_ROUNDS", raising=False)
|
||||||
|
patched = dict(m.DEFAULT_CONFIG, max_debate_rounds=2)
|
||||||
|
with mock.patch.object(m, "DEFAULT_CONFIG", patched):
|
||||||
|
cfg = m._build_run_config(SELECTIONS, checkpoint=None)
|
||||||
|
assert cfg["max_debate_rounds"] == 2 # env wins
|
||||||
|
assert cfg["max_risk_discuss_rounds"] == 5 # falls through to research_depth
|
||||||
|
|
||||||
|
|
||||||
|
def test_checkpoint_none_preserves_env_default():
|
||||||
|
patched = dict(m.DEFAULT_CONFIG, checkpoint_enabled=True) # e.g. env-enabled
|
||||||
|
with mock.patch.object(m, "DEFAULT_CONFIG", patched):
|
||||||
|
cfg = m._build_run_config(SELECTIONS, checkpoint=None)
|
||||||
|
assert cfg["checkpoint_enabled"] is True # not clobbered back to False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("flag", [True, False])
|
||||||
|
def test_checkpoint_flag_overrides_env(flag):
|
||||||
|
patched = dict(m.DEFAULT_CONFIG, checkpoint_enabled=not flag)
|
||||||
|
with mock.patch.object(m, "DEFAULT_CONFIG", patched):
|
||||||
|
cfg = m._build_run_config(SELECTIONS, checkpoint=flag)
|
||||||
|
assert cfg["checkpoint_enabled"] is flag
|
||||||
@@ -82,5 +82,68 @@ class TestCliSkipsPromptsFromEnv(unittest.TestCase):
|
|||||||
self.assertEqual(sel["output_language"], "Japanese")
|
self.assertEqual(sel["output_language"], "Japanese")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
class TestResearchDepthSkippedFromEnv(unittest.TestCase):
|
||||||
|
def test_both_round_envs_skip_depth_prompt(self):
|
||||||
|
import cli.main as m
|
||||||
|
|
||||||
|
env = {
|
||||||
|
"TRADINGAGENTS_MAX_DEBATE_ROUNDS": "2",
|
||||||
|
"TRADINGAGENTS_MAX_RISK_ROUNDS": "4",
|
||||||
|
}
|
||||||
|
fake_cfg = dict(m.DEFAULT_CONFIG)
|
||||||
|
fake_cfg.update({"max_debate_rounds": 2, "max_risk_discuss_rounds": 4})
|
||||||
|
|
||||||
|
with mock.patch.dict(os.environ, env, clear=False), \
|
||||||
|
mock.patch.object(m, "DEFAULT_CONFIG", fake_cfg), \
|
||||||
|
mock.patch.object(m, "fetch_announcements", return_value=None), \
|
||||||
|
mock.patch.object(m, "display_announcements"), \
|
||||||
|
mock.patch.object(m, "get_ticker", return_value="AAPL"), \
|
||||||
|
mock.patch.object(m, "get_analysis_date", return_value="2026-05-29"), \
|
||||||
|
mock.patch.object(m, "select_analysts", return_value=[]), \
|
||||||
|
mock.patch.object(m, "select_research_depth") as prompt_depth, \
|
||||||
|
mock.patch.object(m, "ensure_api_key"), \
|
||||||
|
mock.patch.object(m, "select_llm_provider", return_value=("openai", None)), \
|
||||||
|
mock.patch.object(m, "ask_output_language", return_value="English"), \
|
||||||
|
mock.patch.object(m, "select_shallow_thinking_agent", return_value="gpt-5.4-mini"), \
|
||||||
|
mock.patch.object(m, "select_deep_thinking_agent", return_value="gpt-5.5"), \
|
||||||
|
mock.patch.object(m, "ask_openai_reasoning_effort", return_value=None):
|
||||||
|
sel = m.get_user_selections()
|
||||||
|
|
||||||
|
# The research-depth prompt is skipped; the value comes from the env config.
|
||||||
|
prompt_depth.assert_not_called()
|
||||||
|
self.assertEqual(sel["research_depth"], 2)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
class TestReasoningEffortSkippedFromEnv(unittest.TestCase):
|
||||||
|
def test_effort_env_skips_step8_prompt(self):
|
||||||
|
import cli.main as m
|
||||||
|
|
||||||
|
env = {"TRADINGAGENTS_OPENAI_REASONING_EFFORT": "high"}
|
||||||
|
fake_cfg = dict(m.DEFAULT_CONFIG)
|
||||||
|
fake_cfg.update({"openai_reasoning_effort": "high"})
|
||||||
|
|
||||||
|
with mock.patch.dict(os.environ, env, clear=False), \
|
||||||
|
mock.patch.object(m, "DEFAULT_CONFIG", fake_cfg), \
|
||||||
|
mock.patch.object(m, "fetch_announcements", return_value=None), \
|
||||||
|
mock.patch.object(m, "display_announcements"), \
|
||||||
|
mock.patch.object(m, "get_ticker", return_value="AAPL"), \
|
||||||
|
mock.patch.object(m, "get_analysis_date", return_value="2026-05-29"), \
|
||||||
|
mock.patch.object(m, "select_analysts", return_value=[]), \
|
||||||
|
mock.patch.object(m, "select_research_depth", return_value=1), \
|
||||||
|
mock.patch.object(m, "ensure_api_key"), \
|
||||||
|
mock.patch.object(m, "select_llm_provider", return_value=("openai", None)), \
|
||||||
|
mock.patch.object(m, "ask_output_language", return_value="English"), \
|
||||||
|
mock.patch.object(m, "select_shallow_thinking_agent", return_value="gpt-5.4-mini"), \
|
||||||
|
mock.patch.object(m, "select_deep_thinking_agent", return_value="gpt-5.5"), \
|
||||||
|
mock.patch.object(m, "ask_openai_reasoning_effort") as prompt_effort:
|
||||||
|
sel = m.get_user_selections()
|
||||||
|
|
||||||
|
# The reasoning-effort prompt is skipped; the value comes from env config.
|
||||||
|
prompt_effort.assert_not_called()
|
||||||
|
self.assertEqual(sel["openai_reasoning_effort"], "high")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -68,6 +68,27 @@ def test_bool_coercion(monkeypatch, raw, expected):
|
|||||||
assert dc.DEFAULT_CONFIG["checkpoint_enabled"] is expected
|
assert dc.DEFAULT_CONFIG["checkpoint_enabled"] is expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_reasoning_thinking_overrides(monkeypatch):
|
||||||
|
"""The provider reasoning/thinking knobs are env-configurable (non-interactive runs)."""
|
||||||
|
dc = _reload_with_env(
|
||||||
|
monkeypatch,
|
||||||
|
TRADINGAGENTS_OPENAI_REASONING_EFFORT="high",
|
||||||
|
TRADINGAGENTS_GOOGLE_THINKING_LEVEL="minimal",
|
||||||
|
TRADINGAGENTS_ANTHROPIC_EFFORT="low",
|
||||||
|
)
|
||||||
|
assert dc.DEFAULT_CONFIG["openai_reasoning_effort"] == "high"
|
||||||
|
assert dc.DEFAULT_CONFIG["google_thinking_level"] == "minimal"
|
||||||
|
assert dc.DEFAULT_CONFIG["anthropic_effort"] == "low"
|
||||||
|
|
||||||
|
|
||||||
|
def test_reasoning_effort_defaults_to_none(monkeypatch):
|
||||||
|
"""Unset reasoning/thinking knobs stay None so each provider uses its own default."""
|
||||||
|
dc = _reload_with_env(monkeypatch)
|
||||||
|
assert dc.DEFAULT_CONFIG["openai_reasoning_effort"] is None
|
||||||
|
assert dc.DEFAULT_CONFIG["google_thinking_level"] is None
|
||||||
|
assert dc.DEFAULT_CONFIG["anthropic_effort"] is None
|
||||||
|
|
||||||
|
|
||||||
def test_empty_env_value_is_passthrough(monkeypatch):
|
def test_empty_env_value_is_passthrough(monkeypatch):
|
||||||
"""Empty TRADINGAGENTS_* values must not clobber the built-in default."""
|
"""Empty TRADINGAGENTS_* values must not clobber the built-in default."""
|
||||||
dc = _reload_with_env(
|
dc = _reload_with_env(
|
||||||
@@ -82,13 +103,23 @@ def test_empty_env_value_is_passthrough(monkeypatch):
|
|||||||
def test_invalid_int_raises(monkeypatch):
|
def test_invalid_int_raises(monkeypatch):
|
||||||
"""Garbage int values should surface a ValueError at import, not silently misconfigure."""
|
"""Garbage int values should surface a ValueError at import, not silently misconfigure."""
|
||||||
monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "not-a-number")
|
monkeypatch.setenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", "not-a-number")
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError, match="TRADINGAGENTS_MAX_DEBATE_ROUNDS"):
|
||||||
importlib.reload(default_config_module)
|
importlib.reload(default_config_module)
|
||||||
# Restore module state for subsequent tests in this process
|
# Restore module state for subsequent tests in this process
|
||||||
monkeypatch.delenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", raising=False)
|
monkeypatch.delenv("TRADINGAGENTS_MAX_DEBATE_ROUNDS", raising=False)
|
||||||
importlib.reload(default_config_module)
|
importlib.reload(default_config_module)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("bad", ["treu", "flase", "maybe", "2", "enabled"])
|
||||||
|
def test_invalid_bool_raises(monkeypatch, bad):
|
||||||
|
"""A misspelled boolean must fail loudly (like ints) instead of silently False."""
|
||||||
|
monkeypatch.setenv("TRADINGAGENTS_CHECKPOINT_ENABLED", bad)
|
||||||
|
with pytest.raises(ValueError, match="TRADINGAGENTS_CHECKPOINT_ENABLED"):
|
||||||
|
importlib.reload(default_config_module)
|
||||||
|
monkeypatch.delenv("TRADINGAGENTS_CHECKPOINT_ENABLED", raising=False)
|
||||||
|
importlib.reload(default_config_module)
|
||||||
|
|
||||||
|
|
||||||
def test_unknown_env_var_is_ignored(monkeypatch):
|
def test_unknown_env_var_is_ignored(monkeypatch):
|
||||||
"""Env vars outside _ENV_OVERRIDES must not bleed into DEFAULT_CONFIG."""
|
"""Env vars outside _ENV_OVERRIDES must not bleed into DEFAULT_CONFIG."""
|
||||||
dc = _reload_with_env(
|
dc = _reload_with_env(
|
||||||
|
|||||||
42
tests/test_openai_reasoning_effort.py
Normal file
42
tests/test_openai_reasoning_effort.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
"""OpenAI ``reasoning_effort`` is gated to reasoning models.
|
||||||
|
|
||||||
|
Non-reasoning OpenAI models (gpt-4.1, gpt-4o, ...) 400 with "Unsupported
|
||||||
|
parameter: 'reasoning.effort'". The client must drop the kwarg for those rather
|
||||||
|
than forward it and crash the run. The GPT-5 family and the o-series accept it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from tradingagents.llm_clients.openai_client import (
|
||||||
|
OpenAIClient,
|
||||||
|
_supports_reasoning_effort,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"model,expected",
|
||||||
|
[
|
||||||
|
("gpt-5.5", True), ("gpt-5.4", True), ("gpt-5.4-mini", True),
|
||||||
|
("gpt-5.5-pro", True), ("o1", True), ("o3-mini", True),
|
||||||
|
("gpt-4.1", False), ("gpt-4o", False), ("gpt-4o-mini", False),
|
||||||
|
("gpt-3.5-turbo", False),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_supports_reasoning_effort(model, expected):
|
||||||
|
assert _supports_reasoning_effort(model) is expected
|
||||||
|
|
||||||
|
|
||||||
|
def _effort_on(model, monkeypatch):
|
||||||
|
# A fake key lets get_llm() construct the client without a network call.
|
||||||
|
monkeypatch.setenv("OPENAI_API_KEY", "test-key")
|
||||||
|
llm = OpenAIClient(model, provider="openai", reasoning_effort="low").get_llm()
|
||||||
|
return getattr(llm, "reasoning_effort", None)
|
||||||
|
|
||||||
|
|
||||||
|
def test_reasoning_model_receives_effort(monkeypatch):
|
||||||
|
assert _effort_on("gpt-5.4-mini", monkeypatch) == "low"
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_reasoning_model_drops_effort(monkeypatch):
|
||||||
|
# gpt-4.1 would 400 with reasoning_effort — it must be dropped.
|
||||||
|
assert _effort_on("gpt-4.1", monkeypatch) is None
|
||||||
@@ -18,13 +18,35 @@ _ENV_OVERRIDES = {
|
|||||||
"TRADINGAGENTS_CHECKPOINT_ENABLED": "checkpoint_enabled",
|
"TRADINGAGENTS_CHECKPOINT_ENABLED": "checkpoint_enabled",
|
||||||
"TRADINGAGENTS_BENCHMARK_TICKER": "benchmark_ticker",
|
"TRADINGAGENTS_BENCHMARK_TICKER": "benchmark_ticker",
|
||||||
"TRADINGAGENTS_TEMPERATURE": "temperature",
|
"TRADINGAGENTS_TEMPERATURE": "temperature",
|
||||||
|
# Provider-specific reasoning/thinking knobs (None = each provider's own
|
||||||
|
# default). Settable here for non-interactive runs; the CLI also offers an
|
||||||
|
# interactive choice, which is skipped when the matching var is set.
|
||||||
|
"TRADINGAGENTS_GOOGLE_THINKING_LEVEL": "google_thinking_level",
|
||||||
|
"TRADINGAGENTS_OPENAI_REASONING_EFFORT": "openai_reasoning_effort",
|
||||||
|
"TRADINGAGENTS_ANTHROPIC_EFFORT": "anthropic_effort",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_BOOL_TRUE = ("true", "1", "yes", "on")
|
||||||
|
_BOOL_FALSE = ("false", "0", "no", "off")
|
||||||
|
|
||||||
|
|
||||||
def _coerce(value: str, reference):
|
def _coerce(value: str, reference):
|
||||||
"""Coerce env-var string to the type of the existing default value."""
|
"""Coerce env-var string to the type of the existing default value.
|
||||||
|
|
||||||
|
Invalid values raise ``ValueError`` rather than silently falling back to a
|
||||||
|
default — a misspelled boolean (e.g. ``treu``) or non-numeric int should fail
|
||||||
|
loudly at startup, not quietly misconfigure an unattended run.
|
||||||
|
"""
|
||||||
if isinstance(reference, bool):
|
if isinstance(reference, bool):
|
||||||
return value.strip().lower() in ("true", "1", "yes", "on")
|
normalized = value.strip().lower()
|
||||||
|
if normalized in _BOOL_TRUE:
|
||||||
|
return True
|
||||||
|
if normalized in _BOOL_FALSE:
|
||||||
|
return False
|
||||||
|
raise ValueError(
|
||||||
|
f"expected a boolean ({'/'.join(_BOOL_TRUE + _BOOL_FALSE)}), got {value!r}"
|
||||||
|
)
|
||||||
if isinstance(reference, int) and not isinstance(reference, bool):
|
if isinstance(reference, int) and not isinstance(reference, bool):
|
||||||
return int(value)
|
return int(value)
|
||||||
if isinstance(reference, float):
|
if isinstance(reference, float):
|
||||||
@@ -38,7 +60,10 @@ def _apply_env_overrides(config: dict) -> dict:
|
|||||||
raw = os.environ.get(env_var)
|
raw = os.environ.get(env_var)
|
||||||
if raw is None or raw == "":
|
if raw is None or raw == "":
|
||||||
continue
|
continue
|
||||||
config[key] = _coerce(raw, config.get(key))
|
try:
|
||||||
|
config[key] = _coerce(raw, config.get(key))
|
||||||
|
except ValueError as exc:
|
||||||
|
raise ValueError(f"Invalid value for {env_var}: {exc}") from exc
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -12,20 +12,27 @@ _PASSTHROUGH_KWARGS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Anthropic's extended-thinking ``effort`` parameter is accepted by Opus 4.5+
|
# Anthropic's extended-thinking ``effort`` parameter is accepted by Opus 4.5+
|
||||||
# and Sonnet 4.5+ only. Haiku (any version shipped to date) 400s with
|
# and Sonnet 4.6+ only. Sonnet 4.5 and any Haiku version 400 with
|
||||||
# ``"This model does not support the effort parameter"`` (#831). Future
|
# ``"This model does not support the effort parameter"`` (#831). The per-family
|
||||||
# ``claude-{opus,sonnet}-X-Y`` releases inherit effort support via the
|
# minimum version below is forward-compatible: future ``claude-{opus,sonnet}-X-Y``
|
||||||
# forward-compat pattern below; future Haiku stays excluded by default.
|
# releases inherit support automatically, while Sonnet 4.5 and Haiku stay excluded.
|
||||||
_EFFORT_EXACT = {
|
_EFFORT_EXACT = {
|
||||||
"claude-mythos-preview", # non-standard preview name; effort-capable
|
"claude-mythos-preview", # non-standard preview name; effort-capable
|
||||||
}
|
}
|
||||||
_EFFORT_PATTERN = re.compile(r"^claude-(opus|sonnet)-\d+-\d+$")
|
_EFFORT_MODEL = re.compile(r"^claude-(opus|sonnet)-(\d+)-(\d+)$")
|
||||||
|
_EFFORT_MIN_VERSION = {"opus": (4, 5), "sonnet": (4, 6)}
|
||||||
|
|
||||||
|
|
||||||
def _supports_effort(model: str) -> bool:
|
def _supports_effort(model: str) -> bool:
|
||||||
"""Whether Anthropic accepts the ``effort`` parameter for this model."""
|
"""Whether Anthropic accepts the ``effort`` parameter for this model."""
|
||||||
model_lc = model.lower()
|
model_lc = model.lower()
|
||||||
return model_lc in _EFFORT_EXACT or bool(_EFFORT_PATTERN.match(model_lc))
|
if model_lc in _EFFORT_EXACT:
|
||||||
|
return True
|
||||||
|
match = _EFFORT_MODEL.match(model_lc)
|
||||||
|
if not match:
|
||||||
|
return False
|
||||||
|
family, major, minor = match.group(1), int(match.group(2)), int(match.group(3))
|
||||||
|
return (major, minor) >= _EFFORT_MIN_VERSION[family]
|
||||||
|
|
||||||
|
|
||||||
class NormalizedChatAnthropic(ChatAnthropic):
|
class NormalizedChatAnthropic(ChatAnthropic):
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import re
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
@@ -150,6 +151,18 @@ _PASSTHROUGH_KWARGS = (
|
|||||||
"api_key", "callbacks", "http_client", "http_async_client",
|
"api_key", "callbacks", "http_client", "http_async_client",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# OpenAI's ``reasoning_effort`` is only accepted by reasoning models — the GPT-5
|
||||||
|
# family and the o-series. Non-reasoning models (gpt-4.1, gpt-4o, ...) 400 with
|
||||||
|
# "Unsupported parameter: 'reasoning.effort' is not supported with this model".
|
||||||
|
# Drop the kwarg for those rather than crash the run.
|
||||||
|
_OPENAI_REASONING_MODEL = re.compile(r"^(gpt-5|o[1-9])")
|
||||||
|
|
||||||
|
|
||||||
|
def _supports_reasoning_effort(model: str) -> bool:
|
||||||
|
"""Whether the (native OpenAI) model accepts ``reasoning_effort``."""
|
||||||
|
return bool(_OPENAI_REASONING_MODEL.match(model.lower().strip()))
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class ProviderSpec:
|
class ProviderSpec:
|
||||||
"""Declarative config for one OpenAI-compatible provider.
|
"""Declarative config for one OpenAI-compatible provider.
|
||||||
@@ -291,8 +304,11 @@ class OpenAIClient(BaseLLMClient):
|
|||||||
|
|
||||||
# Forward user-provided kwargs
|
# Forward user-provided kwargs
|
||||||
for key in _PASSTHROUGH_KWARGS:
|
for key in _PASSTHROUGH_KWARGS:
|
||||||
if key in self.kwargs:
|
if key not in self.kwargs:
|
||||||
llm_kwargs[key] = self.kwargs[key]
|
continue
|
||||||
|
if key == "reasoning_effort" and not _supports_reasoning_effort(self.model):
|
||||||
|
continue
|
||||||
|
llm_kwargs[key] = self.kwargs[key]
|
||||||
|
|
||||||
# The subclass (provider quirks) comes from the registry spec.
|
# The subclass (provider quirks) comes from the registry spec.
|
||||||
return chat_cls(**llm_kwargs)
|
return chat_cls(**llm_kwargs)
|
||||||
|
|||||||
Reference in New Issue
Block a user