feat: structured-output Portfolio Manager + 5-tier rating consistency (#434)

Three related changes that take the rating pipeline from heuristic-only
to type-safe at the source.

1) Research Manager prompt now uses the same 5-tier scale (Buy /
   Overweight / Hold / Underweight / Sell) as the Portfolio Manager,
   signal_processing, and the memory log.  The prior 3-tier wording
   (Buy / Sell / Hold) was the only remaining inconsistency in the
   pipeline.

2) Centralise the 5-tier vocabulary and the heuristic prose-rating
   parser into tradingagents/agents/utils/rating.py.  Both the memory
   log and the signal processor now share the same parser instead of
   duplicating regex and word-walker logic.

3) Make structured output a first-class part of the Portfolio Manager's
   primary call.  The PM uses llm.with_structured_output(PortfolioDecision)
   so each provider's native structured-output mode (json_schema for
   OpenAI/xAI, response_schema for Gemini, tool-use for Anthropic,
   function_calling for OpenAI-compatible providers) yields a typed
   Pydantic instance directly.  A render helper turns that instance back
   into the same markdown shape downstream consumers (memory log, CLI
   display, saved reports) already expect, so no other code has to know
   the PM now produces structured output.  Providers without
   structured-output support fall back gracefully to free-text plus the
   deterministic heuristic.

   The previous SignalProcessor had been making a second LLM call to
   re-extract the rating from the PM's prose; that round-trip is now
   eliminated.  SignalProcessor is a thin adapter over parse_rating(),
   makes zero LLM calls, and stays for backwards compatibility with
   process_signal() callers.
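The structured-first call with its free-text fallback can be sketched as
follows.  pm_call and the inline render stand-in are hypothetical names; only
the control flow (try structured output, catch, fall back to a plain invoke)
mirrors what this commit describes:

```python
# Sketch of the PM's structured-first call path with graceful fallback.
# render_pm_decision here is a stand-in for the real render helper.
from unittest.mock import MagicMock


def render_pm_decision(decision) -> str:
    # Stand-in: the real helper renders the full markdown shape
    # (rating, summary, thesis, optional target/horizon).
    return f"**Rating**: {decision.rating}\n\n{decision.executive_summary}"


def pm_call(llm, prompt: str, schema) -> str:
    """Try the provider's native structured-output mode first; if the
    provider does not support it, fall back to plain prose so the
    pipeline never blocks."""
    try:
        structured = llm.with_structured_output(schema)
        decision = structured.invoke(prompt)    # typed instance, no re-extraction
        return render_pm_decision(decision)     # back to the expected markdown
    except NotImplementedError:
        # No structured support: return the prose verbatim; the
        # deterministic parse_rating() heuristic recovers the rating downstream.
        return llm.invoke(prompt).content
```

Because the render helper emits the same markdown shape either way, downstream
consumers never need to know which branch produced the text.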

Schema (PortfolioDecision) captures rating + executive_summary +
investment_thesis + optional price_target + time_horizon, with field
descriptions doubling as output instructions.  Agent prose remains the
primary artifact; structured output is layered onto the PM only because
it is the one agent whose output has machine-readable downstream
consumers.

15 new tests cover the heuristic parser (markdown-bold edge cases that
had no coverage before), the structured PM happy path, the free-text
fallback path, and that SignalProcessor never invokes the LLM.  Full
suite: 77 tests pass in ~2s without API keys.
Yijia-Xiao
2026-04-25 19:57:26 +00:00
parent 4cbd4b086f
commit 0fda24515f
8 changed files with 399 additions and 87 deletions


@@ -5,6 +5,7 @@ import pandas as pd
 from unittest.mock import MagicMock, patch
 from tradingagents.agents.utils.memory import TradingMemoryLog
+from tradingagents.agents.schemas import PortfolioDecision, PortfolioRating
 from tradingagents.graph.reflection import Reflector
 from tradingagents.graph.trading_graph import TradingAgentsGraph
 from tradingagents.graph.propagation import Propagator
@@ -82,6 +83,25 @@ def _make_pm_state(past_context=""):
     }
+
+def _structured_pm_llm(captured: dict, decision: PortfolioDecision | None = None):
+    """Build a MagicMock LLM whose with_structured_output binding captures the
+    prompt and returns a real PortfolioDecision (so render_pm_decision works).
+    """
+    if decision is None:
+        decision = PortfolioDecision(
+            rating=PortfolioRating.HOLD,
+            executive_summary="Hold the position; await catalyst.",
+            investment_thesis="Balanced view; neither side carried the debate.",
+        )
+    structured = MagicMock()
+    structured.invoke.side_effect = lambda prompt: (
+        captured.__setitem__("prompt", prompt) or decision
+    )
+    llm = MagicMock()
+    llm.with_structured_output.return_value = structured
+    return llm
+
 # ---------------------------------------------------------------------------
 # Core: storage and read path
 # ---------------------------------------------------------------------------
@@ -518,29 +538,55 @@ class TestPortfolioManagerInjection:
     def test_pm_prompt_includes_past_context(self):
         captured = {}
-        mock_llm = MagicMock()
-        mock_llm.invoke.side_effect = lambda prompt: (
-            captured.__setitem__("prompt", prompt) or MagicMock(content="Rating: Hold\nHold.")
-        )
-        pm_node = create_portfolio_manager(mock_llm)
+        llm = _structured_pm_llm(captured)
+        pm_node = create_portfolio_manager(llm)
         state = _make_pm_state(past_context="[2026-01-05 | NVDA | Buy | +5.0% | +2.0% | 5d]\nGreat call.")
         pm_node(state)
         assert "Lessons from prior decisions and outcomes" in captured["prompt"]
         assert "Great call." in captured["prompt"]
         assert "and the lessons from prior decisions" in captured["prompt"]

     def test_pm_no_past_context_no_section(self):
         """PM prompt omits the lessons section entirely when past_context is empty."""
         captured = {}
-        mock_llm = MagicMock()
-        mock_llm.invoke.side_effect = lambda prompt: (
-            captured.__setitem__("prompt", prompt) or MagicMock(content="Rating: Hold\nHold.")
-        )
-        pm_node = create_portfolio_manager(mock_llm)
+        llm = _structured_pm_llm(captured)
+        pm_node = create_portfolio_manager(llm)
         state = _make_pm_state(past_context="")
         pm_node(state)
         assert "Lessons from prior decisions" not in captured["prompt"]
         assert "and the lessons from prior decisions" not in captured["prompt"]
+
+    def test_pm_returns_rendered_markdown_with_rating(self):
+        """The structured PortfolioDecision is rendered to markdown that
+        downstream consumers (memory log, signal processor, CLI display)
+        can parse without any extra LLM call."""
+        captured = {}
+        decision = PortfolioDecision(
+            rating=PortfolioRating.OVERWEIGHT,
+            executive_summary="Build position gradually over the next two weeks.",
+            investment_thesis="AI capex cycle remains intact; institutional flows constructive.",
+            price_target=215.0,
+            time_horizon="3-6 months",
+        )
+        llm = _structured_pm_llm(captured, decision)
+        pm_node = create_portfolio_manager(llm)
+        result = pm_node(_make_pm_state())
+        md = result["final_trade_decision"]
+        assert "**Rating**: Overweight" in md
+        assert "**Executive Summary**: Build position gradually" in md
+        assert "**Investment Thesis**: AI capex cycle" in md
+        assert "**Price Target**: 215.0" in md
+        assert "**Time Horizon**: 3-6 months" in md
+
+    def test_pm_falls_back_to_freetext_when_structured_unavailable(self):
+        """If a provider does not support with_structured_output, the agent
+        falls back to a plain invoke and returns whatever prose the model
+        produced, so the pipeline never blocks."""
+        plain_response = "**Rating**: Sell\n\nExit ahead of guidance."
+        llm = MagicMock()
+        llm.with_structured_output.side_effect = NotImplementedError("provider unsupported")
+        llm.invoke.return_value = MagicMock(content=plain_response)
+        pm_node = create_portfolio_manager(llm)
+        result = pm_node(_make_pm_state())
+        assert result["final_trade_decision"] == plain_response

 # get_past_context ordering and limits