diff --git a/tests/test_market_data_validator.py b/tests/test_market_data_validator.py
new file mode 100644
index 000000000..40b6349e2
--- /dev/null
+++ b/tests/test_market_data_validator.py
@@ -0,0 +1,76 @@
+"""Tests for the deterministic market-data verification snapshot (#830/#881)."""
+
+from __future__ import annotations
+
+import pandas as pd
+import pytest
+
+import tradingagents.dataflows.market_data_validator as validator
+
+
+def _sample_ohlcv() -> pd.DataFrame:
+    dates = pd.bdate_range("2026-04-01", "2026-05-20")
+    closes = [100 + i for i in range(len(dates))]
+    return pd.DataFrame({
+        "Date": dates,
+        "Open": [c - 0.5 for c in closes],
+        "High": [c + 1.0 for c in closes],
+        "Low": [c - 1.0 for c in closes],
+        "Close": closes,
+        "Volume": [1_000_000 + i for i in range(len(dates))],
+    })
+
+
+@pytest.mark.unit
+class TestVerifiedSnapshot:
+    def test_excludes_future_rows(self, monkeypatch):
+        data = pd.concat([
+            _sample_ohlcv(),
+            pd.DataFrame({"Date": [pd.Timestamp("2026-06-01")], "Open": [999.0],
+                          "High": [999.0], "Low": [999.0], "Close": [999.0], "Volume": [999]}),
+        ], ignore_index=True)
+        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: data)
+
+        snap = validator.build_verified_market_snapshot("COF", "2026-05-13")
+        assert "Verified market data snapshot for COF" in snap
+        assert "Requested analysis date: 2026-05-13" in snap
+        assert "Latest trading row used: 2026-05-13" in snap
+        assert "999.00" not in snap  # future row excluded
+        assert "boll_lb" in snap  # indicators present
+
+    def test_uses_previous_trading_day_when_date_is_weekend(self, monkeypatch):
+        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: _sample_ohlcv())
+        # 2026-05-16 is a Saturday; latest row should be Fri 2026-05-15
+        snap = validator.build_verified_market_snapshot("COF", "2026-05-16")
+        assert "Latest trading row used: 2026-05-15" in snap
+        assert "Recent verified closes" in snap
+
+    def test_raises_when_no_rows_on_or_before_date(self, monkeypatch):
+        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: _sample_ohlcv())
+        with pytest.raises(ValueError):
+            validator.build_verified_market_snapshot("COF", "2020-01-01")
+
+    def test_raises_on_empty_data(self, monkeypatch):
+        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: pd.DataFrame())
+        with pytest.raises(ValueError):
+            validator.build_verified_market_snapshot("COF", "2026-05-13")
+
+    def test_look_back_window_capped_at_30(self, monkeypatch):
+        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: _sample_ohlcv())
+        snap = validator.build_verified_market_snapshot("COF", "2026-05-20", look_back_days=999)
+        # the fixture has 36 rows, so the capped closes table must hold exactly 30
+        close_rows = [ln for ln in snap.splitlines() if ln.startswith("| 2026-")]
+        assert len(close_rows) == 30
+
+
+@pytest.mark.unit
+class TestTool:
+    def test_tool_delegates_to_builder(self, monkeypatch):
+        from tradingagents.agents.utils.market_data_validation_tools import (
+            get_verified_market_snapshot,
+        )
+        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: _sample_ohlcv())
+        out = get_verified_market_snapshot.invoke(
+            {"symbol": "COF", "curr_date": "2026-05-20"}
+        )
+        assert "Verified market data snapshot for COF" in out
diff --git a/tradingagents/agents/analysts/market_analyst.py b/tradingagents/agents/analysts/market_analyst.py
index af805dbfc..87fca70de 100644
--- a/tradingagents/agents/analysts/market_analyst.py
+++ b/tradingagents/agents/analysts/market_analyst.py
@@ -4,6 +4,7 @@ from tradingagents.agents.utils.agent_utils import (
     get_indicators,
     get_language_instruction,
     get_stock_data,
+    get_verified_market_snapshot,
 )
 from tradingagents.dataflows.config import get_config
 
@@ -17,6 +18,7 @@ def create_market_analyst(llm):
         tools = [
             get_stock_data,
             get_indicators,
+            get_verified_market_snapshot,
         ]
 
         system_message = (
@@ -44,7 +46,11 @@ Volatility Indicators:
 Volume-Based Indicators:
 - vwma: VWMA: A moving average weighted by volume. Usage: Confirm trends by integrating price action with volume data. Tips: Watch for skewed results from volume spikes; use in combination with other volume analyses.
 
-- Select indicators that provide diverse and complementary information. Avoid redundancy (e.g., do not select both rsi and stochrsi). Also briefly explain why they are suitable for the given market context. When you tool call, please use the exact name of the indicators provided above as they are defined parameters, otherwise your call will fail. Please make sure to call get_stock_data first to retrieve the CSV that is needed to generate indicators. Then use get_indicators with the specific indicator names. Write a very detailed and nuanced report of the trends you observe. Provide specific, actionable insights with supporting evidence to help traders make informed decisions."""
+- Select indicators that provide diverse and complementary information. Avoid redundancy (e.g., do not select both rsi and stochrsi). Also briefly explain why they are suitable for the given market context. When you tool call, please use the exact name of the indicators provided above as they are defined parameters, otherwise your call will fail. Please make sure to call get_stock_data first to retrieve the CSV that is needed to generate indicators. Then use get_indicators with the specific indicator names.
+
+Before writing the final report, call get_verified_market_snapshot for this ticker and the current date, and treat it as the source of truth for any exact OHLCV, price-level, or indicator-value claim. If another tool's output conflicts with the verified snapshot, flag the discrepancy rather than inventing a reconciled number. Do not claim historical validation, support/resistance bounces, or exact percentage moves unless they are directly supported by tool output with concrete dates and prices.
+
+Write a very detailed and nuanced report of the trends you observe. Provide specific, actionable insights with supporting evidence to help traders make informed decisions."""
             + """ Make sure to append a Markdown table at the end of the report to organize key points in the report, organized and easy to read."""
             + get_language_instruction()
         )
diff --git a/tradingagents/agents/utils/agent_utils.py b/tradingagents/agents/utils/agent_utils.py
index f137bd4d4..7476654c0 100644
--- a/tradingagents/agents/utils/agent_utils.py
+++ b/tradingagents/agents/utils/agent_utils.py
@@ -23,6 +23,9 @@ from tradingagents.agents.utils.news_data_tools import (
     get_insider_transactions,
     get_global_news
 )
+from tradingagents.agents.utils.market_data_validation_tools import (
+    get_verified_market_snapshot
+)
 
 logger = logging.getLogger(__name__)
 
diff --git a/tradingagents/agents/utils/market_data_validation_tools.py b/tradingagents/agents/utils/market_data_validation_tools.py
new file mode 100644
index 000000000..356a4ac49
--- /dev/null
+++ b/tradingagents/agents/utils/market_data_validation_tools.py
@@ -0,0 +1,23 @@
+from typing import Annotated
+
+from langchain_core.tools import tool
+
+from tradingagents.dataflows.market_data_validator import build_verified_market_snapshot
+
+
+@tool
+def get_verified_market_snapshot(
+    symbol: Annotated[str, "ticker symbol of the company"],
+    curr_date: Annotated[str, "the current trading date, YYYY-mm-dd"],
+    look_back_days: Annotated[
+        int, "number of recent trading rows to include for sanity-checking"
+    ] = 30,
+) -> str:
+    """Deterministic verification snapshot for exact market-data claims.
+
+    Returns the latest OHLCV row on or before curr_date, common technical
+    indicators, and recent closes. Call this before making exact claims about
+    price levels, Bollinger bands, RSI, MACD, moving averages, support /
+    resistance, or historical comparisons, and treat it as the source of truth.
+    """
+    return build_verified_market_snapshot(symbol, curr_date, look_back_days)
diff --git a/tradingagents/dataflows/market_data_validator.py b/tradingagents/dataflows/market_data_validator.py
new file mode 100644
index 000000000..d1992c0f8
--- /dev/null
+++ b/tradingagents/dataflows/market_data_validator.py
@@ -0,0 +1,142 @@
+"""Deterministic market-data verification snapshot.
+
+The market analyst is an LLM that can confabulate exact numbers — citing a
+Bollinger band or a "historically validated bounce" that the underlying data
+doesn't support (#830). This module computes a ground-truth snapshot (latest
+OHLCV row on or before the analysis date, common indicators, recent closes)
+the analyst is told to treat as the source of truth for any exact numeric
+claim. Deterministic, no LLM involved.
+"""
+
+from __future__ import annotations
+
+import numbers
+from typing import Iterable, Optional
+
+import pandas as pd
+from stockstats import wrap
+
+from tradingagents.dataflows.stockstats_utils import load_ohlcv
+
+# A fixed, common indicator set so the snapshot is the same shape every run.
+DEFAULT_SNAPSHOT_INDICATORS: tuple[str, ...] = (
+    "close_10_ema", "close_50_sma", "close_200_sma",
+    "rsi", "boll", "boll_ub", "boll_lb",
+    "macd", "macds", "macdh", "atr",
+)
+
+
+def _verified_rows(symbol: str, curr_date: str) -> pd.DataFrame:
+    """OHLCV on or before curr_date, date-sorted. Raises if nothing usable.
+
+    ``load_ohlcv`` already normalizes the Date column and filters out
+    look-ahead rows, but we re-apply the cutoff defensively — this is a
+    verification path, so it must not trust its input to be pre-filtered.
+    """
+    data = load_ohlcv(symbol, curr_date)
+    if data is None or data.empty:
+        raise ValueError(f"No OHLCV data available for {symbol}.")
+
+    df = data.copy()
+    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
+    df = df.dropna(subset=["Date"])
+    df = df[df["Date"] <= pd.to_datetime(curr_date)].sort_values("Date")
+    if df.empty:
+        raise ValueError(f"No OHLCV rows on or before {curr_date} for {symbol}.")
+    return df
+
+
+def _fmt(value) -> str:
+    """Format one snapshot cell as text.
+
+    Missing values render as ``N/A``, timestamps as ``YYYY-MM-DD``, integers
+    verbatim and other real numbers with two decimals. The numeric checks use
+    the ``numbers`` ABCs so NumPy scalars that do not subclass the builtins
+    (e.g. ``float32``) are formatted the same way as ``int`` / ``float``.
+    """
+    if value is None or pd.isna(value):
+        return "N/A"
+    if isinstance(value, pd.Timestamp):
+        return value.strftime("%Y-%m-%d")
+    # bool is an Integral; test it first so flags print as True/False.
+    if isinstance(value, bool):
+        return str(value)
+    if isinstance(value, numbers.Integral):
+        return str(value)
+    if isinstance(value, numbers.Real):
+        return f"{float(value):.2f}"
+    return str(value)
+
+
+def build_verified_market_snapshot(
+    symbol: str,
+    curr_date: str,
+    look_back_days: int = 30,
+    indicators: Optional[Iterable[str]] = None,
+) -> str:
+    """Render a ground-truth snapshot: latest OHLCV row, indicators, recent closes.
+
+    ``look_back_days`` is clamped to 1..30 rows for the recent-closes table.
+    ``indicators`` defaults to ``DEFAULT_SNAPSHOT_INDICATORS``; a single
+    indicator name may be passed as a plain string. An indicator that fails
+    to compute is reported as ``N/A (<ExceptionName>)`` instead of aborting.
+    Raises ``ValueError`` when no row exists on or before ``curr_date``.
+    """
+    # `df` keeps the original capitalized OHLCV columns (Open/High/Low/Close/
+    # Volume); stockstats `wrap()` lowercases columns and adds indicator
+    # columns, so read raw prices from `df` and indicators from `stock_df`.
+    df = _verified_rows(symbol, curr_date)
+    stock_df = wrap(df.copy())
+
+    if isinstance(indicators, str):
+        # A bare string would otherwise be split into single characters.
+        indicators = (indicators,)
+    selected = tuple(indicators or DEFAULT_SNAPSHOT_INDICATORS)
+    indicator_values: dict[str, str] = {}
+    for name in selected:
+        try:
+            stock_df[name]  # triggers stockstats calculation
+            indicator_values[name] = _fmt(stock_df.iloc[-1][name])
+        except Exception as exc:  # noqa: BLE001 — one bad indicator shouldn't sink the snapshot
+            indicator_values[name] = f"N/A ({type(exc).__name__})"
+
+    latest = df.iloc[-1]
+    latest_date = _fmt(latest["Date"])
+    window = max(1, min(int(look_back_days), 30))
+    recent = df.tail(window)
+
+    lines = [
+        f"## Verified market data snapshot for {symbol.upper()}",
+        "",
+        f"- Requested analysis date: {curr_date}",
+        f"- Latest trading row used: {latest_date}",
+        "- Rows after the requested analysis date are excluded before verification.",
+        "",
+        "### Latest verified OHLCV row",
+        "",
+        "| Field | Value |",
+        "|---|---:|",
+    ]
+    for field in ("Open", "High", "Low", "Close", "Volume"):
+        lines.append(f"| {field} | {_fmt(latest.get(field))} |")
+
+    lines += ["", "### Verified technical indicators (latest row)", "",
+              "| Indicator | Value |", "|---|---:|"]
+    for name, value in indicator_values.items():
+        lines.append(f"| {name} | {value} |")
+
+    lines += ["", f"### Recent verified closes (last {len(recent)} rows)", "",
+              "| Date | Close |", "|---|---:|"]
+    for _, row in recent.iterrows():
+        lines.append(f"| {_fmt(row['Date'])} | {_fmt(row.get('Close'))} |")
+
+    lines += [
+        "",
+        "Use this snapshot as the source of truth for exact OHLCV, price-level, "
+        "and indicator-value claims. If another tool output conflicts with it, "
+        "flag the discrepancy rather than inventing a reconciled number. Do not "
+        "claim historical validation, support/resistance bounces, or exact "
+        "percentage moves unless directly supported by tool output with concrete "
+        "dates and prices.",
+    ]
+    return "\n".join(lines)