feat(market): verified market-data snapshot to ground numeric claims

The market analyst could confabulate exact figures — citing a Bollinger
band or a "historically validated bounce" the data doesn't support (#830).
Add a deterministic get_verified_market_snapshot tool (latest OHLCV row,
common indicators, recent closes) the analyst must consult and treat as
the source of truth for any exact price/indicator claim, and instruct it
not to assert historical validation or support bounces without tool-backed
dates and prices.

#830
This commit is contained in:
Yijia-Xiao
2026-05-31 01:58:32 +00:00
parent e80636fc0e
commit 47cbb321fe
5 changed files with 232 additions and 1 deletions

View File

@@ -0,0 +1,76 @@
"""Tests for the deterministic market-data verification snapshot (#830/#881)."""
from __future__ import annotations
import pandas as pd
import pytest
import tradingagents.dataflows.market_data_validator as validator
def _sample_ohlcv() -> pd.DataFrame:
    """Build a synthetic OHLCV frame for the tests in this module.

    One row per business day from 2026-04-01 through 2026-05-20. Close starts
    at 100 and rises by 1 per row; Open/High/Low are fixed offsets from Close
    (-0.5 / +1.0 / -1.0) and Volume rises by 1 per row from 1,000,000.
    """
    trading_days = pd.bdate_range("2026-04-01", "2026-05-20")
    n_rows = len(trading_days)
    close_prices = list(range(100, 100 + n_rows))
    columns = {
        "Date": trading_days,
        "Open": [price - 0.5 for price in close_prices],
        "High": [price + 1.0 for price in close_prices],
        "Low": [price - 1.0 for price in close_prices],
        "Close": close_prices,
        "Volume": list(range(1_000_000, 1_000_000 + n_rows)),
    }
    return pd.DataFrame(columns)
@pytest.mark.unit
class TestVerifiedSnapshot:
    """Behavioural checks for ``validator.build_verified_market_snapshot``.

    Every test swaps ``validator.load_ohlcv`` for a stub so the builder runs
    against the synthetic frame from ``_sample_ohlcv`` (or a variation of it).
    """

    def test_excludes_future_rows(self, monkeypatch):
        """A row dated after the analysis date must not leak into the output."""
        future_row = pd.DataFrame({
            "Date": [pd.Timestamp("2026-06-01")],
            "Open": [999.0],
            "High": [999.0],
            "Low": [999.0],
            "Close": [999.0],
            "Volume": [999],
        })
        data = pd.concat([_sample_ohlcv(), future_row], ignore_index=True)
        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: data)

        snap = validator.build_verified_market_snapshot("COF", "2026-05-13")

        assert "Verified market data snapshot for COF" in snap
        assert "Requested analysis date: 2026-05-13" in snap
        assert "Latest trading row used: 2026-05-13" in snap
        # Check both the sentinel price and the sentinel date: either one
        # appearing means the look-ahead row survived the cutoff.
        assert "999.00" not in snap
        assert "2026-06-01" not in snap
        assert "boll_lb" in snap  # indicators present

    def test_uses_previous_trading_day_when_date_is_weekend(self, monkeypatch):
        """A weekend analysis date falls back to the preceding Friday's row."""
        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: _sample_ohlcv())

        # 2026-05-16 is a Saturday; latest row should be Fri 2026-05-15
        snap = validator.build_verified_market_snapshot("COF", "2026-05-16")

        assert "Latest trading row used: 2026-05-15" in snap
        assert "Recent verified closes" in snap

    def test_raises_when_no_rows_on_or_before_date(self, monkeypatch):
        """Data exists, but none of it is on or before the analysis date."""
        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: _sample_ohlcv())

        # ``match`` pins the specific failure path; a bare ValueError check
        # would also pass if the "no data at all" branch fired by mistake.
        with pytest.raises(ValueError, match="No OHLCV rows on or before"):
            validator.build_verified_market_snapshot("COF", "2020-01-01")

    def test_raises_on_empty_data(self, monkeypatch):
        """An empty frame from the loader is rejected up front."""
        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: pd.DataFrame())

        with pytest.raises(ValueError, match="No OHLCV data available"):
            validator.build_verified_market_snapshot("COF", "2026-05-13")

    def test_look_back_window_capped_at_30(self, monkeypatch):
        """An oversized ``look_back_days`` is clamped to exactly 30 close rows."""
        monkeypatch.setattr(validator, "load_ohlcv", lambda s, d: _sample_ohlcv())

        snap = validator.build_verified_market_snapshot(
            "COF", "2026-05-20", look_back_days=999
        )

        # Only the recent-closes table has rows starting with a date cell.
        close_rows = [ln for ln in snap.splitlines() if ln.startswith("| 2026-")]
        # The sample frame has 36 rows on or before 2026-05-20, so the cap —
        # not data scarcity — decides the count; assert it exactly.
        assert len(close_rows) == 30
        assert "Recent verified closes (last 30 rows)" in snap
@pytest.mark.unit
class TestTool:
    """Checks the tool wrapper exposed to the market analyst."""

    def test_tool_delegates_to_builder(self, monkeypatch):
        """Invoking the tool returns the snapshot text produced by the builder."""
        from tradingagents.agents.utils.market_data_validation_tools import (
            get_verified_market_snapshot,
        )

        def fake_load_ohlcv(_symbol, _curr_date):
            # Stand-in loader: always serve the synthetic sample frame.
            return _sample_ohlcv()

        monkeypatch.setattr(validator, "load_ohlcv", fake_load_ohlcv)

        tool_args = {"symbol": "COF", "curr_date": "2026-05-20"}
        rendered = get_verified_market_snapshot.invoke(tool_args)

        assert "Verified market data snapshot for COF" in rendered

View File

@@ -4,6 +4,7 @@ from tradingagents.agents.utils.agent_utils import (
get_indicators,
get_language_instruction,
get_stock_data,
get_verified_market_snapshot,
)
from tradingagents.dataflows.config import get_config
@@ -17,6 +18,7 @@ def create_market_analyst(llm):
tools = [
get_stock_data,
get_indicators,
get_verified_market_snapshot,
]
system_message = (
@@ -44,7 +46,11 @@ Volatility Indicators:
Volume-Based Indicators:
- vwma: VWMA: A moving average weighted by volume. Usage: Confirm trends by integrating price action with volume data. Tips: Watch for skewed results from volume spikes; use in combination with other volume analyses.
- Select indicators that provide diverse and complementary information. Avoid redundancy (e.g., do not select both rsi and stochrsi). Also briefly explain why they are suitable for the given market context. When you tool call, please use the exact name of the indicators provided above as they are defined parameters, otherwise your call will fail. Please make sure to call get_stock_data first to retrieve the CSV that is needed to generate indicators. Then use get_indicators with the specific indicator names. Write a very detailed and nuanced report of the trends you observe. Provide specific, actionable insights with supporting evidence to help traders make informed decisions."""
- Select indicators that provide diverse and complementary information. Avoid redundancy (e.g., do not select both rsi and stochrsi). Also briefly explain why they are suitable for the given market context. When you tool call, please use the exact name of the indicators provided above as they are defined parameters, otherwise your call will fail. Please make sure to call get_stock_data first to retrieve the CSV that is needed to generate indicators. Then use get_indicators with the specific indicator names.
Before writing the final report, call get_verified_market_snapshot for this ticker and the current date, and treat it as the source of truth for any exact OHLCV, price-level, or indicator-value claim. If another tool's output conflicts with the verified snapshot, flag the discrepancy rather than inventing a reconciled number. Do not claim historical validation, support/resistance bounces, or exact percentage moves unless they are directly supported by tool output with concrete dates and prices.
Write a very detailed and nuanced report of the trends you observe. Provide specific, actionable insights with supporting evidence to help traders make informed decisions."""
+ """ Make sure to append a Markdown table at the end of the report to organize key points in the report, organized and easy to read."""
+ get_language_instruction()
)

View File

@@ -23,6 +23,9 @@ from tradingagents.agents.utils.news_data_tools import (
get_insider_transactions,
get_global_news
)
from tradingagents.agents.utils.market_data_validation_tools import (
get_verified_market_snapshot
)
logger = logging.getLogger(__name__)

View File

@@ -0,0 +1,23 @@
from typing import Annotated
from langchain_core.tools import tool
from tradingagents.dataflows.market_data_validator import build_verified_market_snapshot
# Tool entry point handed to the market analyst; the function itself only
# forwards its arguments to build_verified_market_snapshot.
# NOTE(review): the Annotated strings and the docstring below are presumably
# surfaced to the model by ``@tool`` as the tool's argument/description text —
# confirm before rewording them, since that would change what the analyst sees.
@tool
def get_verified_market_snapshot(
    symbol: Annotated[str, "ticker symbol of the company"],
    curr_date: Annotated[str, "the current trading date, YYYY-mm-dd"],
    look_back_days: Annotated[
        int, "number of recent trading rows to include for sanity-checking"
    ] = 30,
) -> str:
    """Deterministic verification snapshot for exact market-data claims.
    Returns the latest OHLCV row on or before curr_date, common technical
    indicators, and recent closes. Call this before making exact claims about
    price levels, Bollinger bands, RSI, MACD, moving averages, support /
    resistance, or historical comparisons, and treat it as the source of truth.
    """
    # No ``indicators`` argument is passed, so the builder falls back to its
    # default indicator set.
    return build_verified_market_snapshot(symbol, curr_date, look_back_days)

View File

@@ -0,0 +1,123 @@
"""Deterministic market-data verification snapshot.
The market analyst is an LLM that can confabulate exact numbers — citing a
Bollinger band or a "historically validated bounce" that the underlying data
doesn't support (#830). This module computes a ground-truth snapshot (latest
OHLCV row on or before the analysis date, common indicators, recent closes)
the analyst is told to treat as the source of truth for any exact numeric
claim. Deterministic, no LLM involved.
"""
from __future__ import annotations
from typing import Iterable, Optional
import pandas as pd
from stockstats import wrap
from tradingagents.dataflows.stockstats_utils import load_ohlcv
# A fixed, common indicator set so the snapshot is the same shape every run.
# build_verified_market_snapshot uses it whenever the caller supplies no
# ``indicators``; each name is looked up on the stockstats-wrapped frame, and
# the rendered indicator table lists them in this order.
# NOTE(review): names presumably follow stockstats' column-naming scheme —
# confirm against the stockstats docs before adding new entries.
DEFAULT_SNAPSHOT_INDICATORS: tuple[str, ...] = (
    "close_10_ema", "close_50_sma", "close_200_sma",
    "rsi", "boll", "boll_ub", "boll_lb",
    "macd", "macds", "macdh", "atr",
)
def _verified_rows(symbol: str, curr_date: str) -> pd.DataFrame:
    """Return the OHLCV rows dated on or before ``curr_date``, oldest first.

    ``load_ohlcv`` is expected to have normalized the Date column and removed
    look-ahead rows already. The cutoff is applied again here on purpose:
    this is the verification path, so it does not rely on its input having
    been pre-filtered.

    Raises:
        ValueError: if the loader returns nothing, or if no row survives the
            date parsing and cutoff.
    """
    raw = load_ohlcv(symbol, curr_date)
    if raw is None or raw.empty:
        raise ValueError(f"No OHLCV data available for {symbol}.")

    # ``assign`` returns a new frame, so the loader's object is never mutated.
    frame = raw.assign(Date=pd.to_datetime(raw["Date"], errors="coerce"))
    cutoff = pd.to_datetime(curr_date)
    # Unparseable dates were coerced to NaT above; drop them with the
    # look-ahead rows in a single mask.
    usable = frame["Date"].notna() & (frame["Date"] <= cutoff)
    frame = frame[usable].sort_values("Date")

    if frame.empty:
        raise ValueError(f"No OHLCV rows on or before {curr_date} for {symbol}.")
    return frame
def _fmt(value) -> str:
    """Render one table cell as text.

    Rules, applied in order:
      * ``None`` and scalar missing values (NaN, NaT, ...) -> ``"N/A"``
      * ``pd.Timestamp`` -> ``YYYY-MM-DD``
      * booleans -> ``"True"`` / ``"False"``
      * integers, including NumPy integer scalars -> plain digits
      * other real numbers, including NumPy floats of any width -> 2 decimals
      * anything else -> ``str(value)``
    """
    # Function-scope import keeps the block self-contained.
    import numbers

    # Only ask pandas about missingness for scalars: for a list or array
    # ``pd.isna`` returns an array, whose truth value is ambiguous and raises.
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return "N/A"
    if isinstance(value, pd.Timestamp):
        return value.strftime("%Y-%m-%d")
    # ``bool`` is an Integral, so it has to be handled before the int branch.
    if isinstance(value, bool):
        return str(value)
    # ``numbers`` ABCs also match NumPy scalars (np.int64, np.float32, ...),
    # which are not all subclasses of the built-in int/float.
    if isinstance(value, numbers.Integral):
        return str(int(value))
    if isinstance(value, numbers.Real):
        return f"{float(value):.2f}"
    return str(value)
def build_verified_market_snapshot(
    symbol: str,
    curr_date: str,
    look_back_days: int = 30,
    indicators: Optional[Iterable[str]] = None,
) -> str:
    """Render the ground-truth snapshot as Markdown text.

    The output has three tables — the latest OHLCV row on or before
    ``curr_date``, the requested indicators evaluated on that row, and the
    most recent closes — followed by a usage note for the analyst.

    Args:
        symbol: Ticker; upper-cased in the heading.
        curr_date: Analysis date; rows after it are never used.
        look_back_days: Number of recent closes to list, clamped to 1..30.
        indicators: Indicator names to compute; a falsy value selects
            ``DEFAULT_SNAPSHOT_INDICATORS``.

    Raises:
        ValueError: propagated from ``_verified_rows`` when no usable rows
            exist.
    """
    # ``price_rows`` keeps the original capitalized OHLCV columns, while the
    # stockstats-wrapped copy has lowercased columns plus indicator columns.
    # Raw prices are therefore read from the former, indicators from the latter.
    price_rows = _verified_rows(symbol, curr_date)
    indicator_frame = wrap(price_rows.copy())

    rendered_indicators: dict[str, str] = {}
    for indicator in tuple(indicators or DEFAULT_SNAPSHOT_INDICATORS):
        try:
            _ = indicator_frame[indicator]  # column access triggers the stockstats calculation
            cell = _fmt(indicator_frame.iloc[-1][indicator])
        except Exception as err:  # noqa: BLE001 — one bad indicator shouldn't sink the snapshot
            cell = f"N/A ({type(err).__name__})"
        rendered_indicators[indicator] = cell

    last_row = price_rows.iloc[-1]
    last_row_date = _fmt(last_row["Date"])
    n_recent = max(1, min(int(look_back_days), 30))
    recent_rows = price_rows.tail(n_recent)

    report = [
        f"## Verified market data snapshot for {symbol.upper()}",
        "",
        f"- Requested analysis date: {curr_date}",
        f"- Latest trading row used: {last_row_date}",
        "- Rows after the requested analysis date are excluded before verification.",
        "",
        "### Latest verified OHLCV row",
        "",
        "| Field | Value |",
        "|---|---:|",
    ]
    report.extend(
        f"| {column} | {_fmt(last_row.get(column))} |"
        for column in ("Open", "High", "Low", "Close", "Volume")
    )

    report.extend([
        "",
        "### Verified technical indicators (latest row)",
        "",
        "| Indicator | Value |",
        "|---|---:|",
    ])
    report.extend(
        f"| {indicator} | {cell} |"
        for indicator, cell in rendered_indicators.items()
    )

    report.extend([
        "",
        f"### Recent verified closes (last {len(recent_rows)} rows)",
        "",
        "| Date | Close |",
        "|---|---:|",
    ])
    report.extend(
        f"| {_fmt(row['Date'])} | {_fmt(row.get('Close'))} |"
        for _, row in recent_rows.iterrows()
    )

    # Closing note repeats the analyst's grounding rules next to the data.
    report.append("")
    report.append(
        "Use this snapshot as the source of truth for exact OHLCV, price-level, "
        "and indicator-value claims. If another tool output conflicts with it, "
        "flag the discrepancy rather than inventing a reconciled number. Do not "
        "claim historical validation, support/resistance bounces, or exact "
        "percentage moves unless directly supported by tool output with concrete "
        "dates and prices."
    )
    return "\n".join(report)