mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-16 21:06:15 +03:00
fix(data): reject stale yfinance OHLCV instead of reporting wrong prices
yfinance intermittently returns a year-old partial frame (e.g. June 2025 rows for a June 2026 request) that still has rows and a Close, so it passed the empty-check and silently fed a wrong close price and indicators into the report (#1021). Add a freshness guard that rejects a frame whose latest row is far older than the requested date, on both the raw OHLCV path and the indicator path. It raises the existing NoMarketDataError with a stale-specific detail, so the vendor router's try-next-vendor and single unavailable-signal handling apply unchanged; the sentinel now surfaces that detail so the agent reports the specific reason rather than fabricating a value.
This commit is contained in:
113
tests/test_yfinance_stale_ohlcv_guard.py
Normal file
113
tests/test_yfinance_stale_ohlcv_guard.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
"""Stale OHLCV guard (#1021): a vendor returning a year-old partial frame must
|
||||||
|
be rejected, not fed into the report as if it were current.
|
||||||
|
|
||||||
|
The guard raises NoMarketDataError with a stale-specific detail, so the router's
|
||||||
|
existing try-next-vendor + single-sentinel handling applies and the sentinel
|
||||||
|
surfaces the reason.
|
||||||
|
"""
|
||||||
|
import copy
|
||||||
|
import unittest
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import tradingagents.dataflows.config as config_module
|
||||||
|
import tradingagents.dataflows.y_finance as y_finance
|
||||||
|
import tradingagents.default_config as default_config
|
||||||
|
from tradingagents.dataflows import interface
|
||||||
|
from tradingagents.dataflows.config import set_config
|
||||||
|
from tradingagents.dataflows.stockstats_utils import _assert_ohlcv_not_stale
|
||||||
|
from tradingagents.dataflows.symbol_utils import NoMarketDataError
|
||||||
|
|
||||||
|
|
||||||
|
def _frame(date):
    """Build a one-row OHLCV frame whose only row is dated ``date``.

    The frame carries ``Date`` as a regular column (not the index) together
    with fixed Open/High/Low/Close/Volume values, so tests can vary only the
    date of the latest row.
    """
    return pd.DataFrame(
        {
            "Date": [pd.Timestamp(date)],
            "Open": [330.0],
            "High": [332.0],
            "Low": [328.0],
            "Close": [330.58],
            "Volume": [1_000_000],
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class StaleGuardUnitTests(unittest.TestCase):
    """Direct tests of ``_assert_ohlcv_not_stale`` on single-row frames."""

    def test_recent_prior_trading_day_is_accepted(self):
        # 1 day before curr_date — well within the freshness window.
        _assert_ohlcv_not_stale(_frame("2026-06-10"), "2026-06-11", "CB")

    def test_year_old_row_is_rejected_with_detail(self):
        with self.assertRaises(NoMarketDataError) as ctx:
            _assert_ohlcv_not_stale(_frame("2025-06-11"), "2026-06-11", "CB", "CB")
        # The detail must name both dates and say why the frame was refused.
        msg = str(ctx.exception)
        self.assertIn("2025-06-11", msg)
        self.assertIn("2026-06-11", msg)
        self.assertIn("stale", msg)

    def test_empty_frame_is_left_to_caller(self):
        # Empty is a no-data condition handled elsewhere, not a staleness one.
        _assert_ohlcv_not_stale(
            pd.DataFrame(columns=["Date", "Close"]), "2026-06-11", "X"
        )

    def test_long_holiday_gap_within_threshold_is_accepted(self):
        _assert_ohlcv_not_stale(_frame("2026-06-02"), "2026-06-11", "X")  # 9 days

    def test_gap_equal_to_threshold_is_accepted(self):
        # The guard rejects only gaps strictly greater than the threshold.
        _assert_ohlcv_not_stale(
            _frame("2026-06-01"), "2026-06-11", "X", max_stale_days=10
        )  # exactly 10 days

    def test_gap_one_day_over_threshold_is_rejected(self):
        with self.assertRaises(NoMarketDataError):
            _assert_ohlcv_not_stale(
                _frame("2026-05-31"), "2026-06-11", "X", max_stale_days=10
            )  # 11 days
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class StaleGuardPropagationTests(unittest.TestCase):
    """Check that the raw OHLCV fetch path raises on a stale vendor frame."""

    def test_get_yfin_data_online_raises_on_stale_frame(self):
        # A year-old single row with the dates in the index, as in #1021.
        stale = pd.DataFrame(
            {
                "Open": [280.0], "High": [286.0], "Low": [278.0],
                "Close": [284.45], "Volume": [1_000_000],
            },
            index=pd.DatetimeIndex([pd.Timestamp("2025-06-11")], name="Date"),
        )

        class DummyTicker:
            """Stand-in for ``yf.Ticker`` that always returns the stale frame."""

            def __init__(self, symbol):
                pass

            def history(self, start, end):
                return stale

        with mock.patch.object(y_finance.yf, "Ticker", DummyTicker), \
                self.assertRaises(NoMarketDataError):
            y_finance.get_YFin_data_online("CB", "2026-06-01", "2026-06-11")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class StaleGuardRoutingTests(unittest.TestCase):
    """Check that the vendor router surfaces the stale detail in its sentinel."""

    def setUp(self):
        # Start every test from a pristine copy of the default configuration.
        config_module._config = copy.deepcopy(default_config.DEFAULT_CONFIG)

    def tearDown(self):
        # Undo any set_config() call so later tests are unaffected.
        config_module._config = copy.deepcopy(default_config.DEFAULT_CONFIG)

    def test_router_sentinel_surfaces_stale_reason(self):
        set_config({"data_vendors": {"core_stock_apis": "yfinance"}})

        def _stale(symbol, *a, **k):
            # Vendor stub that always reports a stale frame.
            raise NoMarketDataError(
                symbol, symbol, "latest row is 2025-06-11, 365 days before ... (stale)"
            )

        with mock.patch.dict(
            interface.VENDOR_METHODS,
            {"get_stock_data": {"yfinance": _stale}},
            clear=False,
        ):
            out = interface.route_to_vendor(
                "get_stock_data", "CB", "2026-06-01", "2026-06-11"
            )
        self.assertIn("NO_DATA_AVAILABLE", out)
        self.assertIn("stale", out)  # the typed detail is surfaced to the agent
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly, outside of pytest.
if __name__ == "__main__":
    unittest.main()
|
||||||
@@ -1,18 +1,24 @@
|
|||||||
import time
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from typing import Annotated
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import yfinance as yf
|
import yfinance as yf
|
||||||
from yfinance.exceptions import YFRateLimitError
|
|
||||||
from stockstats import wrap
|
from stockstats import wrap
|
||||||
from typing import Annotated
|
from yfinance.exceptions import YFRateLimitError
|
||||||
import os
|
|
||||||
from .config import get_config
|
from .config import get_config
|
||||||
|
from .symbol_utils import NoMarketDataError, normalize_symbol
|
||||||
from .utils import safe_ticker_component
|
from .utils import safe_ticker_component
|
||||||
from .symbol_utils import normalize_symbol, NoMarketDataError
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# A vendor's latest OHLCV row that is MORE than this many calendar days before
# the requested date is treated as stale (a gap of exactly this many days is
# still accepted). Generous enough to span long holiday weekends, tight enough
# to catch the year-old frames yfinance occasionally returns (#1021).
MAX_OHLCV_STALE_DAYS = 10
|
||||||
|
|
||||||
|
|
||||||
def yf_retry(func, max_retries=3, base_delay=2.0):
|
def yf_retry(func, max_retries=3, base_delay=2.0):
|
||||||
"""Execute a yfinance call with exponential backoff on rate limits.
|
"""Execute a yfinance call with exponential backoff on rate limits.
|
||||||
@@ -62,6 +68,60 @@ def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_ohlcv_dates(data: pd.DataFrame) -> pd.Series:
    """Return the parsed row dates of an OHLCV frame.

    The dates are looked up in this order:

    1. a ``Date`` column;
    2. the index, when it is a ``DatetimeIndex`` (named or not);
    3. after ``reset_index()``, the first of ``Date`` / ``Datetime`` /
       ``date`` / ``index`` that is non-numeric and parses to at least one
       valid timestamp.

    Values that cannot be parsed are dropped.

    Args:
        data: OHLCV frame with its dates either in a column or in the index.

    Returns:
        A Series of timestamps; empty when no date information is found.
    """
    if "Date" in data.columns:
        return pd.to_datetime(data["Date"], errors="coerce").dropna()

    # yfinance keeps the dates in the index (a DatetimeIndex, sometimes unnamed).
    if isinstance(data.index, pd.DatetimeIndex):
        return pd.Series(pd.to_datetime(data.index, errors="coerce")).dropna()

    # Fallback: expose the index and look for any date-like column.
    flat = data.reset_index()
    for name in ("Date", "Datetime", "date", "index"):
        if name not in flat.columns:
            continue
        column = flat[name]
        # A plain RangeIndex becomes an integer "index" column; to_datetime
        # would read those integers as epoch offsets and invent 1970 dates.
        if pd.api.types.is_numeric_dtype(column):
            continue
        parsed = pd.to_datetime(column, errors="coerce").dropna()
        if not parsed.empty:
            return parsed
    return pd.Series(dtype="datetime64[ns]")
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_ohlcv_not_stale(
    data: pd.DataFrame,
    curr_date: str,
    symbol: str,
    canonical: str | None = None,
    *,
    max_stale_days: int = MAX_OHLCV_STALE_DAYS,
) -> None:
    """Reject OHLCV whose latest row is far older than curr_date.

    Raises NoMarketDataError (with a stale-specific detail) so the router treats
    it like any other "no usable data from this vendor" — try the next vendor,
    then emit one clear unavailable signal. Empty frames are left to the
    caller's existing no-data handling; this guards only the dangerous case of
    present-but-stale rows (a vendor returning a year-old frame that would
    otherwise feed wrong prices to the agent, #1021).

    Args:
        data: OHLCV frame with dates in a ``Date`` column or in the index.
        curr_date: The requested date; an unparseable value skips the check.
        symbol: Symbol as requested, passed through to the raised error.
        canonical: Canonical symbol, passed through to the raised error.
        max_stale_days: Largest accepted gap in calendar days between the
            latest row and ``curr_date``; only a strictly larger gap is
            rejected.

    Raises:
        NoMarketDataError: The latest row is more than ``max_stale_days``
            calendar days before ``curr_date``.
    """
    if data is None or data.empty:
        return

    requested = pd.to_datetime(curr_date, errors="coerce")
    if pd.isna(requested):
        return

    dates = _coerce_ohlcv_dates(data)
    if dates.empty:
        return
    latest = dates.max()

    # Compare calendar days on tz-naive values: subtracting a tz-aware
    # timestamp from a tz-naive one raises TypeError, which would otherwise
    # abort the check for frames that still carry the exchange timezone.
    if requested.tzinfo is not None:
        requested = requested.tz_localize(None)
    if latest.tzinfo is not None:
        latest = latest.tz_localize(None)
    requested = requested.normalize()
    latest = latest.normalize()

    stale_days = (requested - latest).days
    if stale_days > max_stale_days:
        raise NoMarketDataError(
            symbol,
            canonical,
            f"latest row is {latest.date()}, {stale_days} days before the "
            f"requested {requested.date()} (stale) — refusing to use it",
        )
|
||||||
|
|
||||||
|
|
||||||
def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
||||||
"""Fetch OHLCV data with caching, filtered to prevent look-ahead bias.
|
"""Fetch OHLCV data with caching, filtered to prevent look-ahead bias.
|
||||||
|
|
||||||
@@ -125,6 +185,10 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
|||||||
# Filter to curr_date to prevent look-ahead bias in backtesting
|
# Filter to curr_date to prevent look-ahead bias in backtesting
|
||||||
data = data[data["Date"] <= curr_date_dt]
|
data = data[data["Date"] <= curr_date_dt]
|
||||||
|
|
||||||
|
# Reject a stale frame (latest row far older than curr_date) rather than
|
||||||
|
# feeding year-old prices into indicators (#1021).
|
||||||
|
_assert_ohlcv_not_stale(data, curr_date, symbol, canonical)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,19 @@
|
|||||||
from typing import Annotated
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from dateutil.relativedelta import relativedelta
|
from typing import Annotated
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import yfinance as yf
|
import yfinance as yf
|
||||||
import os
|
from dateutil.relativedelta import relativedelta
|
||||||
from .stockstats_utils import StockstatsUtils, _clean_dataframe, yf_retry, load_ohlcv, filter_financials_by_date
|
|
||||||
from .symbol_utils import normalize_symbol, NoMarketDataError
|
from .stockstats_utils import (
|
||||||
|
StockstatsUtils,
|
||||||
|
_assert_ohlcv_not_stale,
|
||||||
|
filter_financials_by_date,
|
||||||
|
load_ohlcv,
|
||||||
|
yf_retry,
|
||||||
|
)
|
||||||
|
from .symbol_utils import NoMarketDataError, normalize_symbol
|
||||||
|
|
||||||
|
|
||||||
def get_YFin_data_online(
|
def get_YFin_data_online(
|
||||||
symbol: Annotated[str, "ticker symbol of the company"],
|
symbol: Annotated[str, "ticker symbol of the company"],
|
||||||
@@ -38,6 +46,11 @@ def get_YFin_data_online(
|
|||||||
if data.index.tz is not None:
|
if data.index.tz is not None:
|
||||||
data.index = data.index.tz_localize(None)
|
data.index = data.index.tz_localize(None)
|
||||||
|
|
||||||
|
# Reject a stale frame (e.g. a year-old partial response) before it is
|
||||||
|
# formatted into the report. Raises NoMarketDataError, which the router
|
||||||
|
# turns into one clear unavailable signal (#1021).
|
||||||
|
_assert_ohlcv_not_stale(data, end_date, symbol, canonical)
|
||||||
|
|
||||||
# Round numerical values to 2 decimal places for cleaner display
|
# Round numerical values to 2 decimal places for cleaner display
|
||||||
numeric_columns = ["Open", "High", "Low", "Close", "Adj Close"]
|
numeric_columns = ["Open", "High", "Low", "Close", "Adj Close"]
|
||||||
for col in numeric_columns:
|
for col in numeric_columns:
|
||||||
|
|||||||
Reference in New Issue
Block a user