From dab07688fba33c33c3e7d874ec554cf3201caa8d Mon Sep 17 00:00:00 2001 From: Yijia-Xiao Date: Sat, 13 Jun 2026 21:30:11 +0000 Subject: [PATCH] fix(data): include the requested end date in yfinance fetches yfinance treats end as exclusive, so get_YFin_data_online dropped the requested end_date row and load_ohlcv dropped the current day. Request one day past the end so the range is inclusive (look-ahead is still prevented by the curr_date filter; the header still shows the requested range). Also correct the load_ohlcv docstring to the 5-year window it actually downloads. --- tests/test_date_boundaries.py | 61 +++++++++++++++++++++ tradingagents/dataflows/stockstats_utils.py | 9 ++- tradingagents/dataflows/y_finance.py | 9 ++- 3 files changed, 73 insertions(+), 6 deletions(-) create mode 100644 tests/test_date_boundaries.py diff --git a/tests/test_date_boundaries.py b/tests/test_date_boundaries.py new file mode 100644 index 000000000..263a8cbb6 --- /dev/null +++ b/tests/test_date_boundaries.py @@ -0,0 +1,61 @@ +"""yfinance treats ``end`` as exclusive; we must request one extra day so the +requested end_date (and the current day) is actually included. + +Regressions for #986 (current-day OHLCV excluded) and #987 (requested end_date +row omitted). +""" +import pandas as pd +import pytest + +import tradingagents.dataflows.stockstats_utils as su +import tradingagents.dataflows.y_finance as yfin +from tradingagents.dataflows.config import set_config + + +@pytest.mark.unit +def test_get_yfin_requests_inclusive_end(monkeypatch): + captured = {} + + class FakeTicker: + def __init__(self, symbol): + pass + + def history(self, start, end): + captured["start"] = start + captured["end"] = end + idx = pd.to_datetime(["2025-05-08", "2025-05-09"]) + return pd.DataFrame( + {"Open": [1.0, 2.0], "High": [1.0, 2.0], "Low": [1.0, 2.0], + "Close": [1.0, 2.0], "Volume": [1, 2]}, + index=idx, + ) + + monkeypatch.setattr(yfin.yf, "Ticker", FakeTicker) + out = yfin.get_YFin_data_online("AAPL", "2025-05-01", "2025-05-09") + + # end is requested one day past end_date so 2025-05-09 is included (#987). + assert captured["end"] == "2025-05-10" + # Header still reflects the requested range, not the internal +1 day. + assert "to 2025-05-09" in out + + +@pytest.mark.unit +def test_load_ohlcv_requests_inclusive_end(monkeypatch, tmp_path): + set_config({"data_cache_dir": str(tmp_path)}) + captured = {} + + def fake_download(symbol, start, end, **kwargs): + captured["end"] = end + idx = pd.to_datetime([pd.Timestamp.today().normalize()]) + return pd.DataFrame( + {"Open": [100.0], "High": [100.0], "Low": [100.0], + "Close": [100.0], "Volume": [1]}, + index=idx, + ) + + monkeypatch.setattr(su.yf, "download", fake_download) + today = pd.Timestamp.today().strftime("%Y-%m-%d") + su.load_ohlcv("AAPL", today) + + expected_end = (pd.Timestamp.today() + pd.Timedelta(days=1)).strftime("%Y-%m-%d") + assert captured["end"] == expected_end # tomorrow -> today's row included (#986) diff --git a/tradingagents/dataflows/stockstats_utils.py b/tradingagents/dataflows/stockstats_utils.py index 47661e811..585e581ea 100644 --- a/tradingagents/dataflows/stockstats_utils.py +++ b/tradingagents/dataflows/stockstats_utils.py @@ -65,7 +65,7 @@ def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame: def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame: """Fetch OHLCV data with caching, filtered to prevent look-ahead bias. - Downloads 15 years of data up to today and caches per symbol. On + Downloads 5 years of data up to today and caches per symbol. On subsequent calls the cache is reused. Rows after curr_date are filtered out so backtests never see future prices. """ @@ -78,11 +78,14 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame: config = get_config() curr_date_dt = pd.to_datetime(curr_date) - # Cache uses a fixed window (15y to today) so one file per symbol + # Cache uses a fixed window (5y to today) so one file per symbol. today_date = pd.Timestamp.today() start_date = today_date - pd.DateOffset(years=5) start_str = start_date.strftime("%Y-%m-%d") - end_str = today_date.strftime("%Y-%m-%d") + # yfinance ``end`` is EXCLUSIVE; request tomorrow so today's row is included + # when curr_date is the current day (#986). Look-ahead is still prevented by + # the curr_date filter below. + end_str = (today_date + pd.Timedelta(days=1)).strftime("%Y-%m-%d") os.makedirs(config["data_cache_dir"], exist_ok=True) data_file = os.path.join( diff --git a/tradingagents/dataflows/y_finance.py b/tradingagents/dataflows/y_finance.py index f2f77cb3f..5e0cea865 100644 --- a/tradingagents/dataflows/y_finance.py +++ b/tradingagents/dataflows/y_finance.py @@ -14,14 +14,17 @@ def get_YFin_data_online( ): datetime.strptime(start_date, "%Y-%m-%d") - datetime.strptime(end_date, "%Y-%m-%d") + end_dt = datetime.strptime(end_date, "%Y-%m-%d") # Resolve broker/forex symbols to Yahoo's convention (XAUUSD+ -> GC=F). canonical = normalize_symbol(symbol) ticker = yf.Ticker(canonical) - # Fetch historical data for the specified date range - data = yf_retry(lambda: ticker.history(start=start_date, end=end_date)) + # yfinance treats ``end`` as EXCLUSIVE, so it would drop the requested + # end_date row (and the current day when end_date is today). Request one day + # past end_date so the requested range is actually inclusive (#986/#987). + end_inclusive = (end_dt + relativedelta(days=1)).strftime("%Y-%m-%d") + data = yf_retry(lambda: ticker.history(start=start_date, end=end_inclusive)) # Empty result means the symbol is unknown/delisted. Raise a typed error # instead of returning prose: the routing layer turns it into a single