mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-16 21:06:15 +03:00
fix(data): include the requested end date in yfinance fetches
yfinance treats end as exclusive, so get_YFin_data_online dropped the requested end_date row and load_ohlcv dropped the current day. Request one day past the end so the range is inclusive (look-ahead is still prevented by the curr_date filter; the header still shows the requested range). Also correct the load_ohlcv docstring to the 5-year window it actually downloads.
This commit is contained in:
61
tests/test_date_boundaries.py
Normal file
61
tests/test_date_boundaries.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""yfinance treats ``end`` as exclusive; we must request one extra day so the
|
||||
requested end_date (and the current day) is actually included.
|
||||
|
||||
Regressions for #986 (current-day OHLCV excluded) and #987 (requested end_date
|
||||
row omitted).
|
||||
"""
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
import tradingagents.dataflows.stockstats_utils as su
|
||||
import tradingagents.dataflows.y_finance as yfin
|
||||
from tradingagents.dataflows.config import set_config
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_get_yfin_requests_inclusive_end(monkeypatch):
    """Check that ``get_YFin_data_online`` requests one day past ``end_date``.

    ``yf.Ticker`` is swapped for a stub whose ``history`` records the
    ``start``/``end`` arguments it receives and returns a fixed two-row
    frame. The test then verifies that only the end bound is widened and
    that the returned text still names the requested end date (#987).
    """
    captured = {}

    class FakeTicker:
        """Stand-in for ``yf.Ticker`` that records the requested range."""

        def __init__(self, symbol):
            pass

        def history(self, start, end):
            captured["start"] = start
            captured["end"] = end
            idx = pd.to_datetime(["2025-05-08", "2025-05-09"])
            return pd.DataFrame(
                {
                    "Open": [1.0, 2.0],
                    "High": [1.0, 2.0],
                    "Low": [1.0, 2.0],
                    "Close": [1.0, 2.0],
                    "Volume": [1, 2],
                },
                index=idx,
            )

    monkeypatch.setattr(yfin.yf, "Ticker", FakeTicker)
    out = yfin.get_YFin_data_online("AAPL", "2025-05-01", "2025-05-09")

    # Only the end bound is shifted; the start must be passed through as given.
    assert captured["start"] == "2025-05-01"
    # end is requested one day past end_date so 2025-05-09 is included (#987).
    assert captured["end"] == "2025-05-10"
    # Header still reflects the requested range, not the internal +1 day.
    assert "to 2025-05-09" in out
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_load_ohlcv_requests_inclusive_end(monkeypatch, tmp_path):
    """Check that ``load_ohlcv`` asks ``yf.download`` for an ``end`` of tomorrow.

    The cache directory is pointed at ``tmp_path`` and ``yf.download`` is
    replaced with a stub that records the ``end`` argument and returns a
    single row dated today (#986).

    The clock is read once here so the stub's row, the ``curr_date``
    argument and the expected end date all derive from the same instant.
    ``load_ohlcv`` still reads the clock itself, so a run that straddles
    midnight can still disagree with it.
    """
    set_config({"data_cache_dir": str(tmp_path)})
    captured = {}
    now = pd.Timestamp.today()

    def fake_download(symbol, start, end, **kwargs):
        captured["end"] = end
        idx = pd.to_datetime([now.normalize()])
        return pd.DataFrame(
            {
                "Open": [100.0],
                "High": [100.0],
                "Low": [100.0],
                "Close": [100.0],
                "Volume": [1],
            },
            index=idx,
        )

    monkeypatch.setattr(su.yf, "download", fake_download)
    su.load_ohlcv("AAPL", now.strftime("%Y-%m-%d"))

    # Fail with a readable message rather than a KeyError if the stub was skipped.
    assert "end" in captured, "yf.download was not called"
    expected_end = (now + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
    assert captured["end"] == expected_end  # tomorrow -> today's row included (#986)
|
||||
@@ -65,7 +65,7 @@ def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
|
||||
def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
||||
"""Fetch OHLCV data with caching, filtered to prevent look-ahead bias.
|
||||
|
||||
Downloads 15 years of data up to today and caches per symbol. On
|
||||
Downloads 5 years of data up to today and caches per symbol. On
|
||||
subsequent calls the cache is reused. Rows after curr_date are
|
||||
filtered out so backtests never see future prices.
|
||||
"""
|
||||
@@ -78,11 +78,14 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
||||
config = get_config()
|
||||
curr_date_dt = pd.to_datetime(curr_date)
|
||||
|
||||
# Cache uses a fixed window (15y to today) so one file per symbol
|
||||
# Cache uses a fixed window (5y to today) so one file per symbol.
|
||||
today_date = pd.Timestamp.today()
|
||||
start_date = today_date - pd.DateOffset(years=5)
|
||||
start_str = start_date.strftime("%Y-%m-%d")
|
||||
end_str = today_date.strftime("%Y-%m-%d")
|
||||
# yfinance ``end`` is EXCLUSIVE; request tomorrow so today's row is included
|
||||
# when curr_date is the current day (#986). Look-ahead is still prevented by
|
||||
# the curr_date filter below.
|
||||
end_str = (today_date + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
|
||||
os.makedirs(config["data_cache_dir"], exist_ok=True)
|
||||
data_file = os.path.join(
|
||||
|
||||
@@ -14,14 +14,17 @@ def get_YFin_data_online(
|
||||
):
|
||||
|
||||
datetime.strptime(start_date, "%Y-%m-%d")
|
||||
datetime.strptime(end_date, "%Y-%m-%d")
|
||||
end_dt = datetime.strptime(end_date, "%Y-%m-%d")
|
||||
|
||||
# Resolve broker/forex symbols to Yahoo's convention (XAUUSD+ -> GC=F).
|
||||
canonical = normalize_symbol(symbol)
|
||||
ticker = yf.Ticker(canonical)
|
||||
|
||||
# Fetch historical data for the specified date range
|
||||
data = yf_retry(lambda: ticker.history(start=start_date, end=end_date))
|
||||
# yfinance treats ``end`` as EXCLUSIVE, so it would drop the requested
|
||||
# end_date row (and the current day when end_date is today). Request one day
|
||||
# past end_date so the requested range is actually inclusive (#986/#987).
|
||||
end_inclusive = (end_dt + relativedelta(days=1)).strftime("%Y-%m-%d")
|
||||
data = yf_retry(lambda: ticker.history(start=start_date, end=end_inclusive))
|
||||
|
||||
# Empty result means the symbol is unknown/delisted. Raise a typed error
|
||||
# instead of returning prose: the routing layer turns it into a single
|
||||
|
||||
Reference in New Issue
Block a user