fix(data): include the requested end date in yfinance fetches

yfinance treats the ``end`` argument as exclusive, so get_YFin_data_online dropped
the requested end_date row and load_ohlcv dropped the current day. Both now request
one day past the end date so the fetched range is inclusive. Look-ahead is still
prevented by the curr_date filter, and the header still shows the requested range.
Also correct the load_ohlcv docstring and cache comment to the 5-year window it
actually downloads.
This commit is contained in:
Yijia-Xiao
2026-06-13 21:30:11 +00:00
parent 65608831f8
commit dab07688fb
3 changed files with 73 additions and 6 deletions

View File

@@ -65,7 +65,7 @@ def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
"""Fetch OHLCV data with caching, filtered to prevent look-ahead bias.
Downloads 15 years of data up to today and caches per symbol. On
Downloads 5 years of data up to today and caches per symbol. On
subsequent calls the cache is reused. Rows after curr_date are
filtered out so backtests never see future prices.
"""
@@ -78,11 +78,14 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
config = get_config()
curr_date_dt = pd.to_datetime(curr_date)
# Cache uses a fixed window (15y to today) so one file per symbol
# Cache uses a fixed window (5y to today) so one file per symbol.
today_date = pd.Timestamp.today()
start_date = today_date - pd.DateOffset(years=5)
start_str = start_date.strftime("%Y-%m-%d")
end_str = today_date.strftime("%Y-%m-%d")
# yfinance ``end`` is EXCLUSIVE; request tomorrow so today's row is included
# when curr_date is the current day (#986). Look-ahead is still prevented by
# the curr_date filter below.
end_str = (today_date + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
os.makedirs(config["data_cache_dir"], exist_ok=True)
data_file = os.path.join(

View File

@@ -14,14 +14,17 @@ def get_YFin_data_online(
):
datetime.strptime(start_date, "%Y-%m-%d")
datetime.strptime(end_date, "%Y-%m-%d")
end_dt = datetime.strptime(end_date, "%Y-%m-%d")
# Resolve broker/forex symbols to Yahoo's convention (XAUUSD+ -> GC=F).
canonical = normalize_symbol(symbol)
ticker = yf.Ticker(canonical)
# Fetch historical data for the specified date range
data = yf_retry(lambda: ticker.history(start=start_date, end=end_date))
# yfinance treats ``end`` as EXCLUSIVE, so it would drop the requested
# end_date row (and the current day when end_date is today). Request one day
# past end_date so the requested range is actually inclusive (#986/#987).
end_inclusive = (end_dt + relativedelta(days=1)).strftime("%Y-%m-%d")
data = yf_retry(lambda: ticker.history(start=start_date, end=end_inclusive))
# Empty result means the symbol is unknown/delisted. Raise a typed error
# instead of returning prose: the routing layer turns it into a single