fix(deps): require yfinance >=1.4.1 and tolerate non-Date index column

yfinance 1.4.0 regressed by leaving the daily-download index unnamed, so
reset_index() produced an "index" column instead of "Date" and every
stockstats indicator silently failed (no SMA/RSI/MACD/Bollinger/ATR).
Verified across versions: 1.2.0 / 1.3.0 / 1.4.1 name it "Date"; only
1.4.0 is broken. Pin to >=1.4.1 (the upstream fix) and normalize the
date column defensively so a non-"Date" index can't silently drop
indicators on any build.

#890
This commit is contained in:
Yijia-Xiao
2026-05-31 00:51:30 +00:00
parent 3543e5397e
commit a66aa8fb94
3 changed files with 88 additions and 2 deletions

View File

@@ -29,7 +29,7 @@ dependencies = [
"stockstats>=0.6.5", "stockstats>=0.6.5",
"tqdm>=4.67.1", "tqdm>=4.67.1",
"typing-extensions>=4.14.0", "typing-extensions>=4.14.0",
"yfinance>=0.2.63", "yfinance>=1.4.1",
] ]
[project.scripts] [project.scripts]

View File

@@ -0,0 +1,70 @@
"""Tests for tolerating a non-`Date` index column in stockstats_utils (#890).
Guards against a download frame whose date column is `index` or `Datetime`
instead of `Date`, which would otherwise silently drop every indicator.
"""
from __future__ import annotations
import pandas as pd
import pytest
from tradingagents.dataflows import stockstats_utils as su
def _ohlcv(date_col: str, periods: int = 10) -> pd.DataFrame:
    """Build a synthetic OHLCV frame whose date column is named `date_col`.

    Args:
        date_col: Label for the date column (e.g. ``"Date"``, ``"index"``).
        periods: Number of consecutive business-day rows to generate,
            starting at 2026-04-01. Defaults to 10.

    Returns:
        A frame with columns `date_col`, ``Open``, ``High``, ``Low``,
        ``Close`` and ``Volume``. Prices increase by 1.0 per row and
        volume by 1 per row.
    """
    dates = pd.bdate_range("2026-04-01", periods=periods)
    # One shared range keeps every column the same length as `dates`.
    steps = range(periods)
    return pd.DataFrame({
        date_col: dates,
        "Open": [100.0 + i for i in steps],
        "High": [101.0 + i for i in steps],
        "Low": [99.0 + i for i in steps],
        "Close": [100.5 + i for i in steps],
        "Volume": [1_000_000 + i for i in steps],
    })
@pytest.mark.unit
class TestEnsureDateColumn:
    """Checks that ``_ensure_date_column`` maps known labels onto ``Date``."""

    def test_renames_index_column(self):
        columns = su._ensure_date_column(_ohlcv("index")).columns
        assert "Date" in columns
        assert "index" not in columns

    def test_renames_datetime_and_date_variants(self):
        for label in ("Datetime", "date"):
            normalized = su._ensure_date_column(_ohlcv(label))
            assert "Date" in normalized.columns

    def test_leaves_existing_date_untouched(self):
        frame = _ohlcv("Date")
        result = su._ensure_date_column(frame)
        # No-op short-circuit: the very same object comes back.
        assert result is frame

    def test_no_datelike_column_is_left_alone(self):
        frame = pd.DataFrame({"Close": [1, 2, 3]})
        result = su._ensure_date_column(frame)
        # Nothing to rename; handling the missing column is the caller's job.
        assert "Date" not in result.columns
@pytest.mark.unit
class TestCleanDataframeAcrossVersions:
    """Checks ``_clean_dataframe`` on frames with differing date-column names."""

    def test_clean_handles_index_column(self):
        """An `index`-labelled date column must still clean into a usable,
        date-parsed frame (previously raised KeyError: 'Date')."""
        result = su._clean_dataframe(_ohlcv("index"))
        assert "Date" in result.columns
        assert pd.api.types.is_datetime64_any_dtype(result["Date"])
        assert len(result) == 10

    def test_clean_handles_legacy_date_column(self):
        result = su._clean_dataframe(_ohlcv("Date"))
        assert len(result) == 10

    def test_indicators_compute_after_index_rename(self):
        """stockstats must be able to compute an indicator on a frame whose
        date column arrived as `index`, rather than erroring per indicator."""
        from stockstats import wrap

        wrapped = wrap(su._clean_dataframe(_ohlcv("index")))
        wrapped["close_5_sma"]  # column access triggers the calculation
        assert "close_5_sma" in wrapped.columns
        assert wrapped["close_5_sma"].notna().any()

View File

@@ -32,8 +32,24 @@ def yf_retry(func, max_retries=3, base_delay=2.0):
raise raise
def _ensure_date_column(data: pd.DataFrame) -> pd.DataFrame:
    """Normalize the date column to ``Date``.

    Some yfinance builds leave the index unnamed (so ``reset_index()`` yields
    ``index``) or use ``Datetime`` for intraday data. Rename the first
    date-like column so indicators don't silently drop when it isn't ``Date``.

    Lookup order:
      1. ``Date`` already present: the frame is returned unchanged (same
         object).
      2. An exact match for ``index``, ``Datetime`` or ``date``, in that
         priority order.
      3. A case-insensitive match for the same names (e.g. ``DATE``,
         ``datetime``), same priority order. Only string labels are
         considered.

    Args:
        data: Frame whose columns may contain a date-like label.

    Returns:
        ``data`` itself when no rename is needed or no candidate is found;
        otherwise a renamed copy produced by ``DataFrame.rename``.
    """
    if "Date" in data.columns:
        return data

    candidates = ("index", "Datetime", "date")

    # Exact spellings win so previously supported frames behave as before.
    for candidate in candidates:
        if candidate in data.columns:
            return data.rename(columns={candidate: "Date"})

    # Fallback: tolerate case variants. Non-string labels (ints, tuples)
    # are skipped; the first label seen for a folded name is kept.
    folded_to_label = {}
    for label in data.columns:
        if isinstance(label, str):
            folded_to_label.setdefault(label.casefold(), label)
    for candidate in candidates:
        match = folded_to_label.get(candidate.casefold())
        if match is not None:
            return data.rename(columns={match: "Date"})

    return data
def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame: def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
"""Normalize a stock DataFrame for stockstats: parse dates, drop invalid rows, fill price gaps.""" """Normalize a stock DataFrame for stockstats: parse dates, drop invalid rows, fill price gaps."""
data = _ensure_date_column(data)
data["Date"] = pd.to_datetime(data["Date"], errors="coerce") data["Date"] = pd.to_datetime(data["Date"], errors="coerce")
data = data.dropna(subset=["Date"]) data = data.dropna(subset=["Date"])
@@ -82,7 +98,7 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
progress=False, progress=False,
auto_adjust=True, auto_adjust=True,
)) ))
data = data.reset_index() data = _ensure_date_column(data.reset_index())
data.to_csv(data_file, index=False, encoding="utf-8") data.to_csv(data_file, index=False, encoding="utf-8")
data = _clean_dataframe(data) data = _clean_dataframe(data)