fix(deps): require yfinance >=1.4.1 and tolerate non-Date index column

yfinance 1.4.0 regressed by leaving the daily-download index unnamed, so reset_index() produced an "index" column instead of "Date" and every stockstats indicator silently failed (no SMA/RSI/MACD/Bollinger/ATR). Verified across versions: 1.2.0 / 1.3.0 / 1.4.1 name it "Date"; only 1.4.0 is broken. Raise the minimum supported version to >=1.4.1 (which contains the upstream fix) and normalize the date column defensively so a non-"Date" index can't silently drop indicators on any build. #890
2026-08-01 19:34:24 +03:00 · 2026-05-31 00:51:30 +00:00
parent 3543e5397e
commit a66aa8fb94
3 changed files with 88 additions and 2 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ dependencies = [
    "stockstats>=0.6.5",
    "tqdm>=4.67.1",
    "typing-extensions>=4.14.0",
-    "yfinance>=0.2.63",
+    "yfinance>=1.4.1",
 ]
 [project.scripts]
--- a/tests/test_stockstats_date_column.py
+++ b/tests/test_stockstats_date_column.py
@@ -0,0 +1,70 @@
 """Tests for tolerating a non-`Date` index column in stockstats_utils (#890).
 Guards against a download frame whose date column is `index` or `Datetime`
 instead of `Date`, which would otherwise silently drop every indicator.
 """
 from __future__ import annotations
 import pandas as pd
 import pytest
 from tradingagents.dataflows import stockstats_utils as su
 def _ohlcv(date_col: str) -> pd.DataFrame:
    """Build a 10-row synthetic OHLCV frame whose date column is ``date_col``.

    Dates are consecutive business days starting 2026-04-01; every price and
    volume series rises by one per row from a fixed base value.
    """
    row_count = 10
    offsets = range(row_count)
    # (column label, value on the first row); order fixes the column order.
    series_bases = (
        ("Open", 100.0),
        ("High", 101.0),
        ("Low", 99.0),
        ("Close", 100.5),
        ("Volume", 1_000_000),
    )
    columns = {date_col: pd.bdate_range("2026-04-01", periods=row_count)}
    for label, base in series_bases:
        columns[label] = [base + step for step in offsets]
    return pd.DataFrame(columns)
@pytest.mark.unit
class TestEnsureDateColumn:
    """Unit checks for how ``su._ensure_date_column`` renames the date column."""

    def test_renames_index_column(self):
        """An ``index`` column is replaced by ``Date``."""
        names = su._ensure_date_column(_ohlcv("index")).columns
        assert "Date" in names
        assert "index" not in names

    def test_renames_datetime_and_date_variants(self):
        """``Datetime`` and lowercase ``date`` are both normalized to ``Date``."""
        for variant in ("Datetime", "date"):
            normalized = su._ensure_date_column(_ohlcv(variant))
            assert "Date" in normalized.columns

    def test_leaves_existing_date_untouched(self):
        """A frame that already has ``Date`` comes back as the same object."""
        original = _ohlcv("Date")
        returned = su._ensure_date_column(original)
        assert returned is original  # no-op short-circuit

    def test_no_datelike_column_is_left_alone(self):
        """Without any recognized date column, no ``Date`` column is invented."""
        priceless = pd.DataFrame({"Close": [1, 2, 3]})
        result = su._ensure_date_column(priceless)
        assert "Date" not in result.columns  # nothing to rename; caller handles
@pytest.mark.unit
class TestCleanDataframeAcrossVersions:
    """Checks that ``su._clean_dataframe`` copes with differently named date columns."""

    def test_clean_handles_index_column(self):
        """A frame carrying ``index`` rather than ``Date`` must still come out
        with a datetime-typed ``Date`` column and all ten rows intact
        (this previously raised KeyError: 'Date')."""
        frame = su._clean_dataframe(_ohlcv("index"))
        assert "Date" in frame.columns
        date_values = frame["Date"]
        assert pd.api.types.is_datetime64_any_dtype(date_values)
        assert len(frame) == 10

    def test_clean_handles_legacy_date_column(self):
        """A frame that already uses ``Date`` keeps all ten rows."""
        frame = su._clean_dataframe(_ohlcv("Date"))
        assert len(frame) == 10

    def test_indicators_compute_after_index_rename(self):
        """stockstats must be able to derive an indicator from a frame whose
        date column arrived as ``index``, rather than erroring per indicator."""
        from stockstats import wrap

        indicator = "close_5_sma"
        wrapped = wrap(su._clean_dataframe(_ohlcv("index")))
        wrapped[indicator]  # triggers calculation
        assert indicator in wrapped.columns
        assert wrapped[indicator].notna().any()
--- a/tradingagents/dataflows/stockstats_utils.py
+++ b/tradingagents/dataflows/stockstats_utils.py
@@ -32,8 +32,24 @@ def yf_retry(func, max_retries=3, base_delay=2.0):
                raise
 def _ensure_date_column(data: pd.DataFrame) -> pd.DataFrame:
    """Return ``data`` with its date column named ``Date``.

    A frame that already has a ``Date`` column is returned as the same object.
    Otherwise the column names ``index``, ``Datetime`` and ``date`` are tried
    in that priority order (not in column order) and the first one present is
    renamed to ``Date`` on a new frame. If none is present the input is
    returned unchanged, leaving the missing column for the caller to handle.
    """
    present = data.columns
    if "Date" in present:
        return data
    # Fallback names, highest priority first.
    match = next(
        (name for name in ("index", "Datetime", "date") if name in present),
        None,
    )
    if match is None:
        return data
    return data.rename(columns={match: "Date"})
 def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
    """Normalize a stock DataFrame for stockstats: parse dates, drop invalid rows, fill price gaps."""
    data = _ensure_date_column(data)
    data["Date"] = pd.to_datetime(data["Date"], errors="coerce")
    data = data.dropna(subset=["Date"])
@@ -82,7 +98,7 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
            progress=False,
            auto_adjust=True,
        ))
-        data = data.reset_index()
+        data = _ensure_date_column(data.reset_index())
        data.to_csv(data_file, index=False, encoding="utf-8")
    data = _clean_dataframe(data)