From a66aa8fb94062c60ccd83b7eb350c19e16c32728 Mon Sep 17 00:00:00 2001 From: Yijia-Xiao Date: Sun, 31 May 2026 00:51:30 +0000 Subject: [PATCH] fix(deps): require yfinance >=1.4.1 and tolerate non-Date index column yfinance 1.4.0 regressed by leaving the daily-download index unnamed, so reset_index() produced an "index" column instead of "Date" and every stockstats indicator silently failed (no SMA/RSI/MACD/Bollinger/ATR). Verified across versions: 1.2.0 / 1.3.0 / 1.4.1 name it "Date"; only 1.4.0 is broken. Raise the minimum to >=1.4.1 (the release with the upstream fix) and normalize the date column defensively so a non-"Date" index can't silently drop indicators on any build. #890 --- pyproject.toml | 2 +- tests/test_stockstats_date_column.py | 70 +++++++++++++++++++++ tradingagents/dataflows/stockstats_utils.py | 18 +++++- 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 tests/test_stockstats_date_column.py diff --git a/pyproject.toml b/pyproject.toml index dcb1a6471..9604c3381 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "stockstats>=0.6.5", "tqdm>=4.67.1", "typing-extensions>=4.14.0", - "yfinance>=0.2.63", + "yfinance>=1.4.1", ] [project.scripts] diff --git a/tests/test_stockstats_date_column.py b/tests/test_stockstats_date_column.py new file mode 100644 index 000000000..e55a5c70c --- /dev/null +++ b/tests/test_stockstats_date_column.py @@ -0,0 +1,70 @@ +"""Tests for tolerating a non-`Date` index column in stockstats_utils (#890). + +Guards against a download frame whose date column is `index` or `Datetime` +instead of `Date`, which would otherwise silently drop every indicator. 
+""" + +from __future__ import annotations + +import pandas as pd +import pytest + +from tradingagents.dataflows import stockstats_utils as su + + +def _ohlcv(date_col: str) -> pd.DataFrame: + """OHLCV frame whose date column is named `date_col`.""" + dates = pd.bdate_range("2026-04-01", periods=10) + return pd.DataFrame({ + date_col: dates, + "Open": [100.0 + i for i in range(10)], + "High": [101.0 + i for i in range(10)], + "Low": [99.0 + i for i in range(10)], + "Close": [100.5 + i for i in range(10)], + "Volume": [1_000_000 + i for i in range(10)], + }) + + +@pytest.mark.unit +class TestEnsureDateColumn: + def test_renames_index_column(self): + out = su._ensure_date_column(_ohlcv("index")) + assert "Date" in out.columns and "index" not in out.columns + + def test_renames_datetime_and_date_variants(self): + assert "Date" in su._ensure_date_column(_ohlcv("Datetime")).columns + assert "Date" in su._ensure_date_column(_ohlcv("date")).columns + + def test_leaves_existing_date_untouched(self): + df = _ohlcv("Date") + assert su._ensure_date_column(df) is df # no-op short-circuit + + def test_no_datelike_column_is_left_alone(self): + df = pd.DataFrame({"Close": [1, 2, 3]}) + out = su._ensure_date_column(df) + assert "Date" not in out.columns # nothing to rename; caller handles + + +@pytest.mark.unit +class TestCleanDataframeAcrossVersions: + def test_clean_handles_index_column(self): + """A frame with `index` instead of `Date` must still clean to a + usable, date-parsed frame (was KeyError: 'Date').""" + cleaned = su._clean_dataframe(_ohlcv("index")) + assert "Date" in cleaned.columns + assert pd.api.types.is_datetime64_any_dtype(cleaned["Date"]) + assert len(cleaned) == 10 + + def test_clean_handles_legacy_date_column(self): + cleaned = su._clean_dataframe(_ohlcv("Date")) + assert len(cleaned) == 10 + + def test_indicators_compute_after_index_rename(self): + """stockstats must compute indicators on a frame whose date column + arrived as `index`, instead of erroring 
per indicator.""" + from stockstats import wrap + cleaned = su._clean_dataframe(_ohlcv("index")) + df = wrap(cleaned) + df["close_5_sma"] # triggers calculation + assert "close_5_sma" in df.columns + assert df["close_5_sma"].notna().any() diff --git a/tradingagents/dataflows/stockstats_utils.py b/tradingagents/dataflows/stockstats_utils.py index 260ef73cd..d4ff577fa 100644 --- a/tradingagents/dataflows/stockstats_utils.py +++ b/tradingagents/dataflows/stockstats_utils.py @@ -32,8 +32,24 @@ def yf_retry(func, max_retries=3, base_delay=2.0): raise +def _ensure_date_column(data: pd.DataFrame) -> pd.DataFrame: + """Normalize the date column to ``Date``. + + Some yfinance builds leave the index unnamed (so ``reset_index()`` yields + ``index``) or use ``Datetime`` for intraday data. Rename the first + date-like column so indicators don't silently drop when it isn't ``Date``. + """ + if "Date" in data.columns: + return data + for candidate in ("index", "Datetime", "date"): + if candidate in data.columns: + return data.rename(columns={candidate: "Date"}) + return data + + def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame: """Normalize a stock DataFrame for stockstats: parse dates, drop invalid rows, fill price gaps.""" + data = _ensure_date_column(data) data["Date"] = pd.to_datetime(data["Date"], errors="coerce") data = data.dropna(subset=["Date"]) @@ -82,7 +98,7 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame: progress=False, auto_adjust=True, )) - data = data.reset_index() + data = _ensure_date_column(data.reset_index()) data.to_csv(data_file, index=False, encoding="utf-8") data = _clean_dataframe(data)