mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-16 21:06:15 +03:00
fix(deps): require yfinance >=1.4.1 and tolerate non-Date index column
yfinance 1.4.0 regressed the daily-download index to unnamed, so reset_index() produced an "index" column instead of "Date" and every stockstats indicator silently failed (no SMA/RSI/MACD/Bollinger/ATR). Verified across versions: 1.2.0 / 1.3.0 / 1.4.1 name it "Date"; only 1.4.0 is broken. Pin to >=1.4.1 (the upstream fix) and normalize the date column defensively so a non-"Date" index can't silently drop indicators on any build. #890
This commit is contained in:
@@ -29,7 +29,7 @@ dependencies = [
|
||||
"stockstats>=0.6.5",
|
||||
"tqdm>=4.67.1",
|
||||
"typing-extensions>=4.14.0",
|
||||
"yfinance>=0.2.63",
|
||||
"yfinance>=1.4.1",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
70
tests/test_stockstats_date_column.py
Normal file
70
tests/test_stockstats_date_column.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Tests for tolerating a non-`Date` index column in stockstats_utils (#890).
|
||||
|
||||
Guards against a download frame whose date column is `index` or `Datetime`
|
||||
instead of `Date`, which would otherwise silently drop every indicator.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from tradingagents.dataflows import stockstats_utils as su
|
||||
|
||||
|
||||
def _ohlcv(date_col: str) -> pd.DataFrame:
|
||||
"""OHLCV frame whose date column is named `date_col`."""
|
||||
dates = pd.bdate_range("2026-04-01", periods=10)
|
||||
return pd.DataFrame({
|
||||
date_col: dates,
|
||||
"Open": [100.0 + i for i in range(10)],
|
||||
"High": [101.0 + i for i in range(10)],
|
||||
"Low": [99.0 + i for i in range(10)],
|
||||
"Close": [100.5 + i for i in range(10)],
|
||||
"Volume": [1_000_000 + i for i in range(10)],
|
||||
})
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestEnsureDateColumn:
    """Unit tests for ``su._ensure_date_column`` column normalization."""

    def test_renames_index_column(self):
        """An ``index`` column is renamed to ``Date`` without touching the input."""
        src = _ohlcv("index")
        out = su._ensure_date_column(src)
        assert "Date" in out.columns
        assert "index" not in out.columns
        # The rename must carry the values over and leave the caller's frame alone.
        assert list(out["Date"]) == list(src["index"])
        assert "index" in src.columns

    def test_renames_datetime_and_date_variants(self):
        """``Datetime`` and lowercase ``date`` are renamed, not duplicated."""
        for variant in ("Datetime", "date"):
            out = su._ensure_date_column(_ohlcv(variant))
            # Pass the variant as the assertion message so a failure says which one broke.
            assert "Date" in out.columns, variant
            assert variant not in out.columns, variant

    def test_leaves_existing_date_untouched(self):
        """A frame that already has ``Date`` is returned as the same object."""
        df = _ohlcv("Date")
        assert su._ensure_date_column(df) is df  # no-op short-circuit

    def test_no_datelike_column_is_left_alone(self):
        """With no known date column nothing is renamed; the caller handles it."""
        df = pd.DataFrame({"Close": [1, 2, 3]})
        out = su._ensure_date_column(df)
        assert "Date" not in out.columns  # nothing to rename; caller handles
        assert list(out.columns) == ["Close"]
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestCleanDataframeAcrossVersions:
    """Checks that ``su._clean_dataframe`` copes with differently named date columns."""

    def test_clean_handles_index_column(self):
        """A frame with `index` instead of `Date` must still clean to a
        usable, date-parsed frame (was KeyError: 'Date')."""
        result = su._clean_dataframe(_ohlcv("index"))
        assert "Date" in result.columns
        assert pd.api.types.is_datetime64_any_dtype(result["Date"])
        assert len(result) == 10

    def test_clean_handles_legacy_date_column(self):
        """A frame that already uses ``Date`` keeps all ten rows."""
        result = su._clean_dataframe(_ohlcv("Date"))
        assert len(result) == 10

    def test_indicators_compute_after_index_rename(self):
        """stockstats must compute indicators on a frame whose date column
        arrived as `index`, instead of erroring per indicator."""
        from stockstats import wrap

        wrapped = wrap(su._clean_dataframe(_ohlcv("index")))
        # Accessing the indicator column is what triggers the calculation.
        sma = wrapped["close_5_sma"]
        assert "close_5_sma" in wrapped.columns
        assert sma.notna().any()
|
||||
@@ -32,8 +32,24 @@ def yf_retry(func, max_retries=3, base_delay=2.0):
|
||||
raise
|
||||
|
||||
|
||||
def _ensure_date_column(data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Normalize the date column to ``Date``.
|
||||
|
||||
Some yfinance builds leave the index unnamed (so ``reset_index()`` yields
|
||||
``index``) or use ``Datetime`` for intraday data. Rename the first
|
||||
date-like column so indicators don't silently drop when it isn't ``Date``.
|
||||
"""
|
||||
if "Date" in data.columns:
|
||||
return data
|
||||
for candidate in ("index", "Datetime", "date"):
|
||||
if candidate in data.columns:
|
||||
return data.rename(columns={candidate: "Date"})
|
||||
return data
|
||||
|
||||
|
||||
def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Normalize a stock DataFrame for stockstats: parse dates, drop invalid rows, fill price gaps."""
|
||||
data = _ensure_date_column(data)
|
||||
data["Date"] = pd.to_datetime(data["Date"], errors="coerce")
|
||||
data = data.dropna(subset=["Date"])
|
||||
|
||||
@@ -82,7 +98,7 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
||||
progress=False,
|
||||
auto_adjust=True,
|
||||
))
|
||||
data = data.reset_index()
|
||||
data = _ensure_date_column(data.reset_index())
|
||||
data.to_csv(data_file, index=False, encoding="utf-8")
|
||||
|
||||
data = _clean_dataframe(data)
|
||||
|
||||
Reference in New Issue
Block a user