mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-16 21:06:15 +03:00
fix(deps): require yfinance >=1.4.1 and tolerate non-Date index column
yfinance 1.4.0 regressed the daily-download index to unnamed, so reset_index() produced an "index" column instead of "Date" and every stockstats indicator silently failed (no SMA/RSI/MACD/Bollinger/ATR). Verified across versions: 1.2.0 / 1.3.0 / 1.4.1 name it "Date"; only 1.4.0 is broken. Pin to >=1.4.1 (the upstream fix) and normalize the date column defensively so a non-"Date" index can't silently drop indicators on any build. #890
This commit is contained in:
@@ -29,7 +29,7 @@ dependencies = [
|
|||||||
"stockstats>=0.6.5",
|
"stockstats>=0.6.5",
|
||||||
"tqdm>=4.67.1",
|
"tqdm>=4.67.1",
|
||||||
"typing-extensions>=4.14.0",
|
"typing-extensions>=4.14.0",
|
||||||
"yfinance>=0.2.63",
|
"yfinance>=1.4.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
|
|||||||
70
tests/test_stockstats_date_column.py
Normal file
70
tests/test_stockstats_date_column.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""Tests for tolerating a non-`Date` index column in stockstats_utils (#890).
|
||||||
|
|
||||||
|
Guards against a download frame whose date column is `index` or `Datetime`
|
||||||
|
instead of `Date`, which would otherwise silently drop every indicator.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from tradingagents.dataflows import stockstats_utils as su
|
||||||
|
|
||||||
|
|
||||||
|
def _ohlcv(date_col: str, periods: int = 10) -> pd.DataFrame:
    """Build a synthetic OHLCV frame whose date column is named ``date_col``.

    Args:
        date_col: Name given to the column holding the dates, so tests can
            mimic frames where it arrives as ``index``, ``Datetime``, etc.
        periods: Number of rows to generate (consecutive business days
            starting 2026-04-01). Defaults to 10.

    Returns:
        A frame with ``date_col`` followed by ``Open``, ``High``, ``Low``,
        ``Close`` and ``Volume``; every value increases by one per row.
    """
    dates = pd.bdate_range("2026-04-01", periods=periods)
    # One shared range keeps every column the same length as ``dates``.
    steps = range(periods)
    return pd.DataFrame({
        date_col: dates,
        "Open": [100.0 + i for i in steps],
        "High": [101.0 + i for i in steps],
        "Low": [99.0 + i for i in steps],
        "Close": [100.5 + i for i in steps],
        "Volume": [1_000_000 + i for i in steps],
    })
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class TestEnsureDateColumn:
    """Column-renaming behaviour of ``su._ensure_date_column``."""

    def test_renames_index_column(self):
        """An ``index`` column is replaced by ``Date``."""
        columns = su._ensure_date_column(_ohlcv("index")).columns
        assert "Date" in columns
        assert "index" not in columns

    def test_renames_datetime_and_date_variants(self):
        """``Datetime`` and lowercase ``date`` both end up as ``Date``."""
        for original_name in ("Datetime", "date"):
            renamed = su._ensure_date_column(_ohlcv(original_name))
            assert "Date" in renamed.columns

    def test_leaves_existing_date_untouched(self):
        """A frame that already has ``Date`` is returned as the same object."""
        frame = _ohlcv("Date")
        result = su._ensure_date_column(frame)
        assert result is frame  # no-op short-circuit

    def test_no_datelike_column_is_left_alone(self):
        """With no candidate column, no ``Date`` column is invented."""
        frame = pd.DataFrame({"Close": [1, 2, 3]})
        result = su._ensure_date_column(frame)
        # Nothing to rename; the caller is responsible for handling this.
        assert "Date" not in result.columns
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class TestCleanDataframeAcrossVersions:
    """``su._clean_dataframe`` on frames with differently named date columns."""

    def test_clean_handles_index_column(self):
        """A frame carrying ``index`` instead of ``Date`` still cleans to a
        date-parsed frame with all rows kept (was KeyError: 'Date')."""
        result = su._clean_dataframe(_ohlcv("index"))
        assert "Date" in result.columns
        date_values = result["Date"]
        assert pd.api.types.is_datetime64_any_dtype(date_values)
        assert len(result) == 10

    def test_clean_handles_legacy_date_column(self):
        """A frame that already uses ``Date`` keeps all ten rows."""
        result = su._clean_dataframe(_ohlcv("Date"))
        assert len(result) == 10

    def test_indicators_compute_after_index_rename(self):
        """stockstats yields a usable SMA for a frame whose date column
        arrived as ``index``, rather than erroring per indicator."""
        from stockstats import wrap

        wrapped = wrap(su._clean_dataframe(_ohlcv("index")))
        _ = wrapped["close_5_sma"]  # the lookup itself triggers the calculation
        assert "close_5_sma" in wrapped.columns
        assert wrapped["close_5_sma"].notna().any()
|
||||||
@@ -32,8 +32,24 @@ def yf_retry(func, max_retries=3, base_delay=2.0):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_date_column(data: pd.DataFrame) -> pd.DataFrame:
    """Normalize the date column to ``Date``.

    Some yfinance builds leave the index unnamed (so ``reset_index()`` yields
    ``index``) or use ``Datetime`` for intraday data. Rename one of the known
    candidate columns so indicators don't silently drop when it isn't ``Date``.

    Args:
        data: Frame to inspect; it is never modified in place.

    Returns:
        ``data`` itself when it already has ``Date`` or has none of the
        candidate columns (``index``, ``Datetime``, ``date``); otherwise a
        renamed copy in which the chosen candidate is called ``Date``.
    """
    if "Date" in data.columns:
        return data

    present = [
        name for name in ("index", "Datetime", "date") if name in data.columns
    ]
    if not present:
        return data

    # A numeric ``index`` column is typically a leftover RangeIndex, not a
    # date; renaming it would later be parsed as 1970 epoch timestamps. Prefer
    # a non-numeric candidate and only fall back to the first name match.
    chosen = next(
        (
            name
            for name in present
            if not pd.api.types.is_numeric_dtype(data[name])
        ),
        present[0],
    )
    return data.rename(columns={chosen: "Date"})
|
||||||
|
|
||||||
|
|
||||||
def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
|
def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
|
||||||
"""Normalize a stock DataFrame for stockstats: parse dates, drop invalid rows, fill price gaps."""
|
"""Normalize a stock DataFrame for stockstats: parse dates, drop invalid rows, fill price gaps."""
|
||||||
|
data = _ensure_date_column(data)
|
||||||
data["Date"] = pd.to_datetime(data["Date"], errors="coerce")
|
data["Date"] = pd.to_datetime(data["Date"], errors="coerce")
|
||||||
data = data.dropna(subset=["Date"])
|
data = data.dropna(subset=["Date"])
|
||||||
|
|
||||||
@@ -82,7 +98,7 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
|||||||
progress=False,
|
progress=False,
|
||||||
auto_adjust=True,
|
auto_adjust=True,
|
||||||
))
|
))
|
||||||
data = data.reset_index()
|
data = _ensure_date_column(data.reset_index())
|
||||||
data.to_csv(data_file, index=False, encoding="utf-8")
|
data.to_csv(data_file, index=False, encoding="utf-8")
|
||||||
|
|
||||||
data = _clean_dataframe(data)
|
data = _clean_dataframe(data)
|
||||||
|
|||||||
Reference in New Issue
Block a user