Files
tradingagents/tests/test_news_lookahead.py
Yijia-Xiao 0c1231a405 fix(data): keep future/undated news out of historical windows
The yfinance news date filter only ran when an article had a parsed date, so
flat-format and undated articles bypassed it and leaked future news into
historical/backtest runs. Parse the flat providerPublishTime, apply one
look-ahead-safe window rule across ticker and global news (undated kept only
when the window reaches the present), and return an informative message when
everything is filtered out.
2026-06-13 21:54:07 +00:00

81 lines
3.0 KiB
Python

"""yfinance news must not leak future-dated (or undated, in a backtest) articles
into a historical window.
Regressions for #992 (flat articles bypassed the date filter), #1007 (global
news injected future articles), #993 (empty-after-filter returned a blank body).
"""
import time
from datetime import datetime
from unittest import mock
import pytest
import tradingagents.dataflows.yfinance_news as ynews
def _epoch(date_str: str) -> int:
    """Return the Unix timestamp, in whole seconds, for midnight on *date_str*.

    Args:
        date_str: A calendar date formatted as ``YYYY-MM-DD``.

    Returns:
        Seconds since the epoch as an ``int``. The date is read as local time,
        because ``time.mktime`` interprets its argument in the local time zone.

    Raises:
        ValueError: If *date_str* does not match ``%Y-%m-%d``.
    """
    parsed = datetime.strptime(date_str, "%Y-%m-%d")
    return int(time.mktime(parsed.timetuple()))
@pytest.mark.unit
def test_flat_article_publish_time_is_parsed():
    """A flat-format article's ``providerPublishTime`` yields a usable ``pub_date``.

    Regression for #992: flat articles previously always had ``pub_date`` set
    to ``None``, which made them impossible to filter by date.
    """
    flat_article = {
        "title": "X",
        "publisher": "P",
        "link": "l",
        "providerPublishTime": _epoch("2025-05-09"),
    }

    data = ynews._extract_article_data(flat_article)

    # #992: flat articles now carry a pub_date (was always None -> unfilterable).
    assert data["pub_date"] is not None
    assert data["pub_date"].strftime("%Y-%m-%d") == "2025-05-09"
@pytest.mark.unit
def test_window_excludes_future_and_undated_in_backtest():
    """A historical window accepts in-range dates and rejects future or missing ones."""
    start = datetime(2025, 5, 1)
    end = datetime(2025, 5, 9)  # historical window (well in the past)
    inside = datetime(2025, 5, 5)
    future = datetime(2025, 6, 1)

    assert ynews._in_news_window(inside, start, end) is True
    # Look-ahead blocked: a date after the window end must be rejected.
    assert ynews._in_news_window(future, start, end) is False
    # Undated -> excluded in backtest.
    assert ynews._in_news_window(None, start, end) is False
@pytest.mark.unit
def test_window_keeps_undated_in_live_window():
    """An undated article is kept when the window reaches the present moment."""
    # Live window (reaches today): undated articles can't be "future", so keep them.
    # Read the clock once so both bounds are exactly the same instant.
    start = end = datetime.now()

    assert ynews._in_news_window(None, start, end) is True
@pytest.mark.unit
def test_global_news_future_flat_article_excluded(monkeypatch):
    """A future-dated flat article is left out of a historical global-news report.

    Regression for #1007. ``yf.Search`` is replaced with a stub so the test
    controls exactly which articles the news function receives.
    """
    # #1007: a flat, future-dated global article must not appear in a historical run.
    future_article = {
        "title": "FUTURE EVENT",
        "publisher": "P",
        "link": "l",
        "providerPublishTime": _epoch("2025-06-01"),
    }
    past_article = {
        "title": "PAST EVENT",
        "publisher": "P",
        "link": "l",
        "providerPublishTime": _epoch("2025-05-05"),
    }

    class FakeSearch:
        """Stub for ``yf.Search`` that ignores its arguments and serves fixed news."""

        def __init__(self, *a, **k):
            self.news = [future_article, past_article]

    monkeypatch.setattr(ynews.yf, "Search", FakeSearch)

    out = ynews.get_global_news_yfinance("2025-05-09", look_back_days=7, limit=10)

    assert "PAST EVENT" in out
    assert "FUTURE EVENT" not in out  # #1007
@pytest.mark.unit
def test_global_news_empty_after_filter_is_informative(monkeypatch):
    """When every article is filtered out, the report says so instead of being blank.

    Regression for #993. ``yf.Search`` is replaced with a stub whose only
    article is dated after the requested window.
    """
    # #993: everything filtered out -> a clear message, not a blank-bodied report.
    only_future = {
        "title": "FUTURE",
        "publisher": "P",
        "link": "l",
        "providerPublishTime": _epoch("2025-06-01"),
    }

    class FakeSearch:
        """Stub for ``yf.Search`` that ignores its arguments and serves fixed news."""

        def __init__(self, *a, **k):
            self.news = [only_future]

    monkeypatch.setattr(ynews.yf, "Search", FakeSearch)

    out = ynews.get_global_news_yfinance("2025-05-09", look_back_days=7, limit=10)

    assert "No global news found" in out
    assert "###" not in out  # no empty article body