mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-16 21:06:15 +03:00
fix(dataflows): fall back to Reddit RSS search when JSON 403s
Reddit blocks the anonymous JSON search endpoint, which silently emptied the sentiment analyst's Reddit source. Fall back to the public RSS search feed when JSON fails. RSS lacks score/comment counts, so those posts are marked "via RSS feed" rather than shown with fake zeros. #862
This commit is contained in:
114
tests/test_reddit_fallback.py
Normal file
114
tests/test_reddit_fallback.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""Tests for the Reddit RSS/Atom fallback when the JSON endpoint 403s (#862)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
from urllib.error import HTTPError
|
||||
|
||||
import pytest
|
||||
|
||||
from tradingagents.dataflows import reddit
|
||||
|
||||
|
||||
# Minimal Atom search feed with two entries, used as the stubbed RSS response.
#
# An Atom ``<content type="html">`` element carries its HTML as *escaped* text,
# so the markup below is entity-encoded (``&lt;p&gt;`` etc.). If it were left
# as raw tags, the XML parser would read ``<!-- SC_OFF -->`` as a comment and
# ``<div>`` as a child element, and the content element would have no text.
# ``&amp;#39;`` decodes to the literal HTML entity ``&#39;`` after XML parsing.
_SAMPLE_ATOM = """<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <entry>
    <title>NVDA earnings beat, stock pops</title>
    <published>2026-05-20T14:30:00+00:00</published>
    <content type="html">&lt;!-- SC_OFF --&gt;&lt;div class="md"&gt;&lt;p&gt;Great &lt;b&gt;quarter&lt;/b&gt; for NVDA&amp;#39;s datacenter unit.&lt;/p&gt;&lt;/div&gt;&lt;!-- SC_ON --&gt;</content>
  </entry>
  <entry>
    <title>Is NVDA overvalued?</title>
    <published>2026-05-19T09:00:00Z</published>
    <content type="html">&lt;p&gt;Forward P/E discussion&lt;/p&gt;</content>
  </entry>
</feed>
"""
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestIsoToTimestamp:
    """Checks for ``reddit._iso_to_timestamp`` on valid and invalid input."""

    def test_parses_offset_and_z(self):
        # Both an explicit "+00:00" offset and the "Z" suffix must parse.
        for stamp in ("2026-05-20T14:30:00+00:00", "2026-05-19T09:00:00Z"):
            assert reddit._iso_to_timestamp(stamp) > 0

    def test_none_and_garbage_return_none(self):
        # Missing or unparseable values yield None.
        for bad_value in (None, "not-a-date"):
            assert reddit._iso_to_timestamp(bad_value) is None
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestStripHtml:
    """Checks for ``reddit._strip_html`` on Reddit-style HTML bodies."""

    def test_extracts_between_sc_markers_and_unescapes(self):
        # The input carries a real HTML entity (``&amp;``) so the "unescapes"
        # half of this test is exercised; a bare "&" would pass even if no
        # unescaping happened at all.
        raw = (
            '<!-- SC_OFF --><div class="md">'
            "<p>Great <b>quarter</b> &amp; more</p>"
            "</div><!-- SC_ON -->"
        )
        assert reddit._strip_html(raw) == "Great quarter & more"

    def test_empty(self):
        # An empty body stays an empty string.
        assert reddit._strip_html("") == ""
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestRssFallbackParsing:
    """Exercise ``reddit._fetch_subreddit_rss`` against a stubbed ``urlopen``."""

    def _patch_rss_response(self, xml_bytes):
        """Return a patcher that makes ``reddit.urlopen`` yield *xml_bytes*.

        The stand-in response is usable as a context manager and exposes
        ``read()``, which returns the supplied payload.
        """

        class _FakeResponse:
            def read(self):
                return xml_bytes

            def __enter__(self):
                return self

            def __exit__(self, *exc_info):
                # Falsy: never suppress an exception raised inside the ``with``.
                return False

        fake_response = _FakeResponse()
        return patch.object(reddit, "urlopen", return_value=fake_response)

    def test_parses_atom_entries(self):
        feed_bytes = _SAMPLE_ATOM.encode("utf-8")
        with self._patch_rss_response(feed_bytes):
            posts = reddit._fetch_subreddit_rss("NVDA", "stocks", limit=5, timeout=5.0)

        assert len(posts) == 2

        first = posts[0]
        assert first["title"] == "NVDA earnings beat, stock pops"
        assert first["source"] == "rss"
        # The RSS path reports None for the engagement counts.
        assert first["score"] is None
        assert first["num_comments"] is None
        assert first["created_utc"] > 0
        assert "datacenter unit" in first["selftext"]

    def test_malformed_xml_fails_open(self):
        # Unparseable XML must produce an empty list.
        with self._patch_rss_response(b"<<not xml>>"):
            result = reddit._fetch_subreddit_rss("NVDA", "stocks", 5, 5.0)
            assert result == []
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestJsonFallsBackToRss:
    """The JSON fetch path must hand off to the RSS fetcher on HTTP 403."""

    def test_403_triggers_rss(self):
        forbidden = HTTPError("url", 403, "Blocked", {}, None)
        stub_posts = [
            {
                "title": "x",
                "source": "rss",
                "score": None,
                "num_comments": None,
                "created_utc": None,
                "selftext": "",
            }
        ]

        # ``urlopen`` raises the 403; the RSS fetcher is replaced by a mock so
        # the hand-off can be observed.
        with patch.object(reddit, "urlopen", side_effect=forbidden):
            with patch.object(
                reddit, "_fetch_subreddit_rss", return_value=stub_posts
            ) as rss:
                out = reddit._fetch_subreddit("NVDA", "stocks", 5, 5.0)

        rss.assert_called_once()
        assert out and out[0]["source"] == "rss"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestFormatterHandlesRssPosts:
    """Formatting of ``reddit.fetch_reddit_posts`` output for RSS vs JSON posts."""

    def _render(self, posts):
        """Run ``fetch_reddit_posts`` with ``_fetch_subreddit`` stubbed to *posts*."""
        with patch.object(reddit, "_fetch_subreddit", return_value=posts):
            return reddit.fetch_reddit_posts(
                "NVDA", subreddits=("stocks",), inter_request_delay=0
            )

    def test_rss_posts_omit_fake_counts_and_note_source(self):
        rss_post = {
            "title": "NVDA pops",
            "score": None,
            "num_comments": None,
            "created_utc": reddit._iso_to_timestamp("2026-05-20T14:30:00Z"),
            "selftext": "great quarter",
            "source": "rss",
        }

        out = self._render([rss_post])

        assert "via RSS feed" in out
        assert "↑" not in out  # no fake score arrow
        assert "NVDA pops" in out
        assert "great quarter" in out

    def test_json_posts_still_show_counts(self):
        json_post = {
            "title": "NVDA pops",
            "score": 1234,
            "num_comments": 56,
            "created_utc": reddit._iso_to_timestamp("2026-05-20T14:30:00Z"),
            "selftext": "",
        }

        out = self._render([json_post])

        assert "1234↑" in out
        assert "56c" in out
        assert "via RSS" not in out
|
||||
Reference in New Issue
Block a user