mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-17 05:16:14 +03:00
Reddit blocks the anonymous JSON search endpoint, which silently emptied the sentiment analyst's Reddit source. Fall back to the public RSS search feed when JSON fails. RSS lacks score/comment counts, so those posts are marked "via RSS feed" rather than shown with fake zeros. #862
115 lines
4.4 KiB
Python
115 lines
4.4 KiB
Python
"""Tests for the Reddit RSS/Atom fallback when the JSON endpoint 403s (#862)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import patch
|
|
from urllib.error import HTTPError
|
|
|
|
import pytest
|
|
|
|
from tradingagents.dataflows import reddit
|
|
|
|
|
|
# Minimal two-entry Atom feed used as the canned RSS search response.
# The markup inside <content type="html"> is XML-escaped, as Atom requires for
# type="html": after XML parsing, the element text is the HTML string itself
# (including the SC_OFF/SC_ON comment markers and the still-encoded &#39;
# entity) rather than a tree of child elements with no text.
_SAMPLE_ATOM = """<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <entry>
    <title>NVDA earnings beat, stock pops</title>
    <published>2026-05-20T14:30:00+00:00</published>
    <content type="html">&lt;!-- SC_OFF --&gt;&lt;div class="md"&gt;&lt;p&gt;Great &lt;b&gt;quarter&lt;/b&gt; for NVDA&amp;#39;s datacenter unit.&lt;/p&gt;&lt;/div&gt;&lt;!-- SC_ON --&gt;</content>
  </entry>
  <entry>
    <title>Is NVDA overvalued?</title>
    <published>2026-05-19T09:00:00Z</published>
    <content type="html">&lt;p&gt;Forward P/E discussion&lt;/p&gt;</content>
  </entry>
</feed>
"""
|
|
|
|
|
|
@pytest.mark.unit
class TestIsoToTimestamp:
    """Exercise ``reddit._iso_to_timestamp`` on well-formed and unusable input."""

    def test_parses_offset_and_z(self):
        """An explicit ``+00:00`` offset and a ``Z`` suffix both give a positive value."""
        with_offset = reddit._iso_to_timestamp("2026-05-20T14:30:00+00:00")
        assert with_offset > 0

        with_zulu = reddit._iso_to_timestamp("2026-05-19T09:00:00Z")
        assert with_zulu > 0

    def test_none_and_garbage_return_none(self):
        """``None`` and an unparseable string are reported as ``None``."""
        for unusable in (None, "not-a-date"):
            assert reddit._iso_to_timestamp(unusable) is None
|
|
|
|
|
|
@pytest.mark.unit
class TestStripHtml:
    """Checks for ``reddit._strip_html`` on marked-up and empty input."""

    def test_extracts_between_sc_markers_and_unescapes(self):
        """Markup wrapped in SC_OFF/SC_ON markers reduces to plain, unescaped text."""
        # The input carries a real entity (``&amp;``) so the "unescapes" half of
        # this test is actually exercised; a bare ``&`` would pass even if no
        # entity decoding happened at all.
        raw = (
            '<!-- SC_OFF --><div class="md"><p>Great <b>quarter</b> '
            "&amp; more</p></div><!-- SC_ON -->"
        )
        assert reddit._strip_html(raw) == "Great quarter & more"

    def test_empty(self):
        """An empty string comes back as an empty string."""
        assert reddit._strip_html("") == ""
|
|
|
|
|
|
@pytest.mark.unit
class TestRssFallbackParsing:
    """Drive ``reddit._fetch_subreddit_rss`` against canned ``urlopen`` responses."""

    def _patch_rss_response(self, xml_bytes):
        """Build a patcher that makes ``reddit.urlopen`` return a canned response.

        The stand-in response works as a context manager (it yields itself and
        does not suppress exceptions) and its ``read()`` hands back
        ``xml_bytes`` unchanged.
        """

        class _FakeResponse:
            def __init__(self, payload):
                self._payload = payload

            def __enter__(self):
                return self

            def __exit__(self, *exc_info):
                return False

            def read(self):
                return self._payload

        canned = _FakeResponse(xml_bytes)
        return patch.object(reddit, "urlopen", return_value=canned)

    def test_parses_atom_entries(self):
        """Both sample entries are returned; the first carries RSS-style fields."""
        feed_bytes = _SAMPLE_ATOM.encode("utf-8")
        with self._patch_rss_response(feed_bytes):
            posts = reddit._fetch_subreddit_rss("NVDA", "stocks", limit=5, timeout=5.0)

        assert len(posts) == 2

        first = posts[0]
        assert first["title"] == "NVDA earnings beat, stock pops"
        assert first["source"] == "rss"
        # The test expects the counts to be None for RSS-sourced posts, not zero.
        assert first["score"] is None
        assert first["num_comments"] is None
        assert first["created_utc"] > 0
        assert "datacenter unit" in first["selftext"]

    def test_malformed_xml_fails_open(self):
        """Unparseable XML produces an empty list."""
        with self._patch_rss_response(b"<<not xml>>"):
            posts = reddit._fetch_subreddit_rss("NVDA", "stocks", 5, 5.0)

        assert posts == []
|
|
|
|
|
|
@pytest.mark.unit
class TestJsonFallsBackToRss:
    """Check that ``reddit._fetch_subreddit`` uses the RSS helper when blocked."""

    def test_403_triggers_rss(self):
        """A 403 from ``urlopen`` leads to exactly one RSS call whose posts are returned."""
        blocked = HTTPError("url", 403, "Blocked", {}, None)
        rss_post = {
            "title": "x",
            "source": "rss",
            "score": None,
            "num_comments": None,
            "created_utc": None,
            "selftext": "",
        }

        with patch.object(reddit, "urlopen", side_effect=blocked):
            with patch.object(
                reddit, "_fetch_subreddit_rss", return_value=[rss_post]
            ) as rss:
                out = reddit._fetch_subreddit("NVDA", "stocks", 5, 5.0)

        rss.assert_called_once()
        assert out and out[0]["source"] == "rss"
|
|
|
|
|
|
@pytest.mark.unit
class TestFormatterHandlesRssPosts:
    """Check how ``reddit.fetch_reddit_posts`` renders RSS-sourced vs. counted posts."""

    def test_rss_posts_omit_fake_counts_and_note_source(self):
        """RSS posts are labelled "via RSS feed" and rendered without a score arrow."""
        created = reddit._iso_to_timestamp("2026-05-20T14:30:00Z")
        rss_post = {
            "title": "NVDA pops",
            "score": None,
            "num_comments": None,
            "created_utc": created,
            "selftext": "great quarter",
            "source": "rss",
        }

        with patch.object(reddit, "_fetch_subreddit", return_value=[rss_post]):
            out = reddit.fetch_reddit_posts(
                "NVDA", subreddits=("stocks",), inter_request_delay=0
            )

        assert "via RSS feed" in out
        assert "↑" not in out  # no fake score arrow
        assert "NVDA pops" in out
        assert "great quarter" in out

    def test_json_posts_still_show_counts(self):
        """Posts that carry a score and comment count keep them and have no RSS note."""
        created = reddit._iso_to_timestamp("2026-05-20T14:30:00Z")
        json_post = {
            "title": "NVDA pops",
            "score": 1234,
            "num_comments": 56,
            "created_utc": created,
            "selftext": "",
        }

        with patch.object(reddit, "_fetch_subreddit", return_value=[json_post]):
            out = reddit.fetch_reddit_posts(
                "NVDA", subreddits=("stocks",), inter_request_delay=0
            )

        assert "1234↑" in out
        assert "56c" in out
        assert "via RSS" not in out
|