feat(agents): rename to sentiment_analyst; integrate StockTwits + Reddit

Pre-fetches news + StockTwits + Reddit via no-auth public endpoints
and injects structured data blocks into the prompt with professional
analysis instructions. Replaces the prompt-vs-tool mismatch that
caused fabricated social-platform content. Backward-compat alias +
"social" CLI key preserved.

#557 #607
This commit is contained in:
Yijia-Xiao
2026-05-11 05:20:07 +00:00
parent d0dd0420ad
commit 0fcf13624e
6 changed files with 401 additions and 54 deletions

View File

@@ -4,7 +4,10 @@ from .utils.agent_states import AgentState, InvestDebateState, RiskDebateState
from .analysts.fundamentals_analyst import create_fundamentals_analyst from .analysts.fundamentals_analyst import create_fundamentals_analyst
from .analysts.market_analyst import create_market_analyst from .analysts.market_analyst import create_market_analyst
from .analysts.news_analyst import create_news_analyst from .analysts.news_analyst import create_news_analyst
from .analysts.social_media_analyst import create_social_media_analyst from .analysts.sentiment_analyst import (
create_sentiment_analyst,
create_social_media_analyst, # deprecated alias kept for back-compat
)
from .researchers.bear_researcher import create_bear_researcher from .researchers.bear_researcher import create_bear_researcher
from .researchers.bull_researcher import create_bull_researcher from .researchers.bull_researcher import create_bull_researcher
@@ -33,6 +36,7 @@ __all__ = [
"create_aggressive_debator", "create_aggressive_debator",
"create_portfolio_manager", "create_portfolio_manager",
"create_conservative_debator", "create_conservative_debator",
"create_social_media_analyst", "create_sentiment_analyst",
"create_social_media_analyst", # deprecated; will be removed in a future version
"create_trader", "create_trader",
] ]

View File

@@ -0,0 +1,184 @@
"""Sentiment analyst — multi-source sentiment analysis for a target ticker.
Previously named ``social_media_analyst``. Renamed and redesigned because
the old version had a prompt that demanded social-media analysis but the
only tool available was Yahoo Finance news — which led LLMs to fabricate
Reddit/X/StockTwits content under prompt pressure (verified live).
The redesigned agent pre-fetches three complementary data sources before
the LLM is invoked and injects them into the prompt as structured blocks:
1. News headlines — Yahoo Finance (institutional framing)
2. StockTwits messages — retail-trader posts indexed by cashtag, with
user-labeled Bullish/Bearish sentiment tags
3. Reddit posts — r/wallstreetbets, r/stocks, r/investing
The agent does not use tool-calling; the data is in the prompt from
turn 0. The LLM produces the sentiment report in a single invocation.
See: https://github.com/TauricResearch/TradingAgents/issues/557
"""
from datetime import datetime, timedelta
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from tradingagents.agents.utils.agent_utils import (
build_instrument_context,
get_language_instruction,
get_news,
)
from tradingagents.dataflows.reddit import fetch_reddit_posts
from tradingagents.dataflows.stocktwits import fetch_stocktwits_messages
def _seven_days_back(trade_date: str) -> str:
return (datetime.strptime(trade_date, "%Y-%m-%d") - timedelta(days=7)).strftime("%Y-%m-%d")
def create_sentiment_analyst(llm):
    """Build the sentiment-analyst graph node bound to ``llm``.

    The returned node collects news, StockTwits and Reddit text for the
    ticker up front, embeds all three in the system prompt, and asks the
    model for the sentiment report in one call (no tools are bound).
    """

    def sentiment_analyst_node(state):
        symbol = state["company_of_interest"]
        window_end = state["trade_date"]
        window_start = _seven_days_back(window_end)
        instrument_context = build_instrument_context(symbol)

        # Gather every source before prompting. Each fetcher is expected to
        # hand back a string either way (real data or a placeholder), so no
        # error handling happens at this level.
        system_message = _build_system_message(
            ticker=symbol,
            start_date=window_start,
            end_date=window_end,
            news_block=get_news.func(symbol, window_start, window_end),
            stocktwits_block=fetch_stocktwits_messages(symbol, limit=30),
            reddit_block=fetch_reddit_posts(symbol),
        )

        system_template = (
            "You are a helpful AI assistant, collaborating with other assistants."
            " If you or any other assistant has the FINAL TRANSACTION PROPOSAL: **BUY/HOLD/SELL** or deliverable,"
            " prefix your response with FINAL TRANSACTION PROPOSAL: **BUY/HOLD/SELL** so the team knows to stop."
            "\n{system_message}\n"
            "For your reference, the current date is {current_date}. {instrument_context}"
        )
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_template),
                MessagesPlaceholder(variable_name="messages"),
            ]
        ).partial(
            system_message=system_message,
            current_date=window_end,
            instrument_context=instrument_context,
        )

        # The data already lives in the prompt, so the bare model is piped
        # in directly and invoked exactly once.
        response = (prompt | llm).invoke(state["messages"])
        return {
            "messages": [response],
            "sentiment_report": response.content,
        }

    return sentiment_analyst_node
def _build_system_message(
    *,
    ticker: str,
    start_date: str,
    end_date: str,
    news_block: str,
    stocktwits_block: str,
    reddit_block: str,
) -> str:
    """Assemble the sentiment-analyst system message with structured data blocks.

    All parameters are keyword-only and are interpolated verbatim into the
    prompt text: ``ticker``, ``start_date`` and ``end_date`` appear in the
    opening sentence, and each of ``news_block``, ``stocktwits_block`` and
    ``reddit_block`` is placed between its own ``<start_of_…>`` /
    ``<end_of_…>`` marker lines. The string returned by
    ``get_language_instruction()`` is appended after the output spec.

    Returns:
        The complete system-message text as a single string.
    """
    # The literal below is runtime prompt text: its wording and line breaks
    # are part of what the model receives, so edit it deliberately.
    # NOTE(review): guideline 6 refers to an "<unavailable>" placeholder; the
    # StockTwits fetcher emits "<stocktwits unavailable: …>" — confirm the
    # wording is close enough for the model to recognise.
    return f"""You are a financial market sentiment analyst. Your task is to produce a comprehensive sentiment report for {ticker} covering the period from {start_date} to {end_date}, drawing on three complementary data sources that have already been collected for you.
## Data sources (pre-fetched, in this prompt)
### News headlines — Yahoo Finance, past 7 days
Institutional framing. Fact-driven, slower-moving signal.
<start_of_news>
{news_block}
<end_of_news>
### StockTwits messages — retail-trader social platform indexed by cashtag
Fast-moving signal. Each message carries a user-labeled sentiment tag (Bullish / Bearish / no-label) plus the message body.
<start_of_stocktwits>
{stocktwits_block}
<end_of_stocktwits>
### Reddit posts — r/wallstreetbets, r/stocks, r/investing (past 7 days)
Community discussion. Engagement signal via upvote score and comment count. Subreddit character matters (r/wallstreetbets is often contrarian/exuberant; r/stocks more measured; r/investing longer-term).
<start_of_reddit>
{reddit_block}
<end_of_reddit>
## How to analyze this data (best practices)
1. **Read the StockTwits Bullish/Bearish ratio as a leading retail-sentiment signal.** A 70/30 bullish/bearish split is moderately bullish; ≥90/10 may indicate over-extension and contrarian risk; 50/50 is uncertainty. Sample size matters — base rates on the actual message count, not percentages alone.
2. **Look for cross-source divergences.** If news framing is bearish but StockTwits is overwhelmingly bullish, that mismatch is itself a signal — it can mean retail is leaning into a thesis the news flow hasn't caught up to (or vice versa, that retail is chasing while institutions are cautious).
3. **Weight Reddit posts by engagement.** A 400-upvote / 200-comment thread reflects community attention; a 3-upvote post is noise. Read the body excerpts for context — the title alone often misleads.
4. **Distinguish opinion from event.** A news headline ("Nvidia announces $500M Corning deal") is an event; a StockTwits post ("buying NVDA, this is going to moon") is opinion. Both are inputs but should be weighted differently in your conclusions.
5. **Identify recurring narrative themes.** What topic keeps coming up across sources? That's the dominant narrative driving current sentiment.
6. **Be honest about data limits.** If StockTwits returned only a handful of messages, or one or more sources returned an "<unavailable>" placeholder, the sentiment read is less robust — flag this caveat explicitly. If the sources are silent on a given subreddit, say so.
7. **Identify catalysts and risks** that emerge across sources — news of upcoming earnings, product launches, competitive threats, macro headlines, etc.
8. **Past sentiment is not predictive.** Frame your conclusions as signal for the trader to weigh alongside fundamentals and technicals, not as a price call.
## Output
Produce a sentiment report covering, in order:
1. **Overall sentiment direction** — Bullish / Bearish / Neutral / Mixed — with a brief confidence note based on data quality and sample size.
2. **Source-by-source breakdown** — what each of news / StockTwits / Reddit is telling you, with specific evidence (cite message counts, ratios, notable posts).
3. **Divergences, alignments, and key narratives** across sources.
4. **Catalysts and risks** surfaced by the data.
5. **Markdown table** at the end summarizing key sentiment signals, their direction, source, and supporting evidence.
{get_language_instruction()}"""
# ---------------------------------------------------------------------------
# Backwards-compatibility shim
# ---------------------------------------------------------------------------
def create_social_media_analyst(llm):
    """Deprecated alias for :func:`create_sentiment_analyst`.

    Emits a ``DeprecationWarning`` attributed to the caller and then returns
    whatever ``create_sentiment_analyst(llm)`` returns, so code written
    against the old name keeps working.

    .. deprecated::
        Import :func:`create_sentiment_analyst` directly instead.
    """
    from warnings import warn

    deprecation_notice = (
        "create_social_media_analyst is deprecated and will be removed in a "
        "future version. Use create_sentiment_analyst instead."
    )
    # stacklevel=2 points the warning at the caller rather than this shim.
    warn(deprecation_notice, category=DeprecationWarning, stacklevel=2)
    return create_sentiment_analyst(llm)

View File

@@ -1,57 +1,23 @@
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder """Backwards-compatibility shim for the renamed social_media_analyst module.
from tradingagents.agents.utils.agent_utils import build_instrument_context, get_language_instruction, get_news
from tradingagents.dataflows.config import get_config
The social media analyst has been renamed to ``sentiment_analyst`` because the
original agent's only data tool was ``get_news`` (Yahoo Finance), not a social media feed.
def create_social_media_analyst(llm): Import from ``tradingagents.agents.analysts.sentiment_analyst`` going forward.
def social_media_analyst_node(state):
current_date = state["trade_date"]
instrument_context = build_instrument_context(state["company_of_interest"])
tools = [ See: https://github.com/TauricResearch/TradingAgents/issues/557
get_news, """
]
system_message = ( import warnings as _warnings
"You are a social media and company specific news researcher/analyst tasked with analyzing social media posts, recent company news, and public sentiment for a specific company over the past week. You will be given a company's name your objective is to write a comprehensive long report detailing your analysis, insights, and implications for traders and investors on this company's current state after looking at social media and what people are saying about that company, analyzing sentiment data of what people feel each day about the company, and looking at recent company news. Use the get_news(query, start_date, end_date) tool to search for company-specific news and social media discussions. Try to look at all sources possible from social media to sentiment to news. Provide specific, actionable insights with supporting evidence to help traders make informed decisions."
+ """ Make sure to append a Markdown table at the end of the report to organize key points in the report, organized and easy to read."""
+ get_language_instruction()
)
prompt = ChatPromptTemplate.from_messages( from tradingagents.agents.analysts.sentiment_analyst import ( # noqa: F401
[ create_sentiment_analyst,
( create_social_media_analyst,
"system", )
"You are a helpful AI assistant, collaborating with other assistants."
" Use the provided tools to progress towards answering the question."
" If you are unable to fully answer, that's OK; another assistant with different tools"
" will help where you left off. Execute what you can to make progress."
" If you or any other assistant has the FINAL TRANSACTION PROPOSAL: **BUY/HOLD/SELL** or deliverable,"
" prefix your response with FINAL TRANSACTION PROPOSAL: **BUY/HOLD/SELL** so the team knows to stop."
" You have access to the following tools: {tool_names}.\n{system_message}"
"For your reference, the current date is {current_date}. {instrument_context}",
),
MessagesPlaceholder(variable_name="messages"),
]
)
prompt = prompt.partial(system_message=system_message) _warnings.warn(
prompt = prompt.partial(tool_names=", ".join([tool.name for tool in tools])) "tradingagents.agents.analysts.social_media_analyst is deprecated. "
prompt = prompt.partial(current_date=current_date) "Import from tradingagents.agents.analysts.sentiment_analyst instead.",
prompt = prompt.partial(instrument_context=instrument_context) DeprecationWarning,
stacklevel=2,
chain = prompt | llm.bind_tools(tools) )
result = chain.invoke(state["messages"])
report = ""
if len(result.tool_calls) == 0:
report = result.content
return {
"messages": [result],
"sentiment_report": report,
}
return social_media_analyst_node

View File

@@ -0,0 +1,106 @@
"""Reddit search fetcher for ticker-specific discussion posts.
Uses Reddit's public JSON endpoints (``reddit.com/r/{sub}/search.json``)
which do not require an API key. Public throughput is ~10 requests per
minute per IP, well within budget for a single agent run that queries
a handful of finance subreddits per ticker.
Returns formatted plaintext blocks ready for prompt injection. Degrades
gracefully — returns a placeholder string rather than raising, so callers
never have to special-case missing data.
"""
from __future__ import annotations
import json
import logging
import time
from typing import Iterable
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
logger = logging.getLogger(__name__)
_API = "https://www.reddit.com/r/{sub}/search.json?{qs}"
_UA = "tradingagents/0.2 (+https://github.com/TauricResearch/TradingAgents)"
# Default subreddits ordered roughly by discussion volume for ticker-specific
# posts. wallstreetbets has the most volume but also the most noise; stocks /
# investing tend to be more measured. Caller can override.
DEFAULT_SUBREDDITS = ("wallstreetbets", "stocks", "investing")
def _fetch_subreddit(
    ticker: str,
    sub: str,
    limit: int,
    timeout: float,
) -> list[dict]:
    """Search a single subreddit for recent posts mentioning ``ticker``.

    Issues one GET against Reddit's public search endpoint, restricted to
    ``r/{sub}``, sorted newest first and limited to Reddit's "past week"
    window.

    Args:
        ticker: Search query (the ticker symbol).
        sub: Subreddit name without the ``r/`` prefix.
        limit: Maximum number of posts requested from Reddit.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        The ``data`` dict of every well-formed listing child, or ``[]`` when
        the request fails or the payload lacks the expected listing shape.
        Network and parse failures are logged, never raised.
    """
    # Local import: only the except clause below needs it.
    from http.client import HTTPException

    qs = urlencode({
        "q": ticker,
        "restrict_sr": "on",
        "sort": "new",
        "t": "week",  # last 7 days
        "limit": limit,
    })
    url = _API.format(sub=sub, qs=qs)
    req = Request(url, headers={"User-Agent": _UA, "Accept": "application/json"})
    try:
        with urlopen(req, timeout=timeout) as resp:
            payload = json.loads(resp.read())
    except (OSError, HTTPException, ValueError) as exc:
        # OSError covers HTTPError, URLError, timeouts and socket errors
        # raised while reading the body; HTTPException covers truncated or
        # malformed HTTP responses; ValueError covers JSONDecodeError and
        # undecodable bytes.
        logger.warning("Reddit fetch failed for r/%s · %s: %s", sub, ticker, exc)
        return []

    # Validate the shape step by step so an unexpected payload (error page
    # rendered as a JSON list, null fields, ...) yields "no posts" instead
    # of an AttributeError further down.
    if not isinstance(payload, dict):
        return []
    listing = payload.get("data")
    children = listing.get("children") if isinstance(listing, dict) else None
    if not isinstance(children, list):
        return []

    posts = []
    for child in children:
        if not isinstance(child, dict):
            continue
        post = child.get("data")
        # Skip children without a usable post body; callers rely on every
        # returned item being a dict.
        if isinstance(post, dict):
            posts.append(post)
    return posts
def _format_reddit_post(post: dict) -> str:
    """Render one Reddit post dict as a prompt line, plus an optional body excerpt."""
    title = (post.get("title") or "").replace("\n", " ").strip()
    # ``or 0`` also covers an explicit JSON null, which ``get(key, 0)`` would
    # pass through and which the width format specs below cannot handle.
    score = post.get("score") or 0
    comments = post.get("num_comments") or 0
    created = post.get("created_utc")
    try:
        created_str = (
            time.strftime("%Y-%m-%d", time.gmtime(created)) if created else "?"
        )
    except (TypeError, ValueError, OverflowError, OSError):
        # Non-numeric or out-of-range timestamp: show it as unknown.
        created_str = "?"
    selftext = (post.get("selftext") or "").replace("\n", " ").strip()
    if len(selftext) > 240:
        # Mark the cut so the reader knows the excerpt is truncated.
        selftext = selftext[:240] + "…"
    line = f" [{created_str} · {score:>4}↑ · {comments:>3}c] {title}"
    if selftext:
        line += f"\n body excerpt: {selftext}"
    return line


def fetch_reddit_posts(
    ticker: str,
    subreddits: Iterable[str] = DEFAULT_SUBREDDITS,
    limit_per_sub: int = 5,
    timeout: float = 10.0,
    inter_request_delay: float = 0.4,
) -> str:
    """Fetch recent Reddit posts mentioning ``ticker`` and format them as text.

    Each subreddit is queried in turn and rendered as its own block: a
    header line followed by one line per post (date, score, comment count,
    title) and, when present, a body excerpt capped at 240 characters.

    Args:
        ticker: Ticker symbol used as the search query.
        subreddits: Subreddit names without the ``r/`` prefix. Any iterable
            is accepted (it is materialized once); a single string is
            treated as one subreddit name.
        limit_per_sub: Maximum number of posts requested per subreddit.
        timeout: Per-request timeout in seconds.
        inter_request_delay: Pause in seconds between consecutive requests.
            This is a courtesy spacing only; callers that query many
            subreddits should raise it to stay within Reddit's public rate
            limit.

    Returns:
        The per-subreddit blocks joined by blank lines, or a single
        placeholder string when no subreddit yielded any post. A failed
        fetch is reported the same way as an empty result, because the
        per-subreddit fetcher returns an empty list in both cases. The
        "past 7 days" window is Reddit's own, relative to request time.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(subreddits, str):
        subreddits = (subreddits,)
    # Materialize once: the names are needed for the fetch loop and again
    # for the "nothing found" message, and a generator can be consumed only
    # a single time.
    subs = tuple(subreddits)
    symbol = ticker.upper()

    blocks = []
    total_posts = 0
    for i, sub in enumerate(subs):
        if i > 0 and inter_request_delay > 0:
            time.sleep(inter_request_delay)
        # Keep only dict entries so formatting cannot fail on odd items.
        posts = [
            p
            for p in _fetch_subreddit(ticker, sub, limit_per_sub, timeout)
            if isinstance(p, dict)
        ]
        total_posts += len(posts)
        if not posts:
            blocks.append(f"r/{sub}: <no posts found mentioning {symbol} in the past 7 days>")
            continue
        lines = [f"r/{sub} — {len(posts)} recent posts mentioning {symbol}:"]
        lines.extend(_format_reddit_post(p) for p in posts)
        blocks.append("\n".join(lines))

    if total_posts == 0:
        return (
            f"<no Reddit posts found mentioning {symbol} across "
            f"{', '.join(f'r/{s}' for s in subs)} in the past 7 days>"
        )
    return "\n\n".join(blocks)

View File

@@ -0,0 +1,83 @@
"""StockTwits public symbol-stream fetcher.
StockTwits exposes a per-symbol message stream at
``api.stocktwits.com/api/2/streams/symbol/{ticker}.json`` that requires no
API key, no OAuth, and no registration. Each message includes a
user-labeled sentiment field (``Bullish``/``Bearish``/null), the message
body, timestamp, and posting user.
The function is deliberately self-contained: short timeout, graceful
degradation on any HTTP or parse failure, and a string return type so
the calling agent gets a uniform interface regardless of whether the
network call succeeded.
"""
from __future__ import annotations
import json
import logging
from datetime import datetime, timezone
from typing import Optional
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
logger = logging.getLogger(__name__)
_API = "https://api.stocktwits.com/api/2/streams/symbol/{ticker}.json"
_UA = "tradingagents/0.2 (+https://github.com/TauricResearch/TradingAgents)"
def fetch_stocktwits_messages(ticker: str, limit: int = 30, timeout: float = 10.0) -> str:
    """Fetch recent StockTwits messages for ``ticker`` as a formatted text block.

    Args:
        ticker: Ticker symbol; it is upper-cased and percent-encoded before
            being placed in the request URL.
        limit: Maximum number of messages to include, taken from the front
            of the returned stream. Negative values are treated as 0.
        timeout: Request timeout in seconds.

    Returns:
        A summary line (Bullish / Bearish / Unlabeled counts, with the
        percentages computed over all included messages, unlabeled ones
        included) followed by a blank line and one line per message.
        Returns a placeholder string instead when the endpoint is
        unreachable, the response cannot be parsed, or it carries no
        messages — the caller never has to handle ``None`` or exceptions
        from the network/parse step.
    """
    # Local imports keep this block self-contained.
    from http.client import HTTPException
    from urllib.parse import quote

    symbol = ticker.upper()
    # Percent-encode so a symbol containing "/" or spaces cannot alter the path.
    url = _API.format(ticker=quote(symbol, safe=""))
    req = Request(url, headers={"User-Agent": _UA, "Accept": "application/json"})
    try:
        with urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read())
    except (OSError, HTTPException, ValueError) as exc:
        # OSError covers HTTPError, URLError, timeouts and socket errors
        # raised while reading the body; HTTPException covers truncated or
        # malformed HTTP responses; ValueError covers JSONDecodeError and
        # undecodable bytes.
        logger.warning("StockTwits fetch failed for %s: %s", ticker, exc)
        return f"<stocktwits unavailable: {type(exc).__name__}>"

    messages = data.get("messages") if isinstance(data, dict) else None
    if not isinstance(messages, list) or not messages:
        return f"<no StockTwits messages found for ${symbol}>"

    counts = {"Bullish": 0, "Bearish": 0, "no-label": 0}
    lines = []
    # Clamp so a negative limit cannot turn the slice into "drop the tail".
    for m in messages[:max(limit, 0)]:
        if not isinstance(m, dict):
            # Unexpected entry shape; skip it rather than fail the whole fetch.
            continue
        created = m.get("created_at", "")
        user_obj = m.get("user")
        user = user_obj.get("username", "?") if isinstance(user_obj, dict) else "?"
        entities = m.get("entities")
        sentiment_obj = entities.get("sentiment") if isinstance(entities, dict) else None
        sentiment = sentiment_obj.get("basic") if isinstance(sentiment_obj, dict) else None
        body = (m.get("body") or "").replace("\n", " ").strip()
        if len(body) > 280:
            # Mark the cut so the reader knows the message is truncated.
            body = body[:280] + "…"
        # Anything other than the two known labels counts as unlabeled.
        tag = sentiment if sentiment in ("Bullish", "Bearish") else "no-label"
        counts[tag] += 1
        lines.append(f"[{created} · @{user} · {tag}] {body}")

    bullish = counts["Bullish"]
    bearish = counts["Bearish"]
    unlabeled = counts["no-label"]
    total = bullish + bearish + unlabeled
    bull_pct = round(100 * bullish / total) if total else 0
    bear_pct = round(100 * bearish / total) if total else 0
    summary = (
        f"Bullish: {bullish} ({bull_pct}%) · "
        f"Bearish: {bearish} ({bear_pct}%) · "
        f"Unlabeled: {unlabeled} · "
        f"Total: {total} most-recent messages"
    )
    return summary + "\n\n" + "\n".join(lines)

View File

@@ -54,7 +54,11 @@ class GraphSetup:
tool_nodes["market"] = self.tool_nodes["market"] tool_nodes["market"] = self.tool_nodes["market"]
if "social" in selected_analysts: if "social" in selected_analysts:
analyst_nodes["social"] = create_social_media_analyst( # "social" selector key preserved for back-compat with existing
# user configs; the underlying agent has been renamed to
# sentiment_analyst (the old name advertised social-media data
# the agent never had access to — see issue #557).
analyst_nodes["social"] = create_sentiment_analyst(
self.quick_thinking_llm self.quick_thinking_llm
) )
delete_nodes["social"] = create_msg_delete() delete_nodes["social"] = create_msg_delete()