mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-16 21:06:15 +03:00
fix: support commodity/forex/crypto tickers and never invent prices (#781)
Analyzing a symbol Yahoo Finance does not recognize (e.g. XAUUSD+) could
produce an invented price instead of an error. The agent now either prices
the correct instrument or clearly reports that data is unavailable.
Ticker support:
- Commodities/forex/crypto resolve to the symbol Yahoo actually serves, so
you can enter the common form and it just works:
XAUUSD / XAUUSD+ / GOLD -> GC=F (gold)
USOIL -> CL=F (WTI crude)
EURUSD -> EURUSD=X
BTCUSD -> BTC-USD
SPX500 / NAS100 -> ^GSPC / ^NDX
Native Yahoo symbols (AAPL, GC=F, ^GSPC) keep working unchanged. New
instruments are added by extending the alias table.
Reliability:
- Unknown or delisted symbols now return a clear "data unavailable" result
the agent reports verbatim, instead of a value the model fills in.
- A failed fetch no longer leaves a broken symbol cached until the cache is
cleared by hand.
This commit is contained in:
88
tests/test_no_data_handling.py
Normal file
88
tests/test_no_data_handling.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Tests that empty vendor results never become fabricated data.
|
||||
|
||||
Covers two systematic fixes:
|
||||
- load_ohlcv must not cache an empty download (cache poisoning), and must
|
||||
raise NoMarketDataError instead of returning an empty frame.
|
||||
- route_to_vendor must convert NoMarketDataError into a single explicit
|
||||
"NO_DATA_AVAILABLE" sentinel after all vendors are exhausted.
|
||||
"""
|
||||
|
||||
import os
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from tradingagents.dataflows import stockstats_utils, interface
|
||||
from tradingagents.dataflows.config import set_config
|
||||
from tradingagents.dataflows.symbol_utils import NoMarketDataError
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestLoadOhlcvNoPoison(unittest.TestCase):
    """load_ohlcv must raise on an empty download and never cache it."""

    def setUp(self):
        # Point the data cache at a scratch directory next to this test file.
        self._tmp = os.path.join(os.path.dirname(__file__), "_tmp_cache")
        os.makedirs(self._tmp, exist_ok=True)
        # Registered before set_config so the directory is still removed if
        # configuration raises (unittest skips tearDown when setUp fails, but
        # always runs cleanups that were already registered).
        self.addCleanup(self._remove_tmp_cache)
        set_config({"data_cache_dir": self._tmp})

    def _remove_tmp_cache(self):
        """Delete every file in the scratch cache, then the directory itself."""
        for name in os.listdir(self._tmp):
            os.remove(os.path.join(self._tmp, name))
        os.rmdir(self._tmp)

    def test_empty_download_raises_and_does_not_cache(self):
        empty = pd.DataFrame()

        with mock.patch.object(stockstats_utils.yf, "download", return_value=empty):
            with self.assertRaises(NoMarketDataError):
                stockstats_utils.load_ohlcv("FAKE", "2026-01-01")

        # Nothing should have been written to the cache.
        self.assertEqual(os.listdir(self._tmp), [])

        # A second call must re-attempt the fetch (no poisoned cache served).
        with mock.patch.object(
            stockstats_utils.yf, "download", return_value=empty
        ) as second_download:
            with self.assertRaises(NoMarketDataError):
                stockstats_utils.load_ohlcv("FAKE", "2026-01-01")
        self.assertTrue(second_download.called)
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestRouteToVendorSentinel(unittest.TestCase):
    """route_to_vendor must turn NoMarketDataError into one explicit sentinel."""

    @staticmethod
    def _route_stock_data(vendors, symbol):
        """Route ``get_stock_data`` for ``symbol`` with vendor impls stubbed.

        ``vendors`` maps vendor name -> stub callable; it temporarily replaces
        the ``get_stock_data`` entry of ``interface.VENDOR_METHODS``.
        """
        with mock.patch.dict(
            interface.VENDOR_METHODS, {"get_stock_data": vendors}, clear=False
        ):
            return interface.route_to_vendor(
                "get_stock_data", symbol, "2026-01-01", "2026-01-10"
            )

    def test_no_data_from_all_vendors_returns_sentinel(self):
        def raises_no_data(symbol, *a, **k):
            raise NoMarketDataError(symbol, "GC=F", "no rows")

        result = self._route_stock_data(
            {"yfinance": raises_no_data, "alpha_vantage": raises_no_data},
            "XAUUSD+",
        )

        self.assertIn("NO_DATA_AVAILABLE", result)
        self.assertIn("XAUUSD+", result)
        self.assertIn("GC=F", result)
        self.assertIn("Do not estimate", result)

    def test_unconfigured_fallback_does_not_mask_no_data(self):
        # When the primary vendor reports no data and the fallback is simply
        # unavailable (e.g. missing API key -> raises), the no-data sentinel
        # must win rather than the fallback's incidental error crashing out.
        def raises_no_data(symbol, *a, **k):
            raise NoMarketDataError(symbol, symbol, "no rows")

        def raises_unavailable(symbol, *a, **k):
            raise ValueError("ALPHA_VANTAGE_API_KEY environment variable is not set.")

        result = self._route_stock_data(
            {"yfinance": raises_no_data, "alpha_vantage": raises_unavailable},
            "FAKE",
        )

        self.assertIn("NO_DATA_AVAILABLE", result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -14,6 +14,11 @@ class TestSafeTickerComponent(unittest.TestCase):
|
||||
for ticker in ("AAPL", "BRK-B", "BRK.A", "0700.HK", "7203.T", "BHP.AX", "^GSPC"):
|
||||
self.assertEqual(safe_ticker_component(ticker), ticker)
|
||||
|
||||
def test_accepts_futures_and_forex_formats(self):
|
||||
# Futures use '=' (GC=F gold, CL=F crude), forex/CFD symbols use '+'.
|
||||
for ticker in ("GC=F", "CL=F", "ES=F", "XAUUSD+", "EURUSD+"):
|
||||
self.assertEqual(safe_ticker_component(ticker), ticker)
|
||||
|
||||
def test_rejects_path_separators(self):
|
||||
for bad in (".", "..", "../etc", "a/b", "a\\b", "/abs", "..\\..\\x"):
|
||||
with self.assertRaises(ValueError):
|
||||
|
||||
81
tests/test_symbol_utils.py
Normal file
81
tests/test_symbol_utils.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""Tests for symbol normalization and the no-data routing sentinel."""
|
||||
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
from tradingagents.dataflows.symbol_utils import (
|
||||
NoMarketDataError,
|
||||
normalize_symbol,
|
||||
is_yahoo_safe,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestNormalizeSymbol(unittest.TestCase):
    """normalize_symbol maps broker-style symbols onto Yahoo symbols."""

    def test_plain_equities_unchanged(self):
        for sym in ("AAPL", "MSFT", "TSM", "BRK.B", "0700.HK", "^GSPC", "GC=F"):
            # subTest reports which symbol failed and keeps checking the rest.
            with self.subTest(symbol=sym):
                self.assertEqual(normalize_symbol(sym), sym)

    def test_lowercases_are_upper(self):
        self.assertEqual(normalize_symbol("aapl"), "AAPL")
        self.assertEqual(normalize_symbol(" msft "), "MSFT")

    def test_metal_aliases_map_to_futures(self):
        self.assertEqual(normalize_symbol("XAUUSD"), "GC=F")
        self.assertEqual(normalize_symbol("XAUUSD+"), "GC=F")  # broker CFD suffix
        self.assertEqual(normalize_symbol("xauusd+"), "GC=F")
        self.assertEqual(normalize_symbol("GOLD"), "GC=F")
        self.assertEqual(normalize_symbol("XAGUSD"), "SI=F")

    def test_energy_and_index_aliases(self):
        self.assertEqual(normalize_symbol("USOIL"), "CL=F")
        self.assertEqual(normalize_symbol("SPX500"), "^GSPC")
        self.assertEqual(normalize_symbol("NAS100"), "^NDX")
        self.assertEqual(normalize_symbol("US30"), "^DJI")

    def test_forex_pairs_get_x_suffix(self):
        self.assertEqual(normalize_symbol("EURUSD"), "EURUSD=X")
        self.assertEqual(normalize_symbol("GBPJPY"), "GBPJPY=X")
        self.assertEqual(normalize_symbol("eurusd"), "EURUSD=X")

    def test_crypto_pairs_get_dash_usd(self):
        self.assertEqual(normalize_symbol("BTCUSD"), "BTC-USD")
        self.assertEqual(normalize_symbol("ETHUSD"), "ETH-USD")

    def test_six_letter_non_currency_left_alone(self):
        # GOOGLE-style 6-letter tickers that aren't two currency codes
        # must not be mangled into a fake forex pair.
        self.assertEqual(normalize_symbol("ABCDEF"), "ABCDEF")

    def test_empty_input_passthrough(self):
        self.assertEqual(normalize_symbol(""), "")
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestNoMarketDataError(unittest.TestCase):
    """NoMarketDataError exposes the requested and the queried symbol."""

    def test_message_includes_resolution(self):
        err = NoMarketDataError("XAUUSD+", "GC=F", "no rows")
        message = str(err)
        # Both the symbol the user typed and the one actually queried must
        # appear in the rendered message.
        self.assertIn("XAUUSD+", message)
        self.assertIn("GC=F", message)
        self.assertEqual(err.symbol, "XAUUSD+")
        self.assertEqual(err.canonical, "GC=F")

    def test_canonical_defaults_to_symbol(self):
        err = NoMarketDataError("FOOBAR")
        self.assertEqual(err.canonical, "FOOBAR")
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestIsYahooSafe(unittest.TestCase):
    """is_yahoo_safe accepts Yahoo's structural characters and nothing else."""

    def test_accepts_structural_chars(self):
        for sym in ("AAPL", "GC=F", "^GSPC", "BRK.B", "BTC-USD"):
            # subTest reports which symbol failed and keeps checking the rest.
            with self.subTest(symbol=sym):
                self.assertTrue(is_yahoo_safe(sym))

    def test_rejects_slash_and_space(self):
        for sym in ("a/b", "AA PL", ""):
            with self.subTest(symbol=sym):
                self.assertFalse(is_yahoo_safe(sym))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -7,11 +7,24 @@ from io import StringIO
|
||||
|
||||
API_BASE_URL = "https://www.alphavantage.co/query"
|
||||
|
||||
|
||||
class AlphaVantageNotConfiguredError(ValueError):
    """Raised when Alpha Vantage is selected but no API key is configured.

    Subclasses ValueError for backward compatibility with callers that
    already catch ValueError, while letting the routing layer distinguish a
    "vendor unavailable" condition from a genuine data error.
    """
|
||||
|
||||
|
||||
def get_api_key() -> str:
    """Retrieve the API key for Alpha Vantage from environment variables.

    Returns:
        The value of the ``ALPHA_VANTAGE_API_KEY`` environment variable.

    Raises:
        AlphaVantageNotConfiguredError: If the variable is unset or empty.
            It subclasses ValueError, so existing ``except ValueError``
            handlers keep working.
    """
    api_key = os.getenv("ALPHA_VANTAGE_API_KEY")
    if not api_key:
        # Raise the typed error only; a plain ValueError raised ahead of it
        # would make this "vendor not configured" signal unreachable.
        raise AlphaVantageNotConfiguredError(
            "ALPHA_VANTAGE_API_KEY environment variable is not set."
        )
    return api_key
|
||||
|
||||
def format_datetime_for_api(date_input) -> str:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .alpha_vantage_common import _make_api_request
|
||||
from .alpha_vantage_common import _make_api_request, AlphaVantageNotConfiguredError
|
||||
|
||||
def get_indicator(
|
||||
symbol: str,
|
||||
@@ -217,6 +217,11 @@ def get_indicator(
|
||||
|
||||
return result_str
|
||||
|
||||
except AlphaVantageNotConfiguredError:
|
||||
# Vendor unavailable (no API key). Let it propagate so the router can
|
||||
# fall back / emit the no-data sentinel instead of returning this as a
|
||||
# successful-looking error string.
|
||||
raise
|
||||
except Exception as e:
|
||||
print(f"Error getting Alpha Vantage indicator data for {indicator}: {e}")
|
||||
return f"Error retrieving {indicator} data: {str(e)}"
|
||||
|
||||
@@ -23,6 +23,7 @@ from .alpha_vantage import (
|
||||
get_global_news as get_alpha_vantage_global_news,
|
||||
)
|
||||
from .alpha_vantage_common import AlphaVantageRateLimitError
|
||||
from .symbol_utils import NoMarketDataError
|
||||
|
||||
# Configuration and routing logic
|
||||
from .config import get_config
|
||||
@@ -147,6 +148,8 @@ def route_to_vendor(method: str, *args, **kwargs):
|
||||
if vendor not in fallback_vendors:
|
||||
fallback_vendors.append(vendor)
|
||||
|
||||
last_no_data: NoMarketDataError | None = None
|
||||
first_error: Exception | None = None
|
||||
for vendor in fallback_vendors:
|
||||
if vendor not in VENDOR_METHODS[method]:
|
||||
continue
|
||||
@@ -157,6 +160,37 @@ def route_to_vendor(method: str, *args, **kwargs):
|
||||
try:
|
||||
return impl_func(*args, **kwargs)
|
||||
except AlphaVantageRateLimitError:
|
||||
continue # Only rate limits trigger fallback
|
||||
continue # Rate limits: try the next vendor
|
||||
except NoMarketDataError as e:
|
||||
last_no_data = e # No data here; another vendor may have it
|
||||
continue
|
||||
except Exception as e:
|
||||
# A fallback vendor failing for an incidental reason (e.g. no API
|
||||
# key configured) must not crash the call when another vendor
|
||||
# already determined the symbol simply has no data. Remember the
|
||||
# first error so a genuine primary-vendor failure still surfaces.
|
||||
if first_error is None:
|
||||
first_error = e
|
||||
continue
|
||||
|
||||
# If any vendor reported "no data", the symbol is genuinely unavailable.
|
||||
# Return one explicit, instructive sentinel rather than a vendor-specific
|
||||
# empty string, so the agent reports "unavailable" instead of inventing a
|
||||
# value. This takes precedence over incidental fallback errors.
|
||||
if last_no_data is not None:
|
||||
sym = last_no_data.symbol
|
||||
canonical = last_no_data.canonical
|
||||
resolved = "" if canonical == sym else f" (resolved to '{canonical}')"
|
||||
return (
|
||||
f"NO_DATA_AVAILABLE: No market data found for '{sym}'{resolved} from "
|
||||
f"any configured vendor. The symbol may be invalid, delisted, or not "
|
||||
f"covered by Yahoo Finance / Alpha Vantage. Do not estimate or "
|
||||
f"fabricate values — report that data is unavailable for this symbol."
|
||||
)
|
||||
|
||||
# No vendor returned data and none reported clean "no data" — surface the
|
||||
# first real error (e.g. the primary vendor's network failure).
|
||||
if first_error is not None:
|
||||
raise first_error
|
||||
|
||||
raise RuntimeError(f"No available vendor for '{method}'")
|
||||
@@ -9,6 +9,7 @@ from typing import Annotated
|
||||
import os
|
||||
from .config import get_config
|
||||
from .utils import safe_ticker_component
|
||||
from .symbol_utils import normalize_symbol, NoMarketDataError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -68,9 +69,11 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
||||
subsequent calls the cache is reused. Rows after curr_date are
|
||||
filtered out so backtests never see future prices.
|
||||
"""
|
||||
# Reject ticker values that would escape the cache directory when
|
||||
# Resolve broker/forex symbols (XAUUSD+ -> GC=F) to Yahoo's convention,
|
||||
# then reject values that would escape the cache directory when
|
||||
# interpolated into the cache filename (e.g. ``../../tmp/x``).
|
||||
safe_symbol = safe_ticker_component(symbol)
|
||||
canonical = normalize_symbol(symbol)
|
||||
safe_symbol = safe_ticker_component(canonical)
|
||||
|
||||
config = get_config()
|
||||
curr_date_dt = pd.to_datetime(curr_date)
|
||||
@@ -87,19 +90,32 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
||||
f"{safe_symbol}-YFin-data-{start_str}-{end_str}.csv",
|
||||
)
|
||||
|
||||
# A cached file may be empty if a prior fetch failed (unknown symbol,
|
||||
# transient rate limit). Treat an empty/columnless cache as a miss and
|
||||
# re-fetch rather than serving the poisoned file forever.
|
||||
data = None
|
||||
if os.path.exists(data_file):
|
||||
data = pd.read_csv(data_file, on_bad_lines="skip", encoding="utf-8")
|
||||
else:
|
||||
data = yf_retry(lambda: yf.download(
|
||||
symbol,
|
||||
cached = pd.read_csv(data_file, on_bad_lines="skip", encoding="utf-8")
|
||||
if not cached.empty and "Close" in cached.columns:
|
||||
data = cached
|
||||
|
||||
if data is None:
|
||||
downloaded = yf_retry(lambda: yf.download(
|
||||
canonical,
|
||||
start=start_str,
|
||||
end=end_str,
|
||||
multi_level_index=False,
|
||||
progress=False,
|
||||
auto_adjust=True,
|
||||
))
|
||||
data = _ensure_date_column(data.reset_index())
|
||||
data.to_csv(data_file, index=False, encoding="utf-8")
|
||||
downloaded = _ensure_date_column(downloaded.reset_index())
|
||||
# Only cache real data — never persist an empty frame.
|
||||
if downloaded.empty or "Close" not in downloaded.columns:
|
||||
raise NoMarketDataError(
|
||||
symbol, canonical, "Yahoo Finance returned no rows"
|
||||
)
|
||||
downloaded.to_csv(data_file, index=False, encoding="utf-8")
|
||||
data = downloaded
|
||||
|
||||
data = _clean_dataframe(data)
|
||||
|
||||
|
||||
131
tradingagents/dataflows/symbol_utils.py
Normal file
131
tradingagents/dataflows/symbol_utils.py
Normal file
@@ -0,0 +1,131 @@
|
||||
"""Symbol normalization and market-data error types for vendor calls.
|
||||
|
||||
Yahoo Finance (the default vendor) uses specific ticker conventions that
|
||||
differ from the broker / TradingView / MT5 style symbols users often type:
|
||||
|
||||
user types Yahoo wants why
|
||||
--------------- --------------- -----------------------------------
|
||||
XAUUSD, XAUUSD+ GC=F gold has no forex pair on Yahoo;
|
||||
it is quoted as a COMEX future
|
||||
EURUSD EURUSD=X spot forex pairs take a ``=X`` suffix
|
||||
BTCUSD BTC-USD crypto pairs use a ``-`` separator
|
||||
SPX500, US500 ^GSPC index CFDs map to Yahoo index symbols
|
||||
|
||||
Passing the raw broker symbol to Yahoo returns an empty result, which the
|
||||
agents previously received as free text and could hallucinate a price
|
||||
around (see issue #781). Centralizing the mapping here means every yfinance
|
||||
entry point resolves symbols the same way, and new instruments are added by
|
||||
appending a table row rather than editing call sites.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NoMarketDataError(Exception):
    """Raised when a vendor returns no rows/records for a symbol.

    Carries both the symbol the user requested and the canonical symbol the
    vendor was actually queried with, so callers can build a clear message
    instead of emitting a vendor-specific empty string into the data channel.

    Attributes:
        symbol: The symbol as requested by the caller.
        canonical: The symbol the vendor was queried with; falls back to
            ``symbol`` when no canonical form was supplied.
        detail: Optional free-text explanation appended to the message.
    """

    def __init__(self, symbol: str, canonical: str | None = None, detail: str = ""):
        self.symbol = symbol
        self.canonical = canonical or symbol
        self.detail = detail

        msg = f"No market data for {symbol!r}"
        # Only mention the queried symbol when it differs from the request.
        if canonical and canonical != symbol:
            msg += f" (queried as {canonical!r})"
        if detail:
            msg += f": {detail}"
        super().__init__(msg)

    def __reduce__(self):
        # The default reduction rebuilds the exception from ``self.args``,
        # i.e. the already-formatted message, which would be passed back in
        # as ``symbol`` and wrapped in a second "No market data for ..."
        # prefix. Rebuild from the original constructor arguments instead so
        # copy/pickle round-trips keep the message intact.
        return (type(self), (self.symbol, self.canonical, self.detail), vars(self))
|
||||
|
||||
|
||||
# ISO-4217 codes common enough to appear in retail forex pairs. A bare
# six-letter symbol whose halves are BOTH in this set is treated as a spot
# forex pair and given Yahoo's ``=X`` suffix.
_FOREX_CURRENCIES = frozenset(
    {
        "USD", "EUR", "GBP", "JPY", "CHF", "CAD", "AUD", "NZD",
        "CNY", "CNH", "HKD", "SGD", "SEK", "NOK", "DKK", "PLN",
        "MXN", "ZAR", "TRY", "INR", "KRW", "BRL", "RUB", "THB",
    }
)

# Crypto bases that brokers quote against USD without a separator.
_CRYPTO_BASES = frozenset(
    {"BTC", "ETH", "SOL", "XRP", "ADA", "DOGE", "LTC", "BCH", "DOT", "AVAX", "LINK"}
)

# Explicit aliases for instruments whose broker symbol does not map to a
# Yahoo symbol by rule. Metals/energy resolve to their front-month future;
# index CFD names resolve to the underlying Yahoo index symbol. Extend by
# adding rows — no call site changes required.
_ALIASES = {
    # Precious metals (spot names -> COMEX/NYMEX futures)
    "XAUUSD": "GC=F", "XAU": "GC=F", "GOLD": "GC=F",
    "XAGUSD": "SI=F", "XAG": "SI=F", "SILVER": "SI=F",
    "XPTUSD": "PL=F", "XPDUSD": "PA=F",
    # Energy
    "WTICOUSD": "CL=F", "USOIL": "CL=F", "WTI": "CL=F",
    "BCOUSD": "BZ=F", "UKOIL": "BZ=F", "BRENT": "BZ=F",
    "NATGAS": "NG=F", "XNGUSD": "NG=F",
    "COPPER": "HG=F", "XCUUSD": "HG=F",
    # Index CFDs -> Yahoo index symbols
    "SPX500": "^GSPC", "US500": "^GSPC", "SPX": "^GSPC",
    "NAS100": "^NDX", "US100": "^NDX", "USTEC": "^NDX",
    "US30": "^DJI", "DJI30": "^DJI", "WS30": "^DJI",
    "GER40": "^GDAXI", "GER30": "^GDAXI", "DE40": "^GDAXI",
    "UK100": "^FTSE", "JP225": "^N225", "JPN225": "^N225",
    "FRA40": "^FCHI", "EU50": "^STOXX50E", "HK50": "^HSI",
}

# Yahoo symbols may contain letters, digits, and these structural characters.
_YAHOO_SAFE = re.compile(r"^[A-Za-z0-9._\-\^=]+$")
|
||||
|
||||
|
||||
def normalize_symbol(raw: str) -> str:
    """Map a user/broker symbol to its canonical Yahoo Finance symbol.

    Resolution order (first match wins):
      1. Explicit alias table (metals, energy, index CFDs).
      2. Crypto rule: ``<BASE>USD`` where BASE is a known crypto -> ``BASE-USD``.
      3. Forex rule: six letters that are two ISO currency codes -> ``PAIR=X``.
      4. Otherwise the upper-cased symbol is returned unchanged (plain
         equities, ETFs, Yahoo-native symbols like ``GC=F`` or ``^GSPC``).

    Trailing ``+`` characters (broker CFD marker, e.g. ``XAUUSD+``) are
    stripped before matching, along with any whitespace that stripping
    exposes. The function is purely syntactic — it performs no network
    calls — so it is safe to apply on every request.

    Args:
        raw: The symbol as typed by the user or supplied by a broker feed.

    Returns:
        The resolved Yahoo symbol. Non-string or blank input is returned
        untouched so callers' own validation can reject it.
    """
    if not isinstance(raw, str) or not raw.strip():
        return raw

    requested = raw.strip().upper()
    # Broker CFD/qualifier suffixes Yahoo never uses. Strip again afterwards
    # so "XAUUSD +" does not keep a trailing space that defeats the lookups.
    s = requested.rstrip("+").rstrip()
    base, quote = s[:-3], s[-3:]

    if s in _ALIASES:
        canonical = _ALIASES[s]
    elif quote == "USD" and base in _CRYPTO_BASES:
        # Covers 3- and 4-letter bases alike (BTCUSD, DOGEUSD).
        canonical = f"{base}-USD"
    elif len(s) == 6 and base in _FOREX_CURRENCIES and quote in _FOREX_CURRENCIES:
        canonical = f"{s}=X"
    else:
        canonical = s

    if canonical != requested:
        logger.info("Resolved symbol %r to Yahoo symbol %r", raw, canonical)
    return canonical
|
||||
|
||||
|
||||
def is_yahoo_safe(symbol: str) -> bool:
    """True when ``symbol`` only contains characters Yahoo symbols use.

    Empty and non-string values are reported as unsafe (False) rather than
    reaching the regex, which only accepts ``str`` input.
    """
    if not isinstance(symbol, str) or not symbol:
        return False
    return _YAHOO_SAFE.fullmatch(symbol) is not None
|
||||
@@ -7,10 +7,12 @@ from typing import Annotated
|
||||
|
||||
SavePathType = Annotated[str, "File path to save data. If None, data is not saved."]
|
||||
|
||||
# Tickers can contain letters, digits, dot, dash, underscore, and caret
|
||||
# (for index symbols like ^GSPC). Anything else is rejected so the value
|
||||
# never escapes a containing directory when interpolated into a path.
|
||||
_TICKER_PATH_RE = re.compile(r"^[A-Za-z0-9._\-\^]+$")
|
||||
# Tickers can contain letters, digits, dot, dash, underscore, caret
|
||||
# (index symbols like ^GSPC), equals (futures like GC=F), and plus
|
||||
# (forex/CFD symbols like XAUUSD+). None of these enable directory
|
||||
# traversal, so the value never escapes a containing directory when
|
||||
# interpolated into a path. Anything else is rejected.
|
||||
_TICKER_PATH_RE = re.compile(r"^[A-Za-z0-9._\-\^=+]+$")
|
||||
|
||||
|
||||
def safe_ticker_component(value: str, *, max_len: int = 32) -> str:
|
||||
|
||||
@@ -5,6 +5,7 @@ import pandas as pd
|
||||
import yfinance as yf
|
||||
import os
|
||||
from .stockstats_utils import StockstatsUtils, _clean_dataframe, yf_retry, load_ohlcv, filter_financials_by_date
|
||||
from .symbol_utils import normalize_symbol, NoMarketDataError
|
||||
|
||||
def get_YFin_data_online(
|
||||
symbol: Annotated[str, "ticker symbol of the company"],
|
||||
@@ -15,16 +16,19 @@ def get_YFin_data_online(
|
||||
datetime.strptime(start_date, "%Y-%m-%d")
|
||||
datetime.strptime(end_date, "%Y-%m-%d")
|
||||
|
||||
# Create ticker object
|
||||
ticker = yf.Ticker(symbol.upper())
|
||||
# Resolve broker/forex symbols to Yahoo's convention (XAUUSD+ -> GC=F).
|
||||
canonical = normalize_symbol(symbol)
|
||||
ticker = yf.Ticker(canonical)
|
||||
|
||||
# Fetch historical data for the specified date range
|
||||
data = yf_retry(lambda: ticker.history(start=start_date, end=end_date))
|
||||
|
||||
# Check if data is empty
|
||||
# Empty result means the symbol is unknown/delisted. Raise a typed error
|
||||
# instead of returning prose: the routing layer turns it into a single
|
||||
# unambiguous "no data" signal so the agent never fabricates a price.
|
||||
if data.empty:
|
||||
return (
|
||||
f"No data found for symbol '{symbol}' between {start_date} and {end_date}"
|
||||
raise NoMarketDataError(
|
||||
symbol, canonical, f"no rows between {start_date} and {end_date}"
|
||||
)
|
||||
|
||||
# Remove timezone info from index for cleaner output
|
||||
@@ -40,8 +44,10 @@ def get_YFin_data_online(
|
||||
# Convert DataFrame to CSV string
|
||||
csv_string = data.to_csv()
|
||||
|
||||
# Add header information
|
||||
header = f"# Stock data for {symbol.upper()} from {start_date} to {end_date}\n"
|
||||
# Add header information; note the resolved symbol when it differs so the
|
||||
# agent (and user) can see which instrument was actually priced.
|
||||
label = canonical if canonical == symbol.upper() else f"{canonical} (from {symbol})"
|
||||
header = f"# Stock data for {label} from {start_date} to {end_date}\n"
|
||||
header += f"# Total records: {len(data)}\n"
|
||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||
|
||||
@@ -163,6 +169,8 @@ def get_stock_stats_indicators_window(
|
||||
for date_str, value in date_values:
|
||||
ind_string += f"{date_str}: {value}\n"
|
||||
|
||||
except NoMarketDataError:
|
||||
raise # Unknown/delisted symbol — let the router emit the sentinel
|
||||
except Exception as e:
|
||||
print(f"Error getting bulk stockstats data: {e}")
|
||||
# Fallback to original implementation if bulk method fails
|
||||
@@ -236,6 +244,8 @@ def get_stockstats_indicator(
|
||||
indicator,
|
||||
curr_date,
|
||||
)
|
||||
except NoMarketDataError:
|
||||
raise # Unknown/delisted symbol — let the router emit the sentinel
|
||||
except Exception as e:
|
||||
print(
|
||||
f"Error getting stockstats indicator data for indicator {indicator} on {curr_date}: {e}"
|
||||
@@ -250,12 +260,13 @@ def get_fundamentals(
|
||||
curr_date: Annotated[str, "current date (not used for yfinance)"] = None
|
||||
):
|
||||
"""Get company fundamentals overview from yfinance."""
|
||||
canonical = normalize_symbol(ticker)
|
||||
try:
|
||||
ticker_obj = yf.Ticker(ticker.upper())
|
||||
ticker_obj = yf.Ticker(canonical)
|
||||
info = yf_retry(lambda: ticker_obj.info)
|
||||
|
||||
if not info:
|
||||
return f"No fundamentals data found for symbol '{ticker}'"
|
||||
raise NoMarketDataError(ticker, canonical, "no fundamentals returned")
|
||||
|
||||
fields = [
|
||||
("Name", info.get("longName")),
|
||||
@@ -293,11 +304,20 @@ def get_fundamentals(
|
||||
if value is not None:
|
||||
lines.append(f"{label}: {value}")
|
||||
|
||||
header = f"# Company Fundamentals for {ticker.upper()}\n"
|
||||
# yfinance returns a stub dict (e.g. {"trailingPegRatio": None}) for
|
||||
# unknown symbols, so `info` is truthy but every field is empty. Treat
|
||||
# "no usable fields" as no data rather than emitting a bare header the
|
||||
# agent might fabricate around.
|
||||
if not lines:
|
||||
raise NoMarketDataError(ticker, canonical, "no fundamental fields returned")
|
||||
|
||||
header = f"# Company Fundamentals for {canonical}\n"
|
||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||
|
||||
return header + "\n".join(lines)
|
||||
|
||||
except NoMarketDataError:
|
||||
raise
|
||||
except Exception as e:
|
||||
return f"Error retrieving fundamentals for {ticker}: {str(e)}"
|
||||
|
||||
@@ -308,8 +328,9 @@ def get_balance_sheet(
|
||||
curr_date: Annotated[str, "current date in YYYY-MM-DD format"] = None
|
||||
):
|
||||
"""Get balance sheet data from yfinance."""
|
||||
canonical = normalize_symbol(ticker)
|
||||
try:
|
||||
ticker_obj = yf.Ticker(ticker.upper())
|
||||
ticker_obj = yf.Ticker(canonical)
|
||||
|
||||
if freq.lower() == "quarterly":
|
||||
data = yf_retry(lambda: ticker_obj.quarterly_balance_sheet)
|
||||
@@ -319,17 +340,19 @@ def get_balance_sheet(
|
||||
data = filter_financials_by_date(data, curr_date)
|
||||
|
||||
if data.empty:
|
||||
return f"No balance sheet data found for symbol '{ticker}'"
|
||||
raise NoMarketDataError(ticker, canonical, "no balance sheet data")
|
||||
|
||||
# Convert to CSV string for consistency with other functions
|
||||
csv_string = data.to_csv()
|
||||
|
||||
# Add header information
|
||||
header = f"# Balance Sheet data for {ticker.upper()} ({freq})\n"
|
||||
header = f"# Balance Sheet data for {canonical} ({freq})\n"
|
||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||
|
||||
return header + csv_string
|
||||
|
||||
except NoMarketDataError:
|
||||
raise
|
||||
except Exception as e:
|
||||
return f"Error retrieving balance sheet for {ticker}: {str(e)}"
|
||||
|
||||
@@ -340,8 +363,9 @@ def get_cashflow(
|
||||
curr_date: Annotated[str, "current date in YYYY-MM-DD format"] = None
|
||||
):
|
||||
"""Get cash flow data from yfinance."""
|
||||
canonical = normalize_symbol(ticker)
|
||||
try:
|
||||
ticker_obj = yf.Ticker(ticker.upper())
|
||||
ticker_obj = yf.Ticker(canonical)
|
||||
|
||||
if freq.lower() == "quarterly":
|
||||
data = yf_retry(lambda: ticker_obj.quarterly_cashflow)
|
||||
@@ -351,17 +375,19 @@ def get_cashflow(
|
||||
data = filter_financials_by_date(data, curr_date)
|
||||
|
||||
if data.empty:
|
||||
return f"No cash flow data found for symbol '{ticker}'"
|
||||
raise NoMarketDataError(ticker, canonical, "no cash flow data")
|
||||
|
||||
# Convert to CSV string for consistency with other functions
|
||||
csv_string = data.to_csv()
|
||||
|
||||
# Add header information
|
||||
header = f"# Cash Flow data for {ticker.upper()} ({freq})\n"
|
||||
header = f"# Cash Flow data for {canonical} ({freq})\n"
|
||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||
|
||||
return header + csv_string
|
||||
|
||||
except NoMarketDataError:
|
||||
raise
|
||||
except Exception as e:
|
||||
return f"Error retrieving cash flow for {ticker}: {str(e)}"
|
||||
|
||||
@@ -372,8 +398,9 @@ def get_income_statement(
|
||||
curr_date: Annotated[str, "current date in YYYY-MM-DD format"] = None
|
||||
):
|
||||
"""Get income statement data from yfinance."""
|
||||
canonical = normalize_symbol(ticker)
|
||||
try:
|
||||
ticker_obj = yf.Ticker(ticker.upper())
|
||||
ticker_obj = yf.Ticker(canonical)
|
||||
|
||||
if freq.lower() == "quarterly":
|
||||
data = yf_retry(lambda: ticker_obj.quarterly_income_stmt)
|
||||
@@ -383,17 +410,19 @@ def get_income_statement(
|
||||
data = filter_financials_by_date(data, curr_date)
|
||||
|
||||
if data.empty:
|
||||
return f"No income statement data found for symbol '{ticker}'"
|
||||
raise NoMarketDataError(ticker, canonical, "no income statement data")
|
||||
|
||||
# Convert to CSV string for consistency with other functions
|
||||
csv_string = data.to_csv()
|
||||
|
||||
# Add header information
|
||||
header = f"# Income Statement data for {ticker.upper()} ({freq})\n"
|
||||
header = f"# Income Statement data for {canonical} ({freq})\n"
|
||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||
|
||||
return header + csv_string
|
||||
|
||||
except NoMarketDataError:
|
||||
raise
|
||||
except Exception as e:
|
||||
return f"Error retrieving income statement for {ticker}: {str(e)}"
|
||||
|
||||
@@ -402,18 +431,21 @@ def get_insider_transactions(
|
||||
ticker: Annotated[str, "ticker symbol of the company"]
|
||||
):
|
||||
"""Get insider transactions data from yfinance."""
|
||||
canonical = normalize_symbol(ticker)
|
||||
try:
|
||||
ticker_obj = yf.Ticker(ticker.upper())
|
||||
ticker_obj = yf.Ticker(canonical)
|
||||
data = yf_retry(lambda: ticker_obj.insider_transactions)
|
||||
|
||||
# Empty is normal here (many valid symbols have no insider filings),
|
||||
# so report it plainly rather than treating the symbol as invalid.
|
||||
if data is None or data.empty:
|
||||
return f"No insider transactions data found for symbol '{ticker}'"
|
||||
return f"No insider transactions reported for symbol '{canonical}'"
|
||||
|
||||
# Convert to CSV string for consistency with other functions
|
||||
csv_string = data.to_csv()
|
||||
|
||||
# Add header information
|
||||
header = f"# Insider Transactions data for {ticker.upper()}\n"
|
||||
header = f"# Insider Transactions data for {canonical}\n"
|
||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||
|
||||
return header + csv_string
|
||||
|
||||
Reference in New Issue
Block a user