mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-16 21:06:15 +03:00
fix: support commodity/forex/crypto tickers and never invent prices (#781)
Analyzing a symbol Yahoo Finance does not recognize (e.g. XAUUSD+) could
produce an invented price instead of an error. The agent now either prices
the correct instrument or clearly reports that data is unavailable.
Ticker support:
- Commodities/forex/crypto resolve to the symbol Yahoo actually serves, so
you can enter the common form and it just works:
XAUUSD / XAUUSD+ / GOLD -> GC=F (gold)
USOIL -> CL=F (WTI crude)
EURUSD -> EURUSD=X
BTCUSD -> BTC-USD
SPX500 / NAS100 -> ^GSPC / ^NDX
Native Yahoo symbols (AAPL, GC=F, ^GSPC) keep working unchanged. New
instruments are added by extending the alias table.
Reliability:
- Unknown or delisted symbols now return a clear "data unavailable" result
the agent reports verbatim, instead of a value the model fills in.
- A failed fetch no longer leaves a broken symbol cached until the cache is
cleared by hand.
This commit is contained in:
88
tests/test_no_data_handling.py
Normal file
88
tests/test_no_data_handling.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
"""Tests that empty vendor results never become fabricated data.
|
||||||
|
|
||||||
|
Covers two systematic fixes:
|
||||||
|
- load_ohlcv must not cache an empty download (cache poisoning), and must
|
||||||
|
raise NoMarketDataError instead of returning an empty frame.
|
||||||
|
- route_to_vendor must convert NoMarketDataError into a single explicit
|
||||||
|
"NO_DATA_AVAILABLE" sentinel after all vendors are exhausted.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from tradingagents.dataflows import stockstats_utils, interface
|
||||||
|
from tradingagents.dataflows.config import set_config
|
||||||
|
from tradingagents.dataflows.symbol_utils import NoMarketDataError
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class TestLoadOhlcvNoPoison(unittest.TestCase):
    """load_ohlcv must raise on an empty download and must never cache it."""

    def setUp(self):
        # Function-scope imports keep this fixture self-contained.
        import shutil
        import tempfile

        # Use a unique throwaway directory per test: nothing is written inside
        # the source tree and parallel runs cannot collide on a shared path.
        self._tmp = tempfile.mkdtemp(prefix="_tmp_cache_")
        # Registered immediately so the directory is removed even if a later
        # setUp step fails, and even if it ends up containing nested entries.
        self.addCleanup(shutil.rmtree, self._tmp, ignore_errors=True)
        set_config({"data_cache_dir": self._tmp})

    def test_empty_download_raises_and_does_not_cache(self):
        empty = pd.DataFrame()
        with mock.patch.object(stockstats_utils.yf, "download", return_value=empty) as dl:
            with self.assertRaises(NoMarketDataError):
                stockstats_utils.load_ohlcv("FAKE", "2026-01-01")
        # The error must come from a real fetch attempt, not from a shortcut.
        self.assertTrue(dl.called)
        # Nothing should have been written to the cache.
        self.assertEqual(os.listdir(self._tmp), [])

        # A second call must re-attempt the fetch (no poisoned cache served).
        with mock.patch.object(stockstats_utils.yf, "download", return_value=empty) as dl2:
            with self.assertRaises(NoMarketDataError):
                stockstats_utils.load_ohlcv("FAKE", "2026-01-01")
        self.assertTrue(dl2.called)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class TestRouteToVendorSentinel(unittest.TestCase):
    """route_to_vendor must answer with the NO_DATA_AVAILABLE sentinel."""

    def test_no_data_from_all_vendors_returns_sentinel(self):
        def no_rows(symbol, *args, **kwargs):
            raise NoMarketDataError(symbol, "GC=F", "no rows")

        overrides = {
            "get_stock_data": {"yfinance": no_rows, "alpha_vantage": no_rows},
        }
        with mock.patch.dict(interface.VENDOR_METHODS, overrides, clear=False):
            result = interface.route_to_vendor(
                "get_stock_data", "XAUUSD+", "2026-01-01", "2026-01-10"
            )

        # The sentinel names the requested symbol, the resolved symbol, and
        # carries the instruction not to estimate.
        for fragment in ("NO_DATA_AVAILABLE", "XAUUSD+", "GC=F", "Do not estimate"):
            self.assertIn(fragment, result)

    def test_unconfigured_fallback_does_not_mask_no_data(self):
        # The primary vendor reports "no data" while the fallback fails for an
        # unrelated reason (missing API key). The sentinel must still be
        # returned instead of the fallback's error propagating.
        def no_rows(symbol, *args, **kwargs):
            raise NoMarketDataError(symbol, symbol, "no rows")

        def not_configured(symbol, *args, **kwargs):
            raise ValueError("ALPHA_VANTAGE_API_KEY environment variable is not set.")

        overrides = {
            "get_stock_data": {"yfinance": no_rows, "alpha_vantage": not_configured},
        }
        with mock.patch.dict(interface.VENDOR_METHODS, overrides, clear=False):
            result = interface.route_to_vendor(
                "get_stock_data", "FAKE", "2026-01-01", "2026-01-10"
            )

        self.assertIn("NO_DATA_AVAILABLE", result)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -14,6 +14,11 @@ class TestSafeTickerComponent(unittest.TestCase):
|
|||||||
for ticker in ("AAPL", "BRK-B", "BRK.A", "0700.HK", "7203.T", "BHP.AX", "^GSPC"):
|
for ticker in ("AAPL", "BRK-B", "BRK.A", "0700.HK", "7203.T", "BHP.AX", "^GSPC"):
|
||||||
self.assertEqual(safe_ticker_component(ticker), ticker)
|
self.assertEqual(safe_ticker_component(ticker), ticker)
|
||||||
|
|
||||||
|
def test_accepts_futures_and_forex_formats(self):
|
||||||
|
# Futures use '=' (GC=F gold, CL=F crude), forex/CFD symbols use '+'.
|
||||||
|
for ticker in ("GC=F", "CL=F", "ES=F", "XAUUSD+", "EURUSD+"):
|
||||||
|
self.assertEqual(safe_ticker_component(ticker), ticker)
|
||||||
|
|
||||||
def test_rejects_path_separators(self):
|
def test_rejects_path_separators(self):
|
||||||
for bad in (".", "..", "../etc", "a/b", "a\\b", "/abs", "..\\..\\x"):
|
for bad in (".", "..", "../etc", "a/b", "a\\b", "/abs", "..\\..\\x"):
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
|
|||||||
81
tests/test_symbol_utils.py
Normal file
81
tests/test_symbol_utils.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
"""Tests for symbol normalization and the no-data routing sentinel."""
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from tradingagents.dataflows.symbol_utils import (
|
||||||
|
NoMarketDataError,
|
||||||
|
normalize_symbol,
|
||||||
|
is_yahoo_safe,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class TestNormalizeSymbol(unittest.TestCase):
    """Table-driven checks of normalize_symbol's input -> Yahoo symbol mapping."""

    def _assert_maps(self, expected_by_input):
        # Stops at the first mismatch, like a plain run of assertEqual calls.
        for raw, expected in expected_by_input.items():
            self.assertEqual(normalize_symbol(raw), expected)

    def test_plain_equities_unchanged(self):
        native = ("AAPL", "MSFT", "TSM", "BRK.B", "0700.HK", "^GSPC", "GC=F")
        self._assert_maps(dict(zip(native, native)))

    def test_lowercases_are_upper(self):
        self._assert_maps({"aapl": "AAPL", " msft ": "MSFT"})

    def test_metal_aliases_map_to_futures(self):
        self._assert_maps(
            {
                "XAUUSD": "GC=F",
                "XAUUSD+": "GC=F",  # broker CFD suffix
                "xauusd+": "GC=F",
                "GOLD": "GC=F",
                "XAGUSD": "SI=F",
            }
        )

    def test_energy_and_index_aliases(self):
        self._assert_maps(
            {
                "USOIL": "CL=F",
                "SPX500": "^GSPC",
                "NAS100": "^NDX",
                "US30": "^DJI",
            }
        )

    def test_forex_pairs_get_x_suffix(self):
        self._assert_maps(
            {
                "EURUSD": "EURUSD=X",
                "GBPJPY": "GBPJPY=X",
                "eurusd": "EURUSD=X",
            }
        )

    def test_crypto_pairs_get_dash_usd(self):
        self._assert_maps({"BTCUSD": "BTC-USD", "ETHUSD": "ETH-USD"})

    def test_six_letter_non_currency_left_alone(self):
        # A six-letter ticker whose halves are not both currency codes must
        # not be rewritten into a fake forex pair.
        self._assert_maps({"ABCDEF": "ABCDEF"})

    def test_empty_input_passthrough(self):
        self._assert_maps({"": ""})
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class TestNoMarketDataError(unittest.TestCase):
    """NoMarketDataError exposes the requested and the queried symbol."""

    def test_message_includes_resolution(self):
        err = NoMarketDataError("XAUUSD+", "GC=F", "no rows")
        message = str(err)
        # Both the user's symbol and the resolved symbol appear in the text.
        for token in ("XAUUSD+", "GC=F"):
            self.assertIn(token, message)
        self.assertEqual(err.symbol, "XAUUSD+")
        self.assertEqual(err.canonical, "GC=F")

    def test_canonical_defaults_to_symbol(self):
        # With no canonical given, the requested symbol doubles as canonical.
        self.assertEqual(NoMarketDataError("FOOBAR").canonical, "FOOBAR")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
class TestIsYahooSafe(unittest.TestCase):
    """is_yahoo_safe accepts Yahoo's symbol alphabet and rejects the rest."""

    def test_accepts_structural_chars(self):
        accepted = ["AAPL", "GC=F", "^GSPC", "BRK.B", "BTC-USD"]
        for candidate in accepted:
            self.assertTrue(is_yahoo_safe(candidate))

    def test_rejects_slash_and_space(self):
        # A path separator, an embedded space, and the empty string.
        rejected = ["a/b", "AA PL", ""]
        for candidate in rejected:
            self.assertFalse(is_yahoo_safe(candidate))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -7,11 +7,24 @@ from io import StringIO
|
|||||||
|
|
||||||
API_BASE_URL = "https://www.alphavantage.co/query"
|
API_BASE_URL = "https://www.alphavantage.co/query"
|
||||||
|
|
||||||
|
|
||||||
|
class AlphaVantageNotConfiguredError(ValueError):
    """Signal that Alpha Vantage was selected without an API key configured.

    It remains a ``ValueError`` subclass so callers that already catch
    ``ValueError`` keep working, while the dedicated type lets the routing
    layer tell a "vendor unavailable" condition apart from a genuine data
    error.
    """
|
||||||
|
|
||||||
|
|
||||||
def get_api_key() -> str:
|
def get_api_key() -> str:
|
||||||
"""Retrieve the API key for Alpha Vantage from environment variables."""
|
"""Retrieve the API key for Alpha Vantage from environment variables."""
|
||||||
api_key = os.getenv("ALPHA_VANTAGE_API_KEY")
|
api_key = os.getenv("ALPHA_VANTAGE_API_KEY")
|
||||||
if not api_key:
|
if not api_key:
|
||||||
raise ValueError("ALPHA_VANTAGE_API_KEY environment variable is not set.")
|
raise AlphaVantageNotConfiguredError(
|
||||||
|
"ALPHA_VANTAGE_API_KEY environment variable is not set."
|
||||||
|
)
|
||||||
return api_key
|
return api_key
|
||||||
|
|
||||||
def format_datetime_for_api(date_input) -> str:
|
def format_datetime_for_api(date_input) -> str:
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from .alpha_vantage_common import _make_api_request
|
from .alpha_vantage_common import _make_api_request, AlphaVantageNotConfiguredError
|
||||||
|
|
||||||
def get_indicator(
|
def get_indicator(
|
||||||
symbol: str,
|
symbol: str,
|
||||||
@@ -217,6 +217,11 @@ def get_indicator(
|
|||||||
|
|
||||||
return result_str
|
return result_str
|
||||||
|
|
||||||
|
except AlphaVantageNotConfiguredError:
|
||||||
|
# Vendor unavailable (no API key). Let it propagate so the router can
|
||||||
|
# fall back / emit the no-data sentinel instead of returning this as a
|
||||||
|
# successful-looking error string.
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error getting Alpha Vantage indicator data for {indicator}: {e}")
|
print(f"Error getting Alpha Vantage indicator data for {indicator}: {e}")
|
||||||
return f"Error retrieving {indicator} data: {str(e)}"
|
return f"Error retrieving {indicator} data: {str(e)}"
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ from .alpha_vantage import (
|
|||||||
get_global_news as get_alpha_vantage_global_news,
|
get_global_news as get_alpha_vantage_global_news,
|
||||||
)
|
)
|
||||||
from .alpha_vantage_common import AlphaVantageRateLimitError
|
from .alpha_vantage_common import AlphaVantageRateLimitError
|
||||||
|
from .symbol_utils import NoMarketDataError
|
||||||
|
|
||||||
# Configuration and routing logic
|
# Configuration and routing logic
|
||||||
from .config import get_config
|
from .config import get_config
|
||||||
@@ -147,6 +148,8 @@ def route_to_vendor(method: str, *args, **kwargs):
|
|||||||
if vendor not in fallback_vendors:
|
if vendor not in fallback_vendors:
|
||||||
fallback_vendors.append(vendor)
|
fallback_vendors.append(vendor)
|
||||||
|
|
||||||
|
last_no_data: NoMarketDataError | None = None
|
||||||
|
first_error: Exception | None = None
|
||||||
for vendor in fallback_vendors:
|
for vendor in fallback_vendors:
|
||||||
if vendor not in VENDOR_METHODS[method]:
|
if vendor not in VENDOR_METHODS[method]:
|
||||||
continue
|
continue
|
||||||
@@ -157,6 +160,37 @@ def route_to_vendor(method: str, *args, **kwargs):
|
|||||||
try:
|
try:
|
||||||
return impl_func(*args, **kwargs)
|
return impl_func(*args, **kwargs)
|
||||||
except AlphaVantageRateLimitError:
|
except AlphaVantageRateLimitError:
|
||||||
continue # Only rate limits trigger fallback
|
continue # Rate limits: try the next vendor
|
||||||
|
except NoMarketDataError as e:
|
||||||
|
last_no_data = e # No data here; another vendor may have it
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
# A fallback vendor failing for an incidental reason (e.g. no API
|
||||||
|
# key configured) must not crash the call when another vendor
|
||||||
|
# already determined the symbol simply has no data. Remember the
|
||||||
|
# first error so a genuine primary-vendor failure still surfaces.
|
||||||
|
if first_error is None:
|
||||||
|
first_error = e
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If any vendor reported "no data", the symbol is genuinely unavailable.
|
||||||
|
# Return one explicit, instructive sentinel rather than a vendor-specific
|
||||||
|
# empty string, so the agent reports "unavailable" instead of inventing a
|
||||||
|
# value. This takes precedence over incidental fallback errors.
|
||||||
|
if last_no_data is not None:
|
||||||
|
sym = last_no_data.symbol
|
||||||
|
canonical = last_no_data.canonical
|
||||||
|
resolved = "" if canonical == sym else f" (resolved to '{canonical}')"
|
||||||
|
return (
|
||||||
|
f"NO_DATA_AVAILABLE: No market data found for '{sym}'{resolved} from "
|
||||||
|
f"any configured vendor. The symbol may be invalid, delisted, or not "
|
||||||
|
f"covered by Yahoo Finance / Alpha Vantage. Do not estimate or "
|
||||||
|
f"fabricate values — report that data is unavailable for this symbol."
|
||||||
|
)
|
||||||
|
|
||||||
|
# No vendor returned data and none reported clean "no data" — surface the
|
||||||
|
# first real error (e.g. the primary vendor's network failure).
|
||||||
|
if first_error is not None:
|
||||||
|
raise first_error
|
||||||
|
|
||||||
raise RuntimeError(f"No available vendor for '{method}'")
|
raise RuntimeError(f"No available vendor for '{method}'")
|
||||||
@@ -9,6 +9,7 @@ from typing import Annotated
|
|||||||
import os
|
import os
|
||||||
from .config import get_config
|
from .config import get_config
|
||||||
from .utils import safe_ticker_component
|
from .utils import safe_ticker_component
|
||||||
|
from .symbol_utils import normalize_symbol, NoMarketDataError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -68,9 +69,11 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
|||||||
subsequent calls the cache is reused. Rows after curr_date are
|
subsequent calls the cache is reused. Rows after curr_date are
|
||||||
filtered out so backtests never see future prices.
|
filtered out so backtests never see future prices.
|
||||||
"""
|
"""
|
||||||
# Reject ticker values that would escape the cache directory when
|
# Resolve broker/forex symbols (XAUUSD+ -> GC=F) to Yahoo's convention,
|
||||||
|
# then reject values that would escape the cache directory when
|
||||||
# interpolated into the cache filename (e.g. ``../../tmp/x``).
|
# interpolated into the cache filename (e.g. ``../../tmp/x``).
|
||||||
safe_symbol = safe_ticker_component(symbol)
|
canonical = normalize_symbol(symbol)
|
||||||
|
safe_symbol = safe_ticker_component(canonical)
|
||||||
|
|
||||||
config = get_config()
|
config = get_config()
|
||||||
curr_date_dt = pd.to_datetime(curr_date)
|
curr_date_dt = pd.to_datetime(curr_date)
|
||||||
@@ -87,19 +90,32 @@ def load_ohlcv(symbol: str, curr_date: str) -> pd.DataFrame:
|
|||||||
f"{safe_symbol}-YFin-data-{start_str}-{end_str}.csv",
|
f"{safe_symbol}-YFin-data-{start_str}-{end_str}.csv",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# A cached file may be empty if a prior fetch failed (unknown symbol,
|
||||||
|
# transient rate limit). Treat an empty/columnless cache as a miss and
|
||||||
|
# re-fetch rather than serving the poisoned file forever.
|
||||||
|
data = None
|
||||||
if os.path.exists(data_file):
|
if os.path.exists(data_file):
|
||||||
data = pd.read_csv(data_file, on_bad_lines="skip", encoding="utf-8")
|
cached = pd.read_csv(data_file, on_bad_lines="skip", encoding="utf-8")
|
||||||
else:
|
if not cached.empty and "Close" in cached.columns:
|
||||||
data = yf_retry(lambda: yf.download(
|
data = cached
|
||||||
symbol,
|
|
||||||
|
if data is None:
|
||||||
|
downloaded = yf_retry(lambda: yf.download(
|
||||||
|
canonical,
|
||||||
start=start_str,
|
start=start_str,
|
||||||
end=end_str,
|
end=end_str,
|
||||||
multi_level_index=False,
|
multi_level_index=False,
|
||||||
progress=False,
|
progress=False,
|
||||||
auto_adjust=True,
|
auto_adjust=True,
|
||||||
))
|
))
|
||||||
data = _ensure_date_column(data.reset_index())
|
downloaded = _ensure_date_column(downloaded.reset_index())
|
||||||
data.to_csv(data_file, index=False, encoding="utf-8")
|
# Only cache real data — never persist an empty frame.
|
||||||
|
if downloaded.empty or "Close" not in downloaded.columns:
|
||||||
|
raise NoMarketDataError(
|
||||||
|
symbol, canonical, "Yahoo Finance returned no rows"
|
||||||
|
)
|
||||||
|
downloaded.to_csv(data_file, index=False, encoding="utf-8")
|
||||||
|
data = downloaded
|
||||||
|
|
||||||
data = _clean_dataframe(data)
|
data = _clean_dataframe(data)
|
||||||
|
|
||||||
|
|||||||
131
tradingagents/dataflows/symbol_utils.py
Normal file
131
tradingagents/dataflows/symbol_utils.py
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
"""Symbol normalization and market-data error types for vendor calls.
|
||||||
|
|
||||||
|
Yahoo Finance (the default vendor) uses specific ticker conventions that
|
||||||
|
differ from the broker / TradingView / MT5 style symbols users often type:
|
||||||
|
|
||||||
|
user types Yahoo wants why
|
||||||
|
--------------- --------------- -----------------------------------
|
||||||
|
XAUUSD, XAUUSD+ GC=F gold has no forex pair on Yahoo;
|
||||||
|
it is quoted as a COMEX future
|
||||||
|
EURUSD EURUSD=X spot forex pairs take a ``=X`` suffix
|
||||||
|
BTCUSD BTC-USD crypto pairs use a ``-`` separator
|
||||||
|
SPX500, US500 ^GSPC index CFDs map to Yahoo index symbols
|
||||||
|
|
||||||
|
Passing the raw broker symbol to Yahoo returns an empty result, which the
|
||||||
|
agents previously received as free text and could hallucinate a price
|
||||||
|
around (see issue #781). Centralizing the mapping here means every yfinance
|
||||||
|
entry point resolves symbols the same way, and new instruments are added by
|
||||||
|
appending a table row rather than editing call sites.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class NoMarketDataError(Exception):
|
||||||
|
"""Raised when a vendor returns no rows/records for a symbol.
|
||||||
|
|
||||||
|
Carries both the symbol the user requested and the canonical symbol the
|
||||||
|
vendor was actually queried with, so callers can build a clear message
|
||||||
|
instead of emitting a vendor-specific empty string into the data channel.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, symbol: str, canonical: str | None = None, detail: str = ""):
|
||||||
|
self.symbol = symbol
|
||||||
|
self.canonical = canonical or symbol
|
||||||
|
self.detail = detail
|
||||||
|
msg = f"No market data for {symbol!r}"
|
||||||
|
if canonical and canonical != symbol:
|
||||||
|
msg += f" (queried as {canonical!r})"
|
||||||
|
if detail:
|
||||||
|
msg += f": {detail}"
|
||||||
|
super().__init__(msg)
|
||||||
|
|
||||||
|
|
||||||
|
# Currency codes common enough to appear in retail forex pairs. A bare
# six-letter symbol whose halves are BOTH in this set is treated as a spot
# forex pair and given Yahoo's ``=X`` suffix.
# NOTE(review): these are ISO-4217 codes except CNH (offshore yuan), which is
# a market convention rather than an official ISO code.
_FOREX_CURRENCIES = frozenset(
    {
        "USD", "EUR", "GBP", "JPY", "CHF", "CAD", "AUD", "NZD",
        "CNY", "CNH", "HKD", "SGD", "SEK", "NOK", "DKK", "PLN",
        "MXN", "ZAR", "TRY", "INR", "KRW", "BRL", "RUB", "THB",
    }
)

# Crypto bases that brokers quote against USD without a separator
# (e.g. ``BTCUSD``); normalize_symbol rewrites these as ``<BASE>-USD``.
_CRYPTO_BASES = frozenset(
    {"BTC", "ETH", "SOL", "XRP", "ADA", "DOGE", "LTC", "BCH", "DOT", "AVAX", "LINK"}
)

# Explicit aliases for instruments whose broker symbol does not map to a
# Yahoo symbol by rule. Metals/energy resolve to their front-month future;
# index CFD names resolve to the underlying Yahoo index symbol. Extend by
# adding rows — no call site changes required.
# Keys are matched after upper-casing and stripping a trailing ``+``, so they
# must be written upper-case and without that suffix.
_ALIASES = {
    # Precious metals (spot names -> COMEX/NYMEX futures)
    "XAUUSD": "GC=F", "XAU": "GC=F", "GOLD": "GC=F",
    "XAGUSD": "SI=F", "XAG": "SI=F", "SILVER": "SI=F",
    "XPTUSD": "PL=F", "XPDUSD": "PA=F",
    # Energy
    "WTICOUSD": "CL=F", "USOIL": "CL=F", "WTI": "CL=F",
    "BCOUSD": "BZ=F", "UKOIL": "BZ=F", "BRENT": "BZ=F",
    "NATGAS": "NG=F", "XNGUSD": "NG=F",
    # Copper (an industrial metal, listed here alongside the energy rows)
    "COPPER": "HG=F", "XCUUSD": "HG=F",
    # Index CFDs -> Yahoo index symbols
    "SPX500": "^GSPC", "US500": "^GSPC", "SPX": "^GSPC",
    "NAS100": "^NDX", "US100": "^NDX", "USTEC": "^NDX",
    "US30": "^DJI", "DJI30": "^DJI", "WS30": "^DJI",
    "GER40": "^GDAXI", "GER30": "^GDAXI", "DE40": "^GDAXI",
    "UK100": "^FTSE", "JP225": "^N225", "JPN225": "^N225",
    "FRA40": "^FCHI", "EU50": "^STOXX50E", "HK50": "^HSI",
}

# Yahoo symbols may contain letters, digits, and these structural characters:
# dot, underscore, dash, caret, and equals. ``+`` is deliberately absent; it
# is a broker suffix that normalize_symbol strips before lookup.
_YAHOO_SAFE = re.compile(r"^[A-Za-z0-9._\-\^=]+$")
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_symbol(raw: str) -> str:
    """Translate a user/broker-style symbol into its Yahoo Finance form.

    The input is stripped, upper-cased, and relieved of any trailing ``+``
    (broker CFD marker, e.g. ``XAUUSD+``). The first matching rule decides:

    1. Alias table hit (metals, energy, index CFDs) -> the mapped symbol.
    2. ``<BASE>USD`` with BASE a known crypto base -> ``BASE-USD``.
    3. Six letters forming two known currency codes -> ``PAIR=X``.
    4. Anything else -> the cleaned symbol itself (plain equities, ETFs,
       Yahoo-native symbols such as ``GC=F`` or ``^GSPC``).

    Non-string or blank input is returned untouched. No network access is
    involved; the only side effect is an info log line when the result
    differs from the upper-cased input.
    """
    if not (isinstance(raw, str) and raw.strip()):
        return raw

    requested = raw.strip().upper()
    # Broker CFD/qualifier suffixes Yahoo never uses.
    bare = requested.rstrip("+")

    alias = _ALIASES.get(bare)
    if alias is not None:
        canonical = alias
    elif bare.endswith("USD") and bare[:-3] in _CRYPTO_BASES:
        # Covers three-letter and longer bases alike (BTCUSD, DOGEUSD, ...).
        canonical = f"{bare[:-3]}-USD"
    elif len(bare) == 6 and {bare[:3], bare[3:]} <= _FOREX_CURRENCIES:
        canonical = f"{bare}=X"
    else:
        canonical = bare

    if canonical != requested:
        logger.info("Resolved symbol %r to Yahoo symbol %r", raw, canonical)
    return canonical
|
||||||
|
|
||||||
|
|
||||||
|
def is_yahoo_safe(symbol: str) -> bool:
    """Report whether ``symbol`` is non-empty and made only of Yahoo-legal characters."""
    # Empty (or otherwise falsy) input is never a valid symbol.
    if not symbol:
        return False
    return _YAHOO_SAFE.fullmatch(symbol) is not None
|
||||||
@@ -7,10 +7,12 @@ from typing import Annotated
|
|||||||
|
|
||||||
SavePathType = Annotated[str, "File path to save data. If None, data is not saved."]
|
SavePathType = Annotated[str, "File path to save data. If None, data is not saved."]
|
||||||
|
|
||||||
# Tickers can contain letters, digits, dot, dash, underscore, and caret
|
# Tickers can contain letters, digits, dot, dash, underscore, caret
|
||||||
# (for index symbols like ^GSPC). Anything else is rejected so the value
|
# (index symbols like ^GSPC), equals (futures like GC=F), and plus
|
||||||
# never escapes a containing directory when interpolated into a path.
|
# (forex/CFD symbols like XAUUSD+). None of these enable directory
|
||||||
_TICKER_PATH_RE = re.compile(r"^[A-Za-z0-9._\-\^]+$")
|
# traversal, so the value never escapes a containing directory when
|
||||||
|
# interpolated into a path. Anything else is rejected.
|
||||||
|
_TICKER_PATH_RE = re.compile(r"^[A-Za-z0-9._\-\^=+]+$")
|
||||||
|
|
||||||
|
|
||||||
def safe_ticker_component(value: str, *, max_len: int = 32) -> str:
|
def safe_ticker_component(value: str, *, max_len: int = 32) -> str:
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import pandas as pd
|
|||||||
import yfinance as yf
|
import yfinance as yf
|
||||||
import os
|
import os
|
||||||
from .stockstats_utils import StockstatsUtils, _clean_dataframe, yf_retry, load_ohlcv, filter_financials_by_date
|
from .stockstats_utils import StockstatsUtils, _clean_dataframe, yf_retry, load_ohlcv, filter_financials_by_date
|
||||||
|
from .symbol_utils import normalize_symbol, NoMarketDataError
|
||||||
|
|
||||||
def get_YFin_data_online(
|
def get_YFin_data_online(
|
||||||
symbol: Annotated[str, "ticker symbol of the company"],
|
symbol: Annotated[str, "ticker symbol of the company"],
|
||||||
@@ -15,16 +16,19 @@ def get_YFin_data_online(
|
|||||||
datetime.strptime(start_date, "%Y-%m-%d")
|
datetime.strptime(start_date, "%Y-%m-%d")
|
||||||
datetime.strptime(end_date, "%Y-%m-%d")
|
datetime.strptime(end_date, "%Y-%m-%d")
|
||||||
|
|
||||||
# Create ticker object
|
# Resolve broker/forex symbols to Yahoo's convention (XAUUSD+ -> GC=F).
|
||||||
ticker = yf.Ticker(symbol.upper())
|
canonical = normalize_symbol(symbol)
|
||||||
|
ticker = yf.Ticker(canonical)
|
||||||
|
|
||||||
# Fetch historical data for the specified date range
|
# Fetch historical data for the specified date range
|
||||||
data = yf_retry(lambda: ticker.history(start=start_date, end=end_date))
|
data = yf_retry(lambda: ticker.history(start=start_date, end=end_date))
|
||||||
|
|
||||||
# Check if data is empty
|
# Empty result means the symbol is unknown/delisted. Raise a typed error
|
||||||
|
# instead of returning prose: the routing layer turns it into a single
|
||||||
|
# unambiguous "no data" signal so the agent never fabricates a price.
|
||||||
if data.empty:
|
if data.empty:
|
||||||
return (
|
raise NoMarketDataError(
|
||||||
f"No data found for symbol '{symbol}' between {start_date} and {end_date}"
|
symbol, canonical, f"no rows between {start_date} and {end_date}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Remove timezone info from index for cleaner output
|
# Remove timezone info from index for cleaner output
|
||||||
@@ -40,8 +44,10 @@ def get_YFin_data_online(
|
|||||||
# Convert DataFrame to CSV string
|
# Convert DataFrame to CSV string
|
||||||
csv_string = data.to_csv()
|
csv_string = data.to_csv()
|
||||||
|
|
||||||
# Add header information
|
# Add header information; note the resolved symbol when it differs so the
|
||||||
header = f"# Stock data for {symbol.upper()} from {start_date} to {end_date}\n"
|
# agent (and user) can see which instrument was actually priced.
|
||||||
|
label = canonical if canonical == symbol.upper() else f"{canonical} (from {symbol})"
|
||||||
|
header = f"# Stock data for {label} from {start_date} to {end_date}\n"
|
||||||
header += f"# Total records: {len(data)}\n"
|
header += f"# Total records: {len(data)}\n"
|
||||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||||
|
|
||||||
@@ -162,7 +168,9 @@ def get_stock_stats_indicators_window(
|
|||||||
ind_string = ""
|
ind_string = ""
|
||||||
for date_str, value in date_values:
|
for date_str, value in date_values:
|
||||||
ind_string += f"{date_str}: {value}\n"
|
ind_string += f"{date_str}: {value}\n"
|
||||||
|
|
||||||
|
except NoMarketDataError:
|
||||||
|
raise # Unknown/delisted symbol — let the router emit the sentinel
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error getting bulk stockstats data: {e}")
|
print(f"Error getting bulk stockstats data: {e}")
|
||||||
# Fallback to original implementation if bulk method fails
|
# Fallback to original implementation if bulk method fails
|
||||||
@@ -236,6 +244,8 @@ def get_stockstats_indicator(
|
|||||||
indicator,
|
indicator,
|
||||||
curr_date,
|
curr_date,
|
||||||
)
|
)
|
||||||
|
except NoMarketDataError:
|
||||||
|
raise # Unknown/delisted symbol — let the router emit the sentinel
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(
|
print(
|
||||||
f"Error getting stockstats indicator data for indicator {indicator} on {curr_date}: {e}"
|
f"Error getting stockstats indicator data for indicator {indicator} on {curr_date}: {e}"
|
||||||
@@ -250,12 +260,13 @@ def get_fundamentals(
|
|||||||
curr_date: Annotated[str, "current date (not used for yfinance)"] = None
|
curr_date: Annotated[str, "current date (not used for yfinance)"] = None
|
||||||
):
|
):
|
||||||
"""Get company fundamentals overview from yfinance."""
|
"""Get company fundamentals overview from yfinance."""
|
||||||
|
canonical = normalize_symbol(ticker)
|
||||||
try:
|
try:
|
||||||
ticker_obj = yf.Ticker(ticker.upper())
|
ticker_obj = yf.Ticker(canonical)
|
||||||
info = yf_retry(lambda: ticker_obj.info)
|
info = yf_retry(lambda: ticker_obj.info)
|
||||||
|
|
||||||
if not info:
|
if not info:
|
||||||
return f"No fundamentals data found for symbol '{ticker}'"
|
raise NoMarketDataError(ticker, canonical, "no fundamentals returned")
|
||||||
|
|
||||||
fields = [
|
fields = [
|
||||||
("Name", info.get("longName")),
|
("Name", info.get("longName")),
|
||||||
@@ -293,11 +304,20 @@ def get_fundamentals(
|
|||||||
if value is not None:
|
if value is not None:
|
||||||
lines.append(f"{label}: {value}")
|
lines.append(f"{label}: {value}")
|
||||||
|
|
||||||
header = f"# Company Fundamentals for {ticker.upper()}\n"
|
# yfinance returns a stub dict (e.g. {"trailingPegRatio": None}) for
|
||||||
|
# unknown symbols, so `info` is truthy but every field is empty. Treat
|
||||||
|
# "no usable fields" as no data rather than emitting a bare header the
|
||||||
|
# agent might fabricate around.
|
||||||
|
if not lines:
|
||||||
|
raise NoMarketDataError(ticker, canonical, "no fundamental fields returned")
|
||||||
|
|
||||||
|
header = f"# Company Fundamentals for {canonical}\n"
|
||||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||||
|
|
||||||
return header + "\n".join(lines)
|
return header + "\n".join(lines)
|
||||||
|
|
||||||
|
except NoMarketDataError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error retrieving fundamentals for {ticker}: {str(e)}"
|
return f"Error retrieving fundamentals for {ticker}: {str(e)}"
|
||||||
|
|
||||||
@@ -308,8 +328,9 @@ def get_balance_sheet(
|
|||||||
curr_date: Annotated[str, "current date in YYYY-MM-DD format"] = None
|
curr_date: Annotated[str, "current date in YYYY-MM-DD format"] = None
|
||||||
):
|
):
|
||||||
"""Get balance sheet data from yfinance."""
|
"""Get balance sheet data from yfinance."""
|
||||||
|
canonical = normalize_symbol(ticker)
|
||||||
try:
|
try:
|
||||||
ticker_obj = yf.Ticker(ticker.upper())
|
ticker_obj = yf.Ticker(canonical)
|
||||||
|
|
||||||
if freq.lower() == "quarterly":
|
if freq.lower() == "quarterly":
|
||||||
data = yf_retry(lambda: ticker_obj.quarterly_balance_sheet)
|
data = yf_retry(lambda: ticker_obj.quarterly_balance_sheet)
|
||||||
@@ -319,17 +340,19 @@ def get_balance_sheet(
|
|||||||
data = filter_financials_by_date(data, curr_date)
|
data = filter_financials_by_date(data, curr_date)
|
||||||
|
|
||||||
if data.empty:
|
if data.empty:
|
||||||
return f"No balance sheet data found for symbol '{ticker}'"
|
raise NoMarketDataError(ticker, canonical, "no balance sheet data")
|
||||||
|
|
||||||
# Convert to CSV string for consistency with other functions
|
# Convert to CSV string for consistency with other functions
|
||||||
csv_string = data.to_csv()
|
csv_string = data.to_csv()
|
||||||
|
|
||||||
# Add header information
|
# Add header information
|
||||||
header = f"# Balance Sheet data for {ticker.upper()} ({freq})\n"
|
header = f"# Balance Sheet data for {canonical} ({freq})\n"
|
||||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||||
|
|
||||||
return header + csv_string
|
return header + csv_string
|
||||||
|
|
||||||
|
except NoMarketDataError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error retrieving balance sheet for {ticker}: {str(e)}"
|
return f"Error retrieving balance sheet for {ticker}: {str(e)}"
|
||||||
|
|
||||||
@@ -340,8 +363,9 @@ def get_cashflow(
|
|||||||
curr_date: Annotated[str, "current date in YYYY-MM-DD format"] = None
|
curr_date: Annotated[str, "current date in YYYY-MM-DD format"] = None
|
||||||
):
|
):
|
||||||
"""Get cash flow data from yfinance."""
|
"""Get cash flow data from yfinance."""
|
||||||
|
canonical = normalize_symbol(ticker)
|
||||||
try:
|
try:
|
||||||
ticker_obj = yf.Ticker(ticker.upper())
|
ticker_obj = yf.Ticker(canonical)
|
||||||
|
|
||||||
if freq.lower() == "quarterly":
|
if freq.lower() == "quarterly":
|
||||||
data = yf_retry(lambda: ticker_obj.quarterly_cashflow)
|
data = yf_retry(lambda: ticker_obj.quarterly_cashflow)
|
||||||
@@ -351,17 +375,19 @@ def get_cashflow(
|
|||||||
data = filter_financials_by_date(data, curr_date)
|
data = filter_financials_by_date(data, curr_date)
|
||||||
|
|
||||||
if data.empty:
|
if data.empty:
|
||||||
return f"No cash flow data found for symbol '{ticker}'"
|
raise NoMarketDataError(ticker, canonical, "no cash flow data")
|
||||||
|
|
||||||
# Convert to CSV string for consistency with other functions
|
# Convert to CSV string for consistency with other functions
|
||||||
csv_string = data.to_csv()
|
csv_string = data.to_csv()
|
||||||
|
|
||||||
# Add header information
|
# Add header information
|
||||||
header = f"# Cash Flow data for {ticker.upper()} ({freq})\n"
|
header = f"# Cash Flow data for {canonical} ({freq})\n"
|
||||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||||
|
|
||||||
return header + csv_string
|
return header + csv_string
|
||||||
|
|
||||||
|
except NoMarketDataError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error retrieving cash flow for {ticker}: {str(e)}"
|
return f"Error retrieving cash flow for {ticker}: {str(e)}"
|
||||||
|
|
||||||
@@ -372,8 +398,9 @@ def get_income_statement(
|
|||||||
curr_date: Annotated[str, "current date in YYYY-MM-DD format"] = None
|
curr_date: Annotated[str, "current date in YYYY-MM-DD format"] = None
|
||||||
):
|
):
|
||||||
"""Get income statement data from yfinance."""
|
"""Get income statement data from yfinance."""
|
||||||
|
canonical = normalize_symbol(ticker)
|
||||||
try:
|
try:
|
||||||
ticker_obj = yf.Ticker(ticker.upper())
|
ticker_obj = yf.Ticker(canonical)
|
||||||
|
|
||||||
if freq.lower() == "quarterly":
|
if freq.lower() == "quarterly":
|
||||||
data = yf_retry(lambda: ticker_obj.quarterly_income_stmt)
|
data = yf_retry(lambda: ticker_obj.quarterly_income_stmt)
|
||||||
@@ -383,17 +410,19 @@ def get_income_statement(
|
|||||||
data = filter_financials_by_date(data, curr_date)
|
data = filter_financials_by_date(data, curr_date)
|
||||||
|
|
||||||
if data.empty:
|
if data.empty:
|
||||||
return f"No income statement data found for symbol '{ticker}'"
|
raise NoMarketDataError(ticker, canonical, "no income statement data")
|
||||||
|
|
||||||
# Convert to CSV string for consistency with other functions
|
# Convert to CSV string for consistency with other functions
|
||||||
csv_string = data.to_csv()
|
csv_string = data.to_csv()
|
||||||
|
|
||||||
# Add header information
|
# Add header information
|
||||||
header = f"# Income Statement data for {ticker.upper()} ({freq})\n"
|
header = f"# Income Statement data for {canonical} ({freq})\n"
|
||||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||||
|
|
||||||
return header + csv_string
|
return header + csv_string
|
||||||
|
|
||||||
|
except NoMarketDataError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error retrieving income statement for {ticker}: {str(e)}"
|
return f"Error retrieving income statement for {ticker}: {str(e)}"
|
||||||
|
|
||||||
@@ -402,18 +431,21 @@ def get_insider_transactions(
|
|||||||
ticker: Annotated[str, "ticker symbol of the company"]
|
ticker: Annotated[str, "ticker symbol of the company"]
|
||||||
):
|
):
|
||||||
"""Get insider transactions data from yfinance."""
|
"""Get insider transactions data from yfinance."""
|
||||||
|
canonical = normalize_symbol(ticker)
|
||||||
try:
|
try:
|
||||||
ticker_obj = yf.Ticker(ticker.upper())
|
ticker_obj = yf.Ticker(canonical)
|
||||||
data = yf_retry(lambda: ticker_obj.insider_transactions)
|
data = yf_retry(lambda: ticker_obj.insider_transactions)
|
||||||
|
|
||||||
|
# Empty is normal here (many valid symbols have no insider filings),
|
||||||
|
# so report it plainly rather than treating the symbol as invalid.
|
||||||
if data is None or data.empty:
|
if data is None or data.empty:
|
||||||
return f"No insider transactions data found for symbol '{ticker}'"
|
return f"No insider transactions reported for symbol '{canonical}'"
|
||||||
|
|
||||||
# Convert to CSV string for consistency with other functions
|
# Convert to CSV string for consistency with other functions
|
||||||
csv_string = data.to_csv()
|
csv_string = data.to_csv()
|
||||||
|
|
||||||
# Add header information
|
# Add header information
|
||||||
header = f"# Insider Transactions data for {ticker.upper()}\n"
|
header = f"# Insider Transactions data for {canonical}\n"
|
||||||
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||||
|
|
||||||
return header + csv_string
|
return header + csv_string
|
||||||
|
|||||||
Reference in New Issue
Block a user