fix(dataflows): degrade gracefully when an optional vendor fails

Optional enrichment vendors (FRED macro, Polymarket events) raised on a bad
LLM-supplied indicator, a missing API key, or a network blip, which aborted the
whole run.

- Router: mark macro_data and prediction_markets as optional; a vendor failure
  that would otherwise be re-raised now returns a DATA_UNAVAILABLE sentinel
  string instead. Core categories still raise.
- FRED: reject a descriptive phrase up front and return guidance instead of
  sending it to the API and getting an HTTP 400; a well-formed but unknown
  series ID returns a not-found message instead of raising.
This commit is contained in:
Yijia-Xiao
2026-06-21 21:28:59 +00:00
parent 7bb16c5daa
commit ee1ece3347
4 changed files with 90 additions and 16 deletions

View File

@@ -61,6 +61,19 @@ class FredResolutionTests(unittest.TestCase):
self.assertEqual(fred._resolve_series_id("dgs30"), "DGS30") self.assertEqual(fred._resolve_series_id("dgs30"), "DGS30")
self.assertEqual(fred._resolve_series_id("MyCustomSeries"), "MYCUSTOMSERIES") self.assertEqual(fred._resolve_series_id("MyCustomSeries"), "MYCUSTOMSERIES")
def test_descriptive_phrase_is_rejected(self):
# An LLM phrase (spaces / too long) is not a series ID — reject up front
# with guidance rather than 400ing the API.
for bad in ("bank of japan rate", "the unemployment number", "X" * 31):
with self.assertRaises(ValueError):
fred._resolve_series_id(bad)
def test_get_macro_data_returns_guidance_on_bad_indicator(self):
# Invalid indicator -> actionable message, not a crash (no API call).
out = fred.get_macro_data("bank of japan rate", "2026-01-01")
self.assertIn("FRED", out)
self.assertIn("not a known macro alias", out)
@pytest.mark.unit @pytest.mark.unit
class FredConfigTests(unittest.TestCase): class FredConfigTests(unittest.TestCase):
@@ -99,11 +112,13 @@ class FredFormattingTests(unittest.TestCase):
out = fred.get_macro_data("unemployment", "2025-09-30", 30) out = fred.get_macro_data("unemployment", "2025-09-30", 30)
self.assertIn("No observations", out) self.assertIn("No observations", out)
def test_unknown_series_raises(self): def test_unknown_series_returns_not_found_message(self):
# A well-formed but unknown series ID returns guidance, not a crash, so
# the run is not aborted over an optional macro lookup.
no_series = {"seriess": []} no_series = {"seriess": []}
with mock.patch.object(fred, "_request", side_effect=_request_stub(meta=no_series)), \ with mock.patch.object(fred, "_request", side_effect=_request_stub(meta=no_series)):
self.assertRaises(ValueError): out = fred.get_macro_data("totally_unknown_xyz", "2025-09-30", 30)
fred.get_macro_data("totally_unknown_xyz", "2025-09-30", 30) self.assertIn("not found", out)
def test_long_series_is_truncated_but_change_uses_full_range(self): def test_long_series_is_truncated_but_change_uses_full_range(self):
# Build > MAX_ROWS observations deterministically. # Build > MAX_ROWS observations deterministically.
@@ -157,9 +172,10 @@ class FredRoutingTests(unittest.TestCase):
out = interface.route_to_vendor("get_macro_indicators", "cpi", "2026-06-01", 365) out = interface.route_to_vendor("get_macro_indicators", "cpi", "2026-06-01", 365)
self.assertEqual(out, "MACRO_OK") self.assertEqual(out, "MACRO_OK")
def test_not_configured_surfaces_through_router(self): def test_not_configured_degrades_gracefully(self):
# With only fred and no key, the router has no fallback and must surface # macro_data is optional: with only fred and no key, the router degrades
# the real "not configured" failure rather than masking it. # to a sentinel instead of aborting the run — a missing optional key must
# not crash an analysis.
set_config({"data_vendors": {"macro_data": "fred"}}) set_config({"data_vendors": {"macro_data": "fred"}})
def _unconfigured(*a, **k): def _unconfigured(*a, **k):
@@ -169,8 +185,9 @@ class FredRoutingTests(unittest.TestCase):
interface.VENDOR_METHODS, interface.VENDOR_METHODS,
{"get_macro_indicators": {"fred": _unconfigured}}, {"get_macro_indicators": {"fred": _unconfigured}},
clear=False, clear=False,
), self.assertRaises(fred.FredNotConfiguredError): ):
interface.route_to_vendor("get_macro_indicators", "cpi", "2026-06-01", 365) out = interface.route_to_vendor("get_macro_indicators", "cpi", "2026-06-01", 365)
self.assertIn("DATA_UNAVAILABLE", out)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -96,6 +96,28 @@ class VendorRoutingTests(unittest.TestCase):
result = interface.route_to_vendor("get_stock_data", "AAPL", "2026-01-01", "2026-01-10") result = interface.route_to_vendor("get_stock_data", "AAPL", "2026-01-01", "2026-01-10")
self.assertEqual(result, "AV_DATA") self.assertEqual(result, "AV_DATA")
def _route_method(self, method, vendors):
return mock.patch.dict(interface.VENDOR_METHODS, {method: vendors}, clear=False)
def test_optional_category_degrades_instead_of_raising(self):
# An optional enrichment vendor (FRED macro) that raises must NOT abort
# the run — the router returns a sentinel so the analysis proceeds.
set_config({"data_vendors": {"macro_data": "fred"}})
with self._route_method(
"get_macro_indicators", {"fred": _raises(ValueError("FRED 400: bad series"))}
):
result = interface.route_to_vendor("get_macro_indicators", "cpi", "2026-01-01")
self.assertIn("DATA_UNAVAILABLE", result)
self.assertIn("macro_data", result)
def test_core_category_still_raises_on_error(self):
# A core category (single configured vendor) propagates the error so a
# broken primary is loud, not silently degraded.
set_config({"data_vendors": {"core_stock_apis": "yfinance"}})
with self._route({"yfinance": _raises(ValueError("boom"))}), \
self.assertRaises(ValueError):
interface.route_to_vendor("get_stock_data", "AAPL", "2026-01-01", "2026-01-10")
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@@ -93,13 +93,26 @@ def get_api_key() -> str:
def _resolve_series_id(indicator: str) -> str: def _resolve_series_id(indicator: str) -> str:
"""Map a friendly alias to a FRED series ID, or pass a raw ID through.""" """Map a friendly alias to a FRED series ID, or pass a raw ID through.
Raises ``ValueError`` when the input is neither a known alias nor a plausible
series ID — typically a descriptive phrase the LLM passed instead (e.g.
"bank of japan rate"). FRED IDs are short and alphanumeric, so this rejects
it up front with guidance rather than letting it 400 the API.
"""
key = indicator.strip().lower().replace(" ", "_").replace("-", "_") key = indicator.strip().lower().replace(" ", "_").replace("-", "_")
if key in MACRO_SERIES: if key in MACRO_SERIES:
return MACRO_SERIES[key] return MACRO_SERIES[key]
# Not a known alias: treat the input as a raw FRED series ID (FRED IDs are candidate = indicator.strip().upper()
# conventionally uppercase, e.g. "DGS10", "CPIAUCSL"). # FRED series IDs never contain whitespace and are short; reject anything
return indicator.strip().upper() # else (a descriptive phrase the LLM passed) rather than 400ing the API.
if not candidate or len(candidate) > 30 or any(c.isspace() for c in candidate):
raise ValueError(
f"'{indicator}' is not a known macro alias or a valid FRED series ID. "
f"Use an alias (e.g. 'cpi', 'unemployment', '10y_treasury') or a raw "
f"FRED series ID (e.g. 'CPIAUCSL')."
)
return candidate
def _request(path: str, params: dict) -> dict: def _request(path: str, params: dict) -> dict:
@@ -143,11 +156,18 @@ def get_macro_data(
end_dt = datetime.strptime(curr_date, "%Y-%m-%d") end_dt = datetime.strptime(curr_date, "%Y-%m-%d")
start_date = (end_dt - timedelta(days=look_back_days)).strftime("%Y-%m-%d") start_date = (end_dt - timedelta(days=look_back_days)).strftime("%Y-%m-%d")
series_id = _resolve_series_id(indicator)
# Invalid LLM-supplied indicator: return guidance rather than raising, so a
# bad argument doesn't abort the run (the routing layer also degrades macro
# data, but a specific message is more useful to the analyst).
try:
series_id = _resolve_series_id(indicator)
except ValueError as e:
return f"FRED: {e}"
meta = _request("series", {"series_id": series_id}).get("seriess") or [] meta = _request("series", {"series_id": series_id}).get("seriess") or []
if not meta: if not meta:
raise ValueError( return (
f"FRED series '{series_id}' not found. Pass a known alias " f"FRED series '{series_id}' not found. Pass a known alias "
f"(e.g. 'cpi', 'unemployment') or a valid FRED series ID." f"(e.g. 'cpi', 'unemployment') or a valid FRED series ID."
) )

View File

@@ -84,6 +84,13 @@ VENDOR_LIST = [
"alpha_vantage", "alpha_vantage",
] ]
# Optional enrichment categories. These add macro/event context to the news
# analyst but are not core to a decision, so a vendor failure here degrades to a
# sentinel instead of aborting the run (a bad LLM-supplied indicator, a missing
# key, or a network blip should not crash an analysis over flavour data). Core
# categories (prices, fundamentals, news) still raise so a broken primary is loud.
OPTIONAL_CATEGORIES = {"macro_data", "prediction_markets"}
# Mapping of methods to their vendor-specific implementations # Mapping of methods to their vendor-specific implementations
VENDOR_METHODS = { VENDOR_METHODS = {
# core_stock_apis # core_stock_apis
@@ -240,8 +247,16 @@ def route_to_vendor(method: str, *args, **kwargs):
) )
# No vendor returned data and none reported clean "no data" — surface the # No vendor returned data and none reported clean "no data" — surface the
# first real error (e.g. the primary vendor's network failure). # first real error (e.g. the primary vendor's network failure). Optional
# enrichment categories degrade to a sentinel instead, so flavour data can't
# abort the run.
if first_error is not None: if first_error is not None:
if category in OPTIONAL_CATEGORIES:
logger.warning("Optional %s unavailable for %s: %s", category, method, first_error)
return (
f"DATA_UNAVAILABLE: optional {category} could not be retrieved "
f"({first_error}). Proceed without it; do not fabricate values."
)
raise first_error raise first_error
raise RuntimeError(f"No available vendor for '{method}'") raise RuntimeError(f"No available vendor for '{method}'")