fix(cli): unify ticker handling with the data-path symbol normalizer

The CLI validated, normalized, and classified tickers with its own logic that
diverged from the data layer: it rejected '=' symbols like GC=F (#980),
classified BTCUSD as a stock (#981), and accepted unpriceable BTC-USDT (#982).
Route the CLI through normalize_symbol (now mapping USDT/USDC crypto quotes to
Yahoo's -USD pair), so validation, classification, and pricing agree.
This commit is contained in:
Yijia-Xiao
2026-06-13 20:50:21 +00:00
parent 7c8fe2fe9f
commit 76add9048f
3 changed files with 120 additions and 12 deletions

View File

@@ -24,6 +24,17 @@ ANALYST_ORDER = [
# Symbol endings that mark a ticker as crypto. Kept as a tuple so it can be
# passed directly to ``str.endswith`` (see ``detect_asset_type``).
CRYPTO_SUFFIXES = ("-USD", "-USDT", "-USDC", "-BTC", "-ETH")
def is_valid_ticker_input(value: str) -> bool:
    """Return whether a ticker entry is acceptable (charset + length).

    Allows the characters Yahoo symbols use: ASCII letters and digits plus
    ``.``, ``_``, ``-``, ``^`` (indices) and ``=`` (futures/forex such as
    ``GC=F`` and ``EURUSD=X``, #980). Empty or whitespace-only input is
    accepted because it defaults to SPY downstream.

    Args:
        value: Raw text typed by the user; surrounding whitespace is ignored.

    Returns:
        ``True`` if the stripped input is empty, or is at most 32 characters
        long and made up only of the allowed characters; ``False`` otherwise.
    """
    v = value.strip()
    if not v:
        return True
    if len(v) > 32:
        return False
    # ``str.isalnum`` alone is true for non-ASCII letters/digits (e.g. "é",
    # "²"), which the data layer's ASCII-only symbol pattern would reject, so
    # require ASCII explicitly to keep CLI validation and the data path aligned.
    return all((ch.isascii() and ch.isalnum()) or ch in "._-^=" for ch in v)
def get_ticker() -> str:
"""Prompt the user to enter a ticker symbol, preserving exchange suffixes.
@@ -34,9 +45,8 @@ def get_ticker() -> str:
ticker = questionary.text(
f"Enter ticker symbol (e.g. {TICKER_INPUT_EXAMPLES}):",
validate=lambda x: (
not x.strip()
or (all(ch.isalnum() or ch in "._-^" for ch in x.strip()) and len(x.strip()) <= 32)
or "Please enter a valid ticker symbol, e.g. AAPL, 000404.SZ, 0700.HK."
is_valid_ticker_input(x)
or "Please enter a valid ticker symbol, e.g. AAPL, 000404.SZ, 0700.HK, GC=F."
),
style=questionary.Style(
[
@@ -54,13 +64,26 @@ def get_ticker() -> str:
def normalize_ticker_symbol(ticker: str) -> str:
    """Resolve user input to its canonical Yahoo symbol (single source of truth).

    Delegates to the data layer's ``normalize_symbol`` so the symbol the CLI
    passes through the pipeline is exactly the one the data path will price
    (e.g. ``BTCUSD`` -> ``BTC-USD``, ``XAUUSD`` -> ``GC=F``).

    Args:
        ticker: Raw ticker text as entered by the user.

    Returns:
        The canonical symbol from ``normalize_symbol``. If the data layer
        cannot be imported, or normalization raises, the stripped upper-cased
        input is returned instead.
    """
    try:
        # Imported lazily so the CLI still works when the data layer is
        # unavailable.
        from tradingagents.dataflows.symbol_utils import normalize_symbol

        return normalize_symbol(ticker)
    except Exception:
        # Deliberate best-effort: never let symbol normalization crash the
        # prompt flow; fall back to the plain upper-cased symbol.
        return ticker.strip().upper()
def detect_asset_type(ticker: str) -> AssetType:
    """Classify a ticker as crypto or stock based on its canonical symbol.

    The input is first resolved with ``normalize_ticker_symbol`` so that
    e.g. ``BTCUSD`` and ``BTC-USDT`` both read as crypto (#981/#982),
    matching what the data path will actually fetch.

    Args:
        ticker: Raw ticker text as entered by the user.

    Returns:
        ``AssetType.CRYPTO`` when the canonical symbol ends with one of
        ``CRYPTO_SUFFIXES``; ``AssetType.STOCK`` otherwise.
    """
    canonical = normalize_ticker_symbol(ticker)
    # ``str.endswith`` accepts the whole suffix tuple in a single call.
    if canonical.endswith(CRYPTO_SUFFIXES):
        return AssetType.CRYPTO
    return AssetType.STOCK

View File

@@ -0,0 +1,62 @@
"""CLI symbol validation/classification must agree with the data path.
Regressions for #980 (validation rejected GC=F), #981 (BTCUSD misclassified as
stock), #982 (BTC-USDT accepted but unpriceable on Yahoo).
"""
import pytest
from cli.models import AssetType
from cli.utils import detect_asset_type, is_valid_ticker_input, normalize_ticker_symbol
from tradingagents.dataflows.symbol_utils import normalize_symbol
# --- #982: stablecoin-quoted crypto normalizes to Yahoo's -USD pair ---
@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        ("BTCUSD", "BTC-USD"),
        ("BTCUSDT", "BTC-USD"),
        ("BTC-USDT", "BTC-USD"),
        ("BTC-USDC", "BTC-USD"),
        ("ethusdt", "ETH-USD"),
        # non-crypto symbols must not be rewritten by the crypto rule
        ("AAPL", "AAPL"),
        ("GC=F", "GC=F"),
        ("600519.SS", "600519.SS"),
        ("EURUSD", "EURUSD=X"),
    ],
)
def test_normalize_symbol_crypto_and_passthrough(raw, expected):
    """USD/USDT/USDC-quoted crypto maps to ``<BASE>-USD``; other symbols keep
    their usual resolution."""
    canonical = normalize_symbol(raw)
    assert canonical == expected
# --- #980: validation accepts Yahoo futures/forex symbols ---
@pytest.mark.parametrize(
    ("value", "ok"),
    [
        ("GC=F", True),
        ("EURUSD=X", True),
        ("AAPL", True),
        ("0700.HK", True),
        ("^GSPC", True),
        ("", True),  # empty -> defaults to SPY downstream
        ("bad symbol!", False),  # space + '!' rejected
        ("A" * 40, False),  # too long
    ],
)
def test_ticker_input_validation(value, ok):
    """The CLI validator accepts Yahoo-style symbols and rejects bad charset
    or over-long input."""
    verdict = is_valid_ticker_input(value)
    assert verdict is ok
# --- #981/#982: asset-type classified on the canonical symbol ---
@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        ("BTCUSD", AssetType.CRYPTO),
        ("BTC-USDT", AssetType.CRYPTO),
        ("BTC-USD", AssetType.CRYPTO),
        ("ETHUSD", AssetType.CRYPTO),
        ("AAPL", AssetType.STOCK),
        ("GC=F", AssetType.STOCK),
        ("600519.SS", AssetType.STOCK),
    ],
)
def test_detect_asset_type(raw, expected):
    """Asset type is decided on the canonical symbol, not the raw input."""
    detected = detect_asset_type(raw)
    assert detected == expected
@pytest.mark.parametrize("raw", ["XAUUSD", "BTCUSD", "btc-usdt", "AAPL"])
def test_cli_normalize_delegates_to_data_layer(raw):
    """CLI must produce the same canonical symbol the data path will price.

    Parametrized (rather than looping inside one test) so every symbol is
    checked and reported independently instead of stopping at the first
    mismatch.
    """
    assert normalize_ticker_symbol(raw) == normalize_symbol(raw)

View File

@@ -89,12 +89,36 @@ _ALIASES = {
# Matches a non-empty string made up only of ASCII letters, digits, and the
# punctuation ``.``, ``_``, ``-``, ``^``, ``=``.
_YAHOO_SAFE = re.compile(r"^[A-Za-z0-9._\-\^=]+$")

# Crypto quote currencies that all map to Yahoo's USD pair. Yahoo lists only
# ``<BASE>-USD`` (not the USDT/USDC stablecoin pairs), so a broker symbol quoted
# in any of these resolves to ``-USD`` (#982). Listed longest first for
# readability; ``_normalize_crypto`` tests them with ``str.endswith`` and no
# entry is a suffix of another, so at most one can match a given symbol.
_CRYPTO_QUOTES = ("USDT", "USDC", "USD")
def _normalize_crypto(s: str) -> str | None:
    """Map a USD/USDT/USDC-quoted crypto symbol to Yahoo's ``<BASE>-USD`` form.

    Dashes are ignored, so ``BTCUSD``, ``BTCUSDT``, ``BTC-USDT`` and
    ``BTC-USDC`` all resolve to ``BTC-USD``.

    Args:
        s: Symbol to inspect; compared as-is against ``_CRYPTO_QUOTES`` and
            ``_CRYPTO_BASES`` (no case folding is done here).

    Returns:
        ``"<BASE>-USD"`` when the symbol ends with a known quote currency and
        the remaining prefix is a known crypto base; ``None`` otherwise.
    """
    undashed = s.replace("-", "")
    # Only the first quote currency that matches the tail is considered.
    matched_quote = next(
        (quote for quote in _CRYPTO_QUOTES if undashed.endswith(quote)),
        None,
    )
    if matched_quote is None:
        return None
    base = undashed[: -len(matched_quote)]
    if base not in _CRYPTO_BASES:
        return None
    return f"{base}-USD"
def normalize_symbol(raw: str) -> str:
"""Map a user/broker symbol to its canonical Yahoo Finance symbol.
Resolution order (first match wins):
1. Explicit alias table (metals, energy, index CFDs).
2. Crypto rule: ``<BASE>USD`` where BASE is a known crypto -> ``BASE-USD``.
2. Crypto rule: a known crypto base quoted in USD/USDT/USDC (dashed or
not) -> ``BASE-USD``.
3. Forex rule: six letters that are two ISO currency codes -> ``PAIR=X``.
4. Otherwise the upper-cased symbol is returned unchanged (plain
equities, ETFs, Yahoo-native symbols like ``GC=F`` or ``^GSPC``).
@@ -110,12 +134,11 @@ def normalize_symbol(raw: str) -> str:
# Broker CFD/qualifier suffixes Yahoo never uses.
s = s.rstrip("+")
crypto = _normalize_crypto(s)
if s in _ALIASES:
canonical = _ALIASES[s]
elif len(s) == 6 and s[:3] in _CRYPTO_BASES and s[3:] == "USD":
canonical = f"{s[:3]}-USD"
elif s[:-3] in _CRYPTO_BASES and s.endswith("USD") and "-" not in s:
canonical = f"{s[:-3]}-USD"
elif crypto is not None:
canonical = crypto
elif len(s) == 6 and s[:3] in _FOREX_CURRENCIES and s[3:] in _FOREX_CURRENCIES:
canonical = f"{s}=X"
else: