mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-29 19:26:24 +03:00
Trim each provider to current-generation models and drop the special-casing they required: - OpenAI: remove gpt-4.1 (deprecated; the only non-reasoning model). - Anthropic: remove Claude Sonnet 4.5 (legacy; the only Sonnet that 400s on effort). - Google: remove the Gemini 2.5 line (superseded by 3.x). - Gemini client: drop the integer thinking_budget mapping; 3.x takes the string thinking_level directly. Effort/reasoning gates stay as defense in depth for custom model IDs. All kept IDs verified against live APIs.
211 lines
9.2 KiB
Python
211 lines
9.2 KiB
Python
"""Shared model catalog for CLI selections and validation."""
|
|
|
|
from __future__ import annotations
|
|
|
|
ModelOption = tuple[str, str]
|
|
ProviderModeOptions = dict[str, dict[str, list[ModelOption]]]
|
|
|
|
# Providers that serve many / frequently-changing models: offer only "Custom
|
|
# model ID" rather than a list that goes stale.
|
|
_CUSTOM_ONLY: dict[str, list[ModelOption]] = {
|
|
"quick": [("Custom model ID", "custom")],
|
|
"deep": [("Custom model ID", "custom")],
|
|
}
|
|
|
|
|
|
# Shared model list for GLM via Z.AI (international) and BigModel (China).
|
|
# Source: docs.z.ai (GLM Coding Plan supported models + LLM guides).
|
|
# All GLM 4.7+ entries support thinking mode via thinking={"type":"enabled"}.
|
|
_GLM_MODELS: dict[str, list[ModelOption]] = {
|
|
"quick": [
|
|
("GLM-5-Turbo - Fast, switchable thinking modes", "glm-5-turbo"),
|
|
("GLM-4.7 - Previous-gen flagship", "glm-4.7"),
|
|
("GLM-4.5-Air - Lightweight, cost-efficient", "glm-4.5-air"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
"deep": [
|
|
("GLM-5.2 - Latest flagship, 1M ctx", "glm-5.2"),
|
|
("GLM-5.1 - 745B, 200K ctx", "glm-5.1"),
|
|
("GLM-5 - Flagship, 204K ctx", "glm-5"),
|
|
("GLM-4.7 - Previous-gen flagship", "glm-4.7"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
}
|
|
|
|
|
|
# Shared model list for Qwen's global (dashscope-intl) and CN (dashscope) endpoints.
|
|
# Source: modelstudio.console.alibabacloud.com (Featured Models — Flagship + Cost-optimized).
|
|
#
|
|
# Only versioned IDs are exposed in the dropdown. The version-less aliases
|
|
# (qwen-plus, qwen-flash) are documented by Alibaba as auto-upgrading
|
|
# pointers ("backbone, latest, and snapshot ... have been upgraded to the
|
|
# Qwen3 series"), which means their behavior shifts when Alibaba rotates
|
|
# the backing model. Users who want a specific generation pick it
|
|
# explicitly; users who really want auto-latest can enter the alias via
|
|
# "Custom model ID".
|
|
_QWEN_MODELS: dict[str, list[ModelOption]] = {
|
|
"quick": [
|
|
("Qwen 3.7 Plus - Latest, balanced speed/cost", "qwen3.7-plus"),
|
|
("Qwen 3.6 Plus - Previous-gen balanced", "qwen3.6-plus"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
"deep": [
|
|
("Qwen 3.7 Max - Latest flagship, most intelligent, 1M ctx", "qwen3.7-max"),
|
|
("Qwen 3.6 Max - Previous-gen flagship", "qwen3.6-max"),
|
|
("Qwen 3.7 Plus - Balanced alternative", "qwen3.7-plus"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
}
|
|
|
|
|
|
# Shared model list for MiniMax's global and CN endpoints (same IDs).
|
|
# Full official lineup per platform.minimax.io/docs/api-reference/text-openai-api.
|
|
# M3 carries a 1M-token context window; the M2.x line is 204,800 tokens.
|
|
_MINIMAX_MODELS: dict[str, list[ModelOption]] = {
|
|
"quick": [
|
|
("MiniMax-M3 - Latest, 1M ctx, native multimodal", "MiniMax-M3"),
|
|
("MiniMax-M2.7-highspeed - Fast M2.7, 204K ctx, ~100 TPS", "MiniMax-M2.7-highspeed"),
|
|
("MiniMax-M2.5-highspeed - Previous-gen highspeed, 204K ctx", "MiniMax-M2.5-highspeed"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
"deep": [
|
|
("MiniMax-M3 - Latest flagship, 1M ctx, multimodal coding/agent", "MiniMax-M3"),
|
|
("MiniMax-M2.7 - Previous flagship, 204K ctx", "MiniMax-M2.7"),
|
|
("MiniMax-M2.7-highspeed - Same quality as M2.7, ~100 TPS", "MiniMax-M2.7-highspeed"),
|
|
("MiniMax-M2.5 - Earlier flagship, 204K ctx", "MiniMax-M2.5"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
}
|
|
|
|
|
|
MODEL_OPTIONS: ProviderModeOptions = {
|
|
"openai": {
|
|
"quick": [
|
|
("GPT-5.4 Mini - Fast, strong coding and tool use", "gpt-5.4-mini"),
|
|
("GPT-5.4 Nano - Cheapest, high-volume tasks", "gpt-5.4-nano"),
|
|
("GPT-5.5 - Latest frontier, 1M context", "gpt-5.5"),
|
|
],
|
|
"deep": [
|
|
("GPT-5.5 - Latest frontier, 1M context", "gpt-5.5"),
|
|
("GPT-5.4 - Previous-gen frontier, 1M context, cost-effective", "gpt-5.4"),
|
|
("GPT-5.2 - Strong reasoning, cost-effective", "gpt-5.2"),
|
|
("GPT-5.5 Pro - Most capable, expensive ($30/$180 per 1M tokens)", "gpt-5.5-pro"),
|
|
],
|
|
},
|
|
"anthropic": {
|
|
"quick": [
|
|
("Claude Sonnet 4.6 - Best speed and intelligence balance", "claude-sonnet-4-6"),
|
|
("Claude Haiku 4.5 - Fastest with near-frontier intelligence", "claude-haiku-4-5"),
|
|
],
|
|
"deep": [
|
|
("Claude Opus 4.8 - Latest frontier, agentic coding and reasoning", "claude-opus-4-8"),
|
|
("Claude Opus 4.7 - Previous frontier, long-running agents", "claude-opus-4-7"),
|
|
("Claude Opus 4.6 - Frontier intelligence, agents and coding", "claude-opus-4-6"),
|
|
("Claude Sonnet 4.6 - Best speed and intelligence balance", "claude-sonnet-4-6"),
|
|
],
|
|
},
|
|
"google": {
|
|
"quick": [
|
|
("Gemini 3.5 Flash - Latest, frontier agentic + coding (GA)", "gemini-3.5-flash"),
|
|
("Gemini 3.1 Flash Lite - Most cost-efficient", "gemini-3.1-flash-lite"),
|
|
],
|
|
"deep": [
|
|
("Gemini 3.1 Pro - Reasoning-first, complex workflows (preview)", "gemini-3.1-pro-preview"),
|
|
("Gemini 3.5 Flash - Latest GA, strong agentic + coding", "gemini-3.5-flash"),
|
|
],
|
|
},
|
|
"xai": {
|
|
"quick": [
|
|
("Grok 4.3 - Latest flagship, fast with built-in reasoning", "grok-4.3"),
|
|
("Grok 4.20 (Non-Reasoning) - Speed-optimized", "grok-4.20-0309-non-reasoning"),
|
|
("Grok Build 0.1 - Coding-specialized, 256K ctx", "grok-build-0.1"),
|
|
],
|
|
"deep": [
|
|
("Grok 4.3 - Latest flagship, built-in reasoning, 1M ctx", "grok-4.3"),
|
|
("Grok 4.20 (Reasoning) - Previous-gen reasoning", "grok-4.20-0309-reasoning"),
|
|
("Grok 4.20 Multi-Agent - Multi-agent reasoning", "grok-4.20-multi-agent-0309"),
|
|
],
|
|
},
|
|
# DeepSeek: the deepseek-chat / deepseek-reasoner aliases are deprecated
|
|
# (2026-07-24) and now map to V4 Flash; expose the V4 IDs directly. V4 Flash
|
|
# serves both non-thinking and thinking modes (the DeepSeekChatOpenAI client
|
|
# handles the reasoning_content round-trip).
|
|
"deepseek": {
|
|
"quick": [
|
|
("DeepSeek V4 Flash - Latest fast model, thinking + non-thinking", "deepseek-v4-flash"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
"deep": [
|
|
("DeepSeek V4 Pro - Latest flagship", "deepseek-v4-pro"),
|
|
("DeepSeek V4 Flash - Fast, supports thinking", "deepseek-v4-flash"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
},
|
|
# Qwen: same model IDs across global (dashscope-intl) and China
|
|
# (dashscope) endpoints, so the two provider keys share one model list.
|
|
"qwen": _QWEN_MODELS,
|
|
"qwen-cn": _QWEN_MODELS,
|
|
# GLM: Z.AI (international) and BigModel (China) host the same model
|
|
# IDs; the two provider keys share one model list.
|
|
"glm": _GLM_MODELS,
|
|
"glm-cn": _GLM_MODELS,
|
|
# MiniMax: same model IDs across global (.io) and China (.com) regions,
|
|
# so the two provider keys share one model list.
|
|
"minimax": _MINIMAX_MODELS,
|
|
"minimax-cn": _MINIMAX_MODELS,
|
|
# OpenRouter: fetched dynamically. Azure: any deployed model name.
|
|
# Ollama display labels intentionally omit a "local" marker — the
|
|
# endpoint is now configurable via OLLAMA_BASE_URL, so the same labels
|
|
# apply whether the user runs ollama-serve on localhost or against a
|
|
# remote host. The actual resolved endpoint is surfaced separately by
|
|
# cli.utils.confirm_ollama_endpoint() right after provider selection.
|
|
# "Custom model ID" lets users pick any model they have pulled via
|
|
# `ollama pull` beyond the three suggested defaults.
|
|
"ollama": {
|
|
"quick": [
|
|
("Qwen3:latest (8B)", "qwen3:latest"),
|
|
("GPT-OSS:latest (20B)", "gpt-oss:latest"),
|
|
("GLM-4.7-Flash:latest (30B)", "glm-4.7-flash:latest"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
"deep": [
|
|
("GLM-4.7-Flash:latest (30B)", "glm-4.7-flash:latest"),
|
|
("GPT-OSS:latest (20B)", "gpt-oss:latest"),
|
|
("Qwen3:latest (8B)", "qwen3:latest"),
|
|
("Custom model ID", "custom"),
|
|
],
|
|
},
|
|
# Generic OpenAI-compatible endpoint: the model is whatever the user's
|
|
# server serves, so only "Custom model ID" is offered.
|
|
"openai_compatible": _CUSTOM_ONLY,
|
|
# Hosted OpenAI-compatible providers that serve many (and frequently
|
|
# changing) models — offer "Custom model ID" rather than a list that goes
|
|
# stale. The endpoint + key are wired by the provider; the user picks the
|
|
# model their account has access to.
|
|
"mistral": _CUSTOM_ONLY,
|
|
"kimi": _CUSTOM_ONLY,
|
|
"groq": _CUSTOM_ONLY,
|
|
"nvidia": _CUSTOM_ONLY,
|
|
# Bedrock model IDs / cross-region inference profile IDs are user-specified.
|
|
"bedrock": _CUSTOM_ONLY,
|
|
}
|
|
|
|
|
|
def get_model_options(provider: str, mode: str) -> list[ModelOption]:
|
|
"""Return shared model options for a provider and selection mode."""
|
|
return MODEL_OPTIONS[provider.lower()][mode]
|
|
|
|
|
|
def get_known_models() -> dict[str, list[str]]:
|
|
"""Build known model names from the shared CLI catalog."""
|
|
return {
|
|
provider: sorted(
|
|
{
|
|
value
|
|
for options in mode_options.values()
|
|
for _, value in options
|
|
}
|
|
)
|
|
for provider, mode_options in MODEL_OPTIONS.items()
|
|
}
|