From 22bb91bd839dc382f244313fc7392d0e64b04590 Mon Sep 17 00:00:00 2001 From: Yijia-Xiao Date: Mon, 11 May 2026 01:12:28 +0000 Subject: [PATCH] fix(llm): structured output for DeepSeek V4 and reasoner DeepSeek V4 and reasoner reject tool_choice but accept tools. Route via a per-model capability table that suppresses tool_choice for thinking-mode models. #678 #689 --- tests/test_capabilities.py | 79 +++++++++++++ tests/test_deepseek_reasoning.py | 125 ++++++++++++++++----- tradingagents/llm_clients/capabilities.py | 95 ++++++++++++++++ tradingagents/llm_clients/openai_client.py | 64 ++++++----- 4 files changed, 306 insertions(+), 57 deletions(-) create mode 100644 tests/test_capabilities.py create mode 100644 tradingagents/llm_clients/capabilities.py diff --git a/tests/test_capabilities.py b/tests/test_capabilities.py new file mode 100644 index 000000000..d65e93d0e --- /dev/null +++ b/tests/test_capabilities.py @@ -0,0 +1,79 @@ +"""Unit tests for the LLM capability table.""" + +import pytest + +from tradingagents.llm_clients.capabilities import ( + ModelCapabilities, + get_capabilities, +) + + +@pytest.mark.unit +class TestExactIdMatches: + def test_deepseek_chat_supports_tool_choice(self): + caps = get_capabilities("deepseek-chat") + assert caps.supports_tool_choice is True + + def test_deepseek_reasoner_rejects_tool_choice(self): + caps = get_capabilities("deepseek-reasoner") + assert caps.supports_tool_choice is False + assert caps.requires_reasoning_content_roundtrip is True + + def test_deepseek_v4_flash_rejects_tool_choice(self): + caps = get_capabilities("deepseek-v4-flash") + assert caps.supports_tool_choice is False + assert caps.requires_reasoning_content_roundtrip is True + + def test_deepseek_v4_pro_rejects_tool_choice(self): + caps = get_capabilities("deepseek-v4-pro") + assert caps.supports_tool_choice is False + assert caps.requires_reasoning_content_roundtrip is True + + +@pytest.mark.unit +class TestPatternMatches: + """Forward-compat regex patterns catch unknown DeepSeek variants.""" + + def test_future_deepseek_v5_inherits_thinking_quirks(self): + caps = get_capabilities("deepseek-v5-flash") + assert caps.supports_tool_choice is False + assert caps.requires_reasoning_content_roundtrip is True + + def test_future_deepseek_v9_inherits_thinking_quirks(self): + caps = get_capabilities("deepseek-v9-anything") + assert caps.supports_tool_choice is False + + def test_reasoner_variant_inherits_thinking_quirks(self): + caps = get_capabilities("deepseek-reasoner-pro") + assert caps.supports_tool_choice is False + + +@pytest.mark.unit +class TestDefault: + """Unknown / non-DeepSeek models get the permissive default.""" + + def test_gpt_default(self): + caps = get_capabilities("gpt-4.1") + assert caps.supports_tool_choice is True + assert caps.preferred_structured_method == "function_calling" + + def test_grok_default(self): + caps = get_capabilities("grok-4-0709") + assert caps.supports_tool_choice is True + + def test_unknown_model_default(self): + caps = get_capabilities("totally-made-up-model-id") + assert caps.supports_tool_choice is True + + def test_exact_match_precedes_pattern(self): + """deepseek-chat must NOT match the v\\d regex.""" + caps = get_capabilities("deepseek-chat") + assert caps.supports_tool_choice is True + + +@pytest.mark.unit +def test_capabilities_dataclass_is_frozen(): + """Capability rows are immutable so they can be safely shared.""" + caps = get_capabilities("deepseek-chat") + with pytest.raises(Exception): + caps.supports_tool_choice = False # type: ignore[misc] diff --git a/tests/test_deepseek_reasoning.py b/tests/test_deepseek_reasoning.py index fb300336d..62c1b3497 100644 --- a/tests/test_deepseek_reasoning.py +++ b/tests/test_deepseek_reasoning.py @@ -5,9 +5,10 @@ Two pieces verified: 1. ``reasoning_content`` is captured on receive into the AIMessage's ``additional_kwargs`` and re-attached on send so DeepSeek's API sees the same value across turns. -2. ``with_structured_output`` raises NotImplementedError for - ``deepseek-reasoner`` so the agent factories' free-text fallback - handles the request instead of failing at runtime. +2. ``with_structured_output`` consults the capability table and + suppresses ``tool_choice`` for models that reject it (V4 + reasoner), + matching DeepSeek's official tool-calling pattern at + https://api-docs.deepseek.com/guides/tool_calls. """ import os @@ -15,6 +16,7 @@ import os import pytest from langchain_core.messages import AIMessage, HumanMessage from langchain_core.prompt_values import ChatPromptValue +from pydantic import BaseModel from tradingagents.llm_clients.openai_client import ( DeepSeekChatOpenAI, @@ -115,42 +117,111 @@ class TestDeepSeekReasoningContent: # --------------------------------------------------------------------------- -# deepseek-reasoner: structured output unavailable, falls through to free-text +# Capability-driven structured output: tool_choice suppressed for V4 + reasoner # --------------------------------------------------------------------------- +def _bound_kwargs(runnable): + """Extract bind() kwargs from a with_structured_output result.""" + first = runnable.steps[0] if hasattr(runnable, "steps") else runnable + return getattr(first, "kwargs", {}) + + @pytest.mark.unit -class TestDeepSeekReasonerStructuredOutput: - def test_with_structured_output_raises_for_reasoner(self): - client = DeepSeekChatOpenAI( - model="deepseek-reasoner", - api_key="placeholder", - base_url="https://api.deepseek.com", +class TestStructuredOutputCapabilityDispatch: + """DeepSeek V4 and reasoner reject the tool_choice parameter + (official guide: api-docs.deepseek.com/guides/tool_calls passes + tools=[...] without tool_choice). Verify the capability dispatch + suppresses tool_choice for those models and sends it for chat.""" + + class _Sample(BaseModel): + answer: str + + def _client(self, model): + return DeepSeekChatOpenAI( + model=model, api_key="placeholder", base_url="https://api.deepseek.com", ) - from pydantic import BaseModel - class _Sample(BaseModel): - answer: str + def test_chat_sends_tool_choice(self): + bound = self._client("deepseek-chat").with_structured_output(self._Sample) + assert _bound_kwargs(bound).get("tool_choice") is not None - with pytest.raises(NotImplementedError): - client.with_structured_output(_Sample) + def test_reasoner_suppresses_tool_choice(self): + bound = self._client("deepseek-reasoner").with_structured_output(self._Sample) + # tool_choice is either absent or explicitly None — both are valid + # signals that langchain's bind_tools will skip the parameter. + assert _bound_kwargs(bound).get("tool_choice") in (None, ...) or \ + "tool_choice" not in _bound_kwargs(bound) - def test_with_structured_output_works_for_v4(self): - """V4 models (non-reasoner) accept tool_choice; structured output works.""" + def test_v4_flash_suppresses_tool_choice(self): + bound = self._client("deepseek-v4-flash").with_structured_output(self._Sample) + assert _bound_kwargs(bound).get("tool_choice") is None or \ + "tool_choice" not in _bound_kwargs(bound) + + def test_v4_pro_suppresses_tool_choice(self): + bound = self._client("deepseek-v4-pro").with_structured_output(self._Sample) + assert _bound_kwargs(bound).get("tool_choice") is None or \ + "tool_choice" not in _bound_kwargs(bound) + + def test_future_v_variant_via_regex(self): + """Forward-compat: unknown deepseek-v\\d-* IDs inherit V4 quirks.""" + bound = self._client("deepseek-v5-hypothetical").with_structured_output(self._Sample) + assert _bound_kwargs(bound).get("tool_choice") is None or \ + "tool_choice" not in _bound_kwargs(bound) + + def test_schema_is_still_bound_as_tool(self): + """tool_choice is suppressed, but the schema is still bound as a tool — + exactly matching DeepSeek's official tool-calling examples.""" + bound = self._client("deepseek-reasoner").with_structured_output(self._Sample) + kwargs = _bound_kwargs(bound) + tools = kwargs.get("tools", []) + assert any( + t.get("function", {}).get("name") == "_Sample" for t in tools + ), f"schema not bound as a tool: {tools}" + + +# --------------------------------------------------------------------------- +# Live API: structured output round-trips against the real DeepSeek backend +# --------------------------------------------------------------------------- + + +def _has_real_deepseek_key(): + key = os.environ.get("DEEPSEEK_API_KEY", "") + return bool(key) and key != "placeholder" + + +@pytest.mark.integration +@pytest.mark.skipif( + not _has_real_deepseek_key(), + reason="DEEPSEEK_API_KEY not set (or placeholder); skipping live API call", +) +class TestDeepSeekLiveStructuredOutput: + """End-to-end: a real DeepSeek V4-flash call returns a typed instance. + + Verifies the no-tool_choice path doesn't trigger the 400 reported in + issue #678 and that the structured-output binding still parses to a + Pydantic instance. + """ + + class _Pick(BaseModel): + action: str + confidence: float + + def test_v4_flash_returns_structured_output(self): client = DeepSeekChatOpenAI( model="deepseek-v4-flash", - api_key="placeholder", + api_key=os.environ["DEEPSEEK_API_KEY"], base_url="https://api.deepseek.com", + timeout=60, ) - from pydantic import BaseModel - - class _Sample(BaseModel): - answer: str - - # Should return a Runnable, not raise. (The actual API call would - # require a real key; we only assert binding succeeds.) - wrapped = client.with_structured_output(_Sample) - assert wrapped is not None + bound = client.with_structured_output(self._Pick) + result = bound.invoke( + "Pick BUY or SELL or HOLD for a tech stock with strong earnings. " + "Confidence is a float between 0 and 1." + ) + assert isinstance(result, self._Pick) + assert result.action in {"BUY", "SELL", "HOLD"} + assert 0.0 <= result.confidence <= 1.0 # --------------------------------------------------------------------------- diff --git a/tradingagents/llm_clients/capabilities.py b/tradingagents/llm_clients/capabilities.py new file mode 100644 index 000000000..3c14461f2 --- /dev/null +++ b/tradingagents/llm_clients/capabilities.py @@ -0,0 +1,95 @@ +"""Declarative per-model capability table for OpenAI-compatible providers. + +This is the single place that knows which model IDs reject which API +parameters or require which structured-output method. The LLM client +subclasses consult ``get_capabilities(model_name)`` instead of hardcoding +model-name ``if`` ladders, so adding a new model (or a new provider quirk) +means editing this table — not the client code. + +Pattern adapted from the per-model ``compat:`` flags DeepSeek themselves +publish in their integration guides (e.g. the Oh My Pi config schema +documents ``supportsToolChoice``, ``requiresReasoningContentForToolCalls`` +as declarative per-model fields). +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import Literal + + +StructuredMethod = Literal[ + "function_calling", # uses tools; respects supports_tool_choice + "json_mode", # uses response_format={"type":"json_object"} + "json_schema", # uses response_format={"type":"json_schema",...} + "none", # no structured output available; caller falls back to free-text +] + + +@dataclass(frozen=True) +class ModelCapabilities: + """What an OpenAI-compatible model accepts at the API level.""" + + supports_tool_choice: bool + supports_json_mode: bool + supports_json_schema: bool + preferred_structured_method: StructuredMethod + # DeepSeek thinking-mode models 400 if reasoning_content from prior + # assistant turns is not echoed back on the next request. + requires_reasoning_content_roundtrip: bool = False + + +# DeepSeek's thinking models accept the ``tools`` array but reject the +# ``tool_choice`` parameter (official Oh My Pi integration guide and the +# 400 response in issue #678). Their official tool-calling examples +# (api-docs.deepseek.com/guides/tool_calls) pass ``tools=[...]`` without +# ``tool_choice`` — we mirror that pattern by setting supports_tool_choice +# to False and letting the client suppress the kwarg. +_DEEPSEEK_THINKING = ModelCapabilities( + supports_tool_choice=False, + supports_json_mode=True, + supports_json_schema=False, + preferred_structured_method="function_calling", + requires_reasoning_content_roundtrip=True, +) + +_DEEPSEEK_CHAT = ModelCapabilities( + supports_tool_choice=True, + supports_json_mode=True, + supports_json_schema=False, + preferred_structured_method="function_calling", +) + +_DEFAULT = ModelCapabilities( + supports_tool_choice=True, + supports_json_mode=True, + supports_json_schema=True, + preferred_structured_method="function_calling", +) + + +# Exact-ID matches take precedence over pattern matches. +_BY_ID: dict[str, ModelCapabilities] = { + "deepseek-chat": _DEEPSEEK_CHAT, + "deepseek-reasoner": _DEEPSEEK_THINKING, + "deepseek-v4-flash": _DEEPSEEK_THINKING, + "deepseek-v4-pro": _DEEPSEEK_THINKING, +} + +# Forward-compat patterns. A new ``deepseek-v5-*`` or ``deepseek-reasoner-*`` +# variant inherits the thinking-mode quirks automatically. +_BY_PATTERN: list[tuple[re.Pattern[str], ModelCapabilities]] = [ + (re.compile(r"^deepseek-v\d"), _DEEPSEEK_THINKING), + (re.compile(r"^deepseek-reasoner"), _DEEPSEEK_THINKING), +] + + +def get_capabilities(model_name: str) -> ModelCapabilities: + """Resolve capabilities by exact ID, then pattern, then default.""" + if model_name in _BY_ID: + return _BY_ID[model_name] + for pattern, caps in _BY_PATTERN: + if pattern.match(model_name): + return caps + return _DEFAULT diff --git a/tradingagents/llm_clients/openai_client.py b/tradingagents/llm_clients/openai_client.py index b74e26ef4..b6ad771c8 100644 --- a/tradingagents/llm_clients/openai_client.py +++ b/tradingagents/llm_clients/openai_client.py @@ -5,30 +5,45 @@ from langchain_core.messages import AIMessage from langchain_openai import ChatOpenAI from .base_client import BaseLLMClient, normalize_content +from .capabilities import get_capabilities from .validators import validate_model class NormalizedChatOpenAI(ChatOpenAI): - """ChatOpenAI with normalized content output. + """ChatOpenAI with normalized content output and capability-aware binding. The Responses API returns content as a list of typed blocks (reasoning, text, etc.). ``invoke`` normalizes to string for - consistent downstream handling. ``with_structured_output`` defaults - to function-calling so the Responses-API parse path is avoided - (langchain-openai's parse path emits noisy - PydanticSerializationUnexpectedValue warnings per call without - affecting correctness). + consistent downstream handling. - Provider-specific quirks (e.g. DeepSeek's thinking mode) live in - purpose-built subclasses below so this base class stays small. + ``with_structured_output`` consults the per-model capability table + (``capabilities.get_capabilities``) to pick the method and to decide + whether ``tool_choice`` may be sent. Models that reject ``tool_choice`` + (e.g. DeepSeek V4 and reasoner — per their official tool-calling + guide) still bind the schema as a tool, but no ``tool_choice`` + parameter is sent. + + Provider-specific quirks beyond structured-output (e.g. DeepSeek's + reasoning_content roundtrip) live in subclasses so this base class + stays small. """ def invoke(self, input, config=None, **kwargs): return normalize_content(super().invoke(input, config, **kwargs)) def with_structured_output(self, schema, *, method=None, **kwargs): - if method is None: - method = "function_calling" + caps = get_capabilities(self.model_name) + if caps.preferred_structured_method == "none": + raise NotImplementedError( + f"{self.model_name} has no structured-output method available; " + f"agent factories will fall back to free-text generation." + ) + method = method or caps.preferred_structured_method + # When the model rejects tool_choice, suppress langchain's hardcoded + # value. The schema is still bound as a tool — exactly what + # DeepSeek's official tool-calling examples do. + if method == "function_calling" and not caps.supports_tool_choice: + kwargs.setdefault("tool_choice", None) return super().with_structured_output(schema, method=method, **kwargs) @@ -52,18 +67,16 @@ def _input_to_messages(input_: Any) -> list: class DeepSeekChatOpenAI(NormalizedChatOpenAI): """DeepSeek-specific overrides on top of the OpenAI-compatible client. - Two quirks that don't apply to other OpenAI-compatible providers: + Thinking-mode round-trip is the only DeepSeek-specific behavior that + stays here. When DeepSeek's thinking models return a response with + ``reasoning_content``, that field must be echoed back as part of the + assistant message on the next turn or the API fails with HTTP 400. + ``_create_chat_result`` captures it on receive and + ``_get_request_payload`` re-attaches it on send. - 1. **Thinking-mode round-trip.** When DeepSeek's thinking models return - a response with ``reasoning_content``, that field must be echoed - back as part of the assistant message on the next turn or the API - fails with HTTP 400. ``_create_chat_result`` captures the field on - receive and ``_get_request_payload`` re-attaches it on send. - - 2. **deepseek-reasoner has no tool_choice.** Structured output via - function-calling is unavailable, so we raise NotImplementedError - and let the agent factories fall back to free-text generation - (see ``tradingagents/agents/utils/structured.py``). + Tool-choice handling for V4 and reasoner — those models reject the + ``tool_choice`` parameter — is handled by the capability dispatch in + ``NormalizedChatOpenAI.with_structured_output``, not here. """ def _get_request_payload(self, input_, *, stop=None, **kwargs): @@ -94,15 +107,6 @@ class DeepSeekChatOpenAI(NormalizedChatOpenAI): generation.message.additional_kwargs["reasoning_content"] = reasoning return chat_result - def with_structured_output(self, schema, *, method=None, **kwargs): - if self.model_name == "deepseek-reasoner": - raise NotImplementedError( - "deepseek-reasoner does not support tool_choice; structured " - "output is unavailable. Agent factories fall back to " - "free-text generation automatically." - ) - return super().with_structured_output(schema, method=method, **kwargs) - # Kwargs forwarded from user config to ChatOpenAI _PASSTHROUGH_KWARGS = ( "timeout", "max_retries", "reasoning_effort",