fix(llm): structured output for DeepSeek V4 and reasoner

DeepSeek V4 and reasoner reject tool_choice but accept tools. Route via a per-model capability table that suppresses tool_choice for thinking-mode models. #678 #689
2026-08-01 19:34:24 +03:00 · 2026-05-11 01:12:28 +00:00
parent afdc6d4ec1
commit 22bb91bd83
4 changed files with 306 additions and 57 deletions
--- a/tests/test_capabilities.py
+++ b/tests/test_capabilities.py
@@ -0,0 +1,79 @@
+"""Unit tests for the LLM capability table."""
+
+import pytest
+
+from tradingagents.llm_clients.capabilities import (
+    ModelCapabilities,
+    get_capabilities,
+)
+
+
+@pytest.mark.unit
+class TestExactIdMatches:
+    def test_deepseek_chat_supports_tool_choice(self):
+        caps = get_capabilities("deepseek-chat")
+        assert caps.supports_tool_choice is True
+
+    def test_deepseek_reasoner_rejects_tool_choice(self):
+        caps = get_capabilities("deepseek-reasoner")
+        assert caps.supports_tool_choice is False
+        assert caps.requires_reasoning_content_roundtrip is True
+
+    def test_deepseek_v4_flash_rejects_tool_choice(self):
+        caps = get_capabilities("deepseek-v4-flash")
+        assert caps.supports_tool_choice is False
+        assert caps.requires_reasoning_content_roundtrip is True
+
+    def test_deepseek_v4_pro_rejects_tool_choice(self):
+        caps = get_capabilities("deepseek-v4-pro")
+        assert caps.supports_tool_choice is False
+        assert caps.requires_reasoning_content_roundtrip is True
+
+
+@pytest.mark.unit
+class TestPatternMatches:
+    """Forward-compat regex patterns catch unknown DeepSeek variants."""
+
+    def test_future_deepseek_v5_inherits_thinking_quirks(self):
+        caps = get_capabilities("deepseek-v5-flash")
+        assert caps.supports_tool_choice is False
+        assert caps.requires_reasoning_content_roundtrip is True
+
+    def test_future_deepseek_v9_inherits_thinking_quirks(self):
+        caps = get_capabilities("deepseek-v9-anything")
+        assert caps.supports_tool_choice is False
+
+    def test_reasoner_variant_inherits_thinking_quirks(self):
+        caps = get_capabilities("deepseek-reasoner-pro")
+        assert caps.supports_tool_choice is False
+
+
+@pytest.mark.unit
+class TestDefault:
+    """Unknown / non-DeepSeek models get the permissive default."""
+
+    def test_gpt_default(self):
+        caps = get_capabilities("gpt-4.1")
+        assert caps.supports_tool_choice is True
+        assert caps.preferred_structured_method == "function_calling"
+
+    def test_grok_default(self):
+        caps = get_capabilities("grok-4-0709")
+        assert caps.supports_tool_choice is True
+
+    def test_unknown_model_default(self):
+        caps = get_capabilities("totally-made-up-model-id")
+        assert caps.supports_tool_choice is True
+
+    def test_exact_match_precedes_pattern(self):
+        """deepseek-chat must NOT match the v\\d regex."""
+        caps = get_capabilities("deepseek-chat")
+        assert caps.supports_tool_choice is True
+
+
+@pytest.mark.unit
+def test_capabilities_dataclass_is_frozen():
+    """Capability rows are immutable so they can be safely shared."""
+    caps = get_capabilities("deepseek-chat")
+    with pytest.raises(Exception):
+        caps.supports_tool_choice = False  # type: ignore[misc]
--- a/tests/test_deepseek_reasoning.py
+++ b/tests/test_deepseek_reasoning.py
@@ -5,9 +5,10 @@ Two pieces verified:
 1. ``reasoning_content`` is captured on receive into the AIMessage's
   ``additional_kwargs`` and re-attached on send so DeepSeek's API
   sees the same value across turns.
-2. ``with_structured_output`` raises NotImplementedError for
-   ``deepseek-reasoner`` so the agent factories' free-text fallback
-   handles the request instead of failing at runtime.
+2. ``with_structured_output`` consults the capability table and
+   suppresses ``tool_choice`` for models that reject it (V4 + reasoner),
+   matching DeepSeek's official tool-calling pattern at
+   https://api-docs.deepseek.com/guides/tool_calls.
 """

 import os
@@ -15,6 +16,7 @@ import os
 import pytest
 from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.prompt_values import ChatPromptValue
+from pydantic import BaseModel

 from tradingagents.llm_clients.openai_client import (
    DeepSeekChatOpenAI,
@@ -115,42 +117,111 @@ class TestDeepSeekReasoningContent:


 # ---------------------------------------------------------------------------
-# deepseek-reasoner: structured output unavailable, falls through to free-text
+# Capability-driven structured output: tool_choice suppressed for V4 + reasoner
 # ---------------------------------------------------------------------------


+def _bound_kwargs(runnable):
+    """Extract bind() kwargs from a with_structured_output result."""
+    first = runnable.steps[0] if hasattr(runnable, "steps") else runnable
+    return getattr(first, "kwargs", {})
+
+
@pytest.mark.unit
-class TestDeepSeekReasonerStructuredOutput:
-    def test_with_structured_output_raises_for_reasoner(self):
-        client = DeepSeekChatOpenAI(
-            model="deepseek-reasoner",
-            api_key="placeholder",
-            base_url="https://api.deepseek.com",
+class TestStructuredOutputCapabilityDispatch:
+    """DeepSeek V4 and reasoner reject the tool_choice parameter
+    (official guide: api-docs.deepseek.com/guides/tool_calls passes
+    tools=[...] without tool_choice). Verify the capability dispatch
+    suppresses tool_choice for those models and sends it for chat."""
+
+    class _Sample(BaseModel):
+        answer: str
+
+    def _client(self, model):
+        return DeepSeekChatOpenAI(
+            model=model, api_key="placeholder", base_url="https://api.deepseek.com",
        )
-        from pydantic import BaseModel

-        class _Sample(BaseModel):
-            answer: str
+    def test_chat_sends_tool_choice(self):
+        bound = self._client("deepseek-chat").with_structured_output(self._Sample)
+        assert _bound_kwargs(bound).get("tool_choice") is not None

-        with pytest.raises(NotImplementedError):
-            client.with_structured_output(_Sample)
+    def test_reasoner_suppresses_tool_choice(self):
+        bound = self._client("deepseek-reasoner").with_structured_output(self._Sample)
+        # tool_choice is either absent or explicitly None — both are valid
+        # signals that langchain's bind_tools will skip the parameter.
+        assert _bound_kwargs(bound).get("tool_choice") in (None, ...) or \
+            "tool_choice" not in _bound_kwargs(bound)

-    def test_with_structured_output_works_for_v4(self):
-        """V4 models (non-reasoner) accept tool_choice; structured output works."""
+    def test_v4_flash_suppresses_tool_choice(self):
+        bound = self._client("deepseek-v4-flash").with_structured_output(self._Sample)
+        assert _bound_kwargs(bound).get("tool_choice") is None or \
+            "tool_choice" not in _bound_kwargs(bound)
+
+    def test_v4_pro_suppresses_tool_choice(self):
+        bound = self._client("deepseek-v4-pro").with_structured_output(self._Sample)
+        assert _bound_kwargs(bound).get("tool_choice") is None or \
+            "tool_choice" not in _bound_kwargs(bound)
+
+    def test_future_v_variant_via_regex(self):
+        """Forward-compat: unknown deepseek-v\\d-* IDs inherit V4 quirks."""
+        bound = self._client("deepseek-v5-hypothetical").with_structured_output(self._Sample)
+        assert _bound_kwargs(bound).get("tool_choice") is None or \
+            "tool_choice" not in _bound_kwargs(bound)
+
+    def test_schema_is_still_bound_as_tool(self):
+        """tool_choice is suppressed, but the schema is still bound as a tool —
+        exactly matching DeepSeek's official tool-calling examples."""
+        bound = self._client("deepseek-reasoner").with_structured_output(self._Sample)
+        kwargs = _bound_kwargs(bound)
+        tools = kwargs.get("tools", [])
+        assert any(
+            t.get("function", {}).get("name") == "_Sample" for t in tools
+        ), f"schema not bound as a tool: {tools}"
+
+
+# ---------------------------------------------------------------------------
+# Live API: structured output round-trips against the real DeepSeek backend
+# ---------------------------------------------------------------------------
+
+
+def _has_real_deepseek_key():
+    key = os.environ.get("DEEPSEEK_API_KEY", "")
+    return bool(key) and key != "placeholder"
+
+
+@pytest.mark.integration
+@pytest.mark.skipif(
+    not _has_real_deepseek_key(),
+    reason="DEEPSEEK_API_KEY not set (or placeholder); skipping live API call",
+)
+class TestDeepSeekLiveStructuredOutput:
+    """End-to-end: a real DeepSeek V4-flash call returns a typed instance.
+
+    Verifies the no-tool_choice path doesn't trigger the 400 reported in
+    issue #678 and that the structured-output binding still parses to a
+    Pydantic instance.
+    """
+
+    class _Pick(BaseModel):
+        action: str
+        confidence: float
+
+    def test_v4_flash_returns_structured_output(self):
        client = DeepSeekChatOpenAI(
            model="deepseek-v4-flash",
-            api_key="placeholder",
+            api_key=os.environ["DEEPSEEK_API_KEY"],
            base_url="https://api.deepseek.com",
+            timeout=60,
        )
-        from pydantic import BaseModel
-
-        class _Sample(BaseModel):
-            answer: str
-
-        # Should return a Runnable, not raise. (The actual API call would
-        # require a real key; we only assert binding succeeds.)
-        wrapped = client.with_structured_output(_Sample)
-        assert wrapped is not None
+        bound = client.with_structured_output(self._Pick)
+        result = bound.invoke(
+            "Pick BUY or SELL or HOLD for a tech stock with strong earnings. "
+            "Confidence is a float between 0 and 1."
+        )
+        assert isinstance(result, self._Pick)
+        assert result.action in {"BUY", "SELL", "HOLD"}
+        assert 0.0 <= result.confidence <= 1.0


 # ---------------------------------------------------------------------------
--- a/tradingagents/llm_clients/capabilities.py
+++ b/tradingagents/llm_clients/capabilities.py
@@ -0,0 +1,95 @@
+"""Declarative per-model capability table for OpenAI-compatible providers.
+
+This is the single place that knows which model IDs reject which API
+parameters or require which structured-output method. The LLM client
+subclasses consult ``get_capabilities(model_name)`` instead of hardcoding
+model-name ``if`` ladders, so adding a new model (or a new provider quirk)
+means editing this table — not the client code.
+
+Pattern adapted from the per-model ``compat:`` flags DeepSeek themselves
+publish in their integration guides (e.g. the Oh My Pi config schema
+documents ``supportsToolChoice``, ``requiresReasoningContentForToolCalls``
+as declarative per-model fields).
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+from typing import Literal
+
+
+StructuredMethod = Literal[
+    "function_calling",  # uses tools; respects supports_tool_choice
+    "json_mode",         # uses response_format={"type":"json_object"}
+    "json_schema",       # uses response_format={"type":"json_schema",...}
+    "none",              # no structured output available; caller falls back to free-text
+]
+
+
+@dataclass(frozen=True)
+class ModelCapabilities:
+    """What an OpenAI-compatible model accepts at the API level."""
+
+    supports_tool_choice: bool
+    supports_json_mode: bool
+    supports_json_schema: bool
+    preferred_structured_method: StructuredMethod
+    # DeepSeek thinking-mode models 400 if reasoning_content from prior
+    # assistant turns is not echoed back on the next request.
+    requires_reasoning_content_roundtrip: bool = False
+
+
+# DeepSeek's thinking models accept the ``tools`` array but reject the
+# ``tool_choice`` parameter (official Oh My Pi integration guide and the
+# 400 response in issue #678). Their official tool-calling examples
+# (api-docs.deepseek.com/guides/tool_calls) pass ``tools=[...]`` without
+# ``tool_choice`` — we mirror that pattern by setting supports_tool_choice
+# to False and letting the client suppress the kwarg.
+_DEEPSEEK_THINKING = ModelCapabilities(
+    supports_tool_choice=False,
+    supports_json_mode=True,
+    supports_json_schema=False,
+    preferred_structured_method="function_calling",
+    requires_reasoning_content_roundtrip=True,
+)
+
+_DEEPSEEK_CHAT = ModelCapabilities(
+    supports_tool_choice=True,
+    supports_json_mode=True,
+    supports_json_schema=False,
+    preferred_structured_method="function_calling",
+)
+
+_DEFAULT = ModelCapabilities(
+    supports_tool_choice=True,
+    supports_json_mode=True,
+    supports_json_schema=True,
+    preferred_structured_method="function_calling",
+)
+
+
+# Exact-ID matches take precedence over pattern matches.
+_BY_ID: dict[str, ModelCapabilities] = {
+    "deepseek-chat": _DEEPSEEK_CHAT,
+    "deepseek-reasoner": _DEEPSEEK_THINKING,
+    "deepseek-v4-flash": _DEEPSEEK_THINKING,
+    "deepseek-v4-pro": _DEEPSEEK_THINKING,
+}
+
+# Forward-compat patterns. A new ``deepseek-v5-*`` or ``deepseek-reasoner-*``
+# variant inherits the thinking-mode quirks automatically.
+_BY_PATTERN: list[tuple[re.Pattern[str], ModelCapabilities]] = [
+    (re.compile(r"^deepseek-v\d"), _DEEPSEEK_THINKING),
+    (re.compile(r"^deepseek-reasoner"), _DEEPSEEK_THINKING),
+]
+
+
+def get_capabilities(model_name: str) -> ModelCapabilities:
+    """Resolve capabilities by exact ID, then pattern, then default."""
+    if model_name in _BY_ID:
+        return _BY_ID[model_name]
+    for pattern, caps in _BY_PATTERN:
+        if pattern.match(model_name):
+            return caps
+    return _DEFAULT
--- a/tradingagents/llm_clients/openai_client.py
+++ b/tradingagents/llm_clients/openai_client.py
@@ -5,30 +5,45 @@ from langchain_core.messages import AIMessage
 from langchain_openai import ChatOpenAI

 from .base_client import BaseLLMClient, normalize_content
+from .capabilities import get_capabilities
 from .validators import validate_model


 class NormalizedChatOpenAI(ChatOpenAI):
-    """ChatOpenAI with normalized content output.
+    """ChatOpenAI with normalized content output and capability-aware binding.

    The Responses API returns content as a list of typed blocks
    (reasoning, text, etc.). ``invoke`` normalizes to string for
-    consistent downstream handling. ``with_structured_output`` defaults
-    to function-calling so the Responses-API parse path is avoided
-    (langchain-openai's parse path emits noisy
-    PydanticSerializationUnexpectedValue warnings per call without
-    affecting correctness).
+    consistent downstream handling.

-    Provider-specific quirks (e.g. DeepSeek's thinking mode) live in
-    purpose-built subclasses below so this base class stays small.
+    ``with_structured_output`` consults the per-model capability table
+    (``capabilities.get_capabilities``) to pick the method and to decide
+    whether ``tool_choice`` may be sent. Models that reject ``tool_choice``
+    (e.g. DeepSeek V4 and reasoner — per their official tool-calling
+    guide) still bind the schema as a tool, but no ``tool_choice``
+    parameter is sent.
+
+    Provider-specific quirks beyond structured-output (e.g. DeepSeek's
+    reasoning_content roundtrip) live in subclasses so this base class
+    stays small.
    """

    def invoke(self, input, config=None, **kwargs):
        return normalize_content(super().invoke(input, config, **kwargs))

    def with_structured_output(self, schema, *, method=None, **kwargs):
-        if method is None:
-            method = "function_calling"
+        caps = get_capabilities(self.model_name)
+        if caps.preferred_structured_method == "none":
+            raise NotImplementedError(
+                f"{self.model_name} has no structured-output method available; "
+                f"agent factories will fall back to free-text generation."
+            )
+        method = method or caps.preferred_structured_method
+        # When the model rejects tool_choice, suppress langchain's hardcoded
+        # value. The schema is still bound as a tool — exactly what
+        # DeepSeek's official tool-calling examples do.
+        if method == "function_calling" and not caps.supports_tool_choice:
+            kwargs.setdefault("tool_choice", None)
        return super().with_structured_output(schema, method=method, **kwargs)


@@ -52,18 +67,16 @@ def _input_to_messages(input_: Any) -> list:
 class DeepSeekChatOpenAI(NormalizedChatOpenAI):
    """DeepSeek-specific overrides on top of the OpenAI-compatible client.

-    Two quirks that don't apply to other OpenAI-compatible providers:
+    Thinking-mode round-trip is the only DeepSeek-specific behavior that
+    stays here. When DeepSeek's thinking models return a response with
+    ``reasoning_content``, that field must be echoed back as part of the
+    assistant message on the next turn or the API fails with HTTP 400.
+    ``_create_chat_result`` captures it on receive and
+    ``_get_request_payload`` re-attaches it on send.

-    1. **Thinking-mode round-trip.** When DeepSeek's thinking models return
-       a response with ``reasoning_content``, that field must be echoed
-       back as part of the assistant message on the next turn or the API
-       fails with HTTP 400. ``_create_chat_result`` captures the field on
-       receive and ``_get_request_payload`` re-attaches it on send.
-
-    2. **deepseek-reasoner has no tool_choice.** Structured output via
-       function-calling is unavailable, so we raise NotImplementedError
-       and let the agent factories fall back to free-text generation
-       (see ``tradingagents/agents/utils/structured.py``).
+    Tool-choice handling for V4 and reasoner — those models reject the
+    ``tool_choice`` parameter — is handled by the capability dispatch in
+    ``NormalizedChatOpenAI.with_structured_output``, not here.
    """

    def _get_request_payload(self, input_, *, stop=None, **kwargs):
@@ -94,15 +107,6 @@ class DeepSeekChatOpenAI(NormalizedChatOpenAI):
                generation.message.additional_kwargs["reasoning_content"] = reasoning
        return chat_result

-    def with_structured_output(self, schema, *, method=None, **kwargs):
-        if self.model_name == "deepseek-reasoner":
-            raise NotImplementedError(
-                "deepseek-reasoner does not support tool_choice; structured "
-                "output is unavailable. Agent factories fall back to "
-                "free-text generation automatically."
-            )
-        return super().with_structured_output(schema, method=method, **kwargs)
-
 # Kwargs forwarded from user config to ChatOpenAI
 _PASSTHROUGH_KWARGS = (
    "timeout", "max_retries", "reasoning_effort",