mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-06-16 21:06:15 +03:00
fix(llm): structured output for DeepSeek V4 and reasoner
DeepSeek V4 and reasoner reject tool_choice but accept tools. Route via a per-model capability table that suppresses tool_choice for thinking-mode models. #678 #689
This commit is contained in:
79
tests/test_capabilities.py
Normal file
79
tests/test_capabilities.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""Unit tests for the LLM capability table."""
|
||||
|
||||
import pytest
|
||||
|
||||
from tradingagents.llm_clients.capabilities import (
|
||||
ModelCapabilities,
|
||||
get_capabilities,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestExactIdMatches:
|
||||
def test_deepseek_chat_supports_tool_choice(self):
|
||||
caps = get_capabilities("deepseek-chat")
|
||||
assert caps.supports_tool_choice is True
|
||||
|
||||
def test_deepseek_reasoner_rejects_tool_choice(self):
|
||||
caps = get_capabilities("deepseek-reasoner")
|
||||
assert caps.supports_tool_choice is False
|
||||
assert caps.requires_reasoning_content_roundtrip is True
|
||||
|
||||
def test_deepseek_v4_flash_rejects_tool_choice(self):
|
||||
caps = get_capabilities("deepseek-v4-flash")
|
||||
assert caps.supports_tool_choice is False
|
||||
assert caps.requires_reasoning_content_roundtrip is True
|
||||
|
||||
def test_deepseek_v4_pro_rejects_tool_choice(self):
|
||||
caps = get_capabilities("deepseek-v4-pro")
|
||||
assert caps.supports_tool_choice is False
|
||||
assert caps.requires_reasoning_content_roundtrip is True
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestPatternMatches:
|
||||
"""Forward-compat regex patterns catch unknown DeepSeek variants."""
|
||||
|
||||
def test_future_deepseek_v5_inherits_thinking_quirks(self):
|
||||
caps = get_capabilities("deepseek-v5-flash")
|
||||
assert caps.supports_tool_choice is False
|
||||
assert caps.requires_reasoning_content_roundtrip is True
|
||||
|
||||
def test_future_deepseek_v9_inherits_thinking_quirks(self):
|
||||
caps = get_capabilities("deepseek-v9-anything")
|
||||
assert caps.supports_tool_choice is False
|
||||
|
||||
def test_reasoner_variant_inherits_thinking_quirks(self):
|
||||
caps = get_capabilities("deepseek-reasoner-pro")
|
||||
assert caps.supports_tool_choice is False
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestDefault:
|
||||
"""Unknown / non-DeepSeek models get the permissive default."""
|
||||
|
||||
def test_gpt_default(self):
|
||||
caps = get_capabilities("gpt-4.1")
|
||||
assert caps.supports_tool_choice is True
|
||||
assert caps.preferred_structured_method == "function_calling"
|
||||
|
||||
def test_grok_default(self):
|
||||
caps = get_capabilities("grok-4-0709")
|
||||
assert caps.supports_tool_choice is True
|
||||
|
||||
def test_unknown_model_default(self):
|
||||
caps = get_capabilities("totally-made-up-model-id")
|
||||
assert caps.supports_tool_choice is True
|
||||
|
||||
def test_exact_match_precedes_pattern(self):
|
||||
"""deepseek-chat must NOT match the v\\d regex."""
|
||||
caps = get_capabilities("deepseek-chat")
|
||||
assert caps.supports_tool_choice is True
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_capabilities_dataclass_is_frozen():
|
||||
"""Capability rows are immutable so they can be safely shared."""
|
||||
caps = get_capabilities("deepseek-chat")
|
||||
with pytest.raises(Exception):
|
||||
caps.supports_tool_choice = False # type: ignore[misc]
|
||||
@@ -5,9 +5,10 @@ Two pieces verified:
|
||||
1. ``reasoning_content`` is captured on receive into the AIMessage's
|
||||
``additional_kwargs`` and re-attached on send so DeepSeek's API
|
||||
sees the same value across turns.
|
||||
2. ``with_structured_output`` raises NotImplementedError for
|
||||
``deepseek-reasoner`` so the agent factories' free-text fallback
|
||||
handles the request instead of failing at runtime.
|
||||
2. ``with_structured_output`` consults the capability table and
|
||||
suppresses ``tool_choice`` for models that reject it (V4 + reasoner),
|
||||
matching DeepSeek's official tool-calling pattern at
|
||||
https://api-docs.deepseek.com/guides/tool_calls.
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -15,6 +16,7 @@ import os
|
||||
import pytest
|
||||
from langchain_core.messages import AIMessage, HumanMessage
|
||||
from langchain_core.prompt_values import ChatPromptValue
|
||||
from pydantic import BaseModel
|
||||
|
||||
from tradingagents.llm_clients.openai_client import (
|
||||
DeepSeekChatOpenAI,
|
||||
@@ -115,42 +117,111 @@ class TestDeepSeekReasoningContent:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# deepseek-reasoner: structured output unavailable, falls through to free-text
|
||||
# Capability-driven structured output: tool_choice suppressed for V4 + reasoner
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _bound_kwargs(runnable):
|
||||
"""Extract bind() kwargs from a with_structured_output result."""
|
||||
first = runnable.steps[0] if hasattr(runnable, "steps") else runnable
|
||||
return getattr(first, "kwargs", {})
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestDeepSeekReasonerStructuredOutput:
|
||||
def test_with_structured_output_raises_for_reasoner(self):
|
||||
client = DeepSeekChatOpenAI(
|
||||
model="deepseek-reasoner",
|
||||
api_key="placeholder",
|
||||
base_url="https://api.deepseek.com",
|
||||
class TestStructuredOutputCapabilityDispatch:
|
||||
"""DeepSeek V4 and reasoner reject the tool_choice parameter
|
||||
(official guide: api-docs.deepseek.com/guides/tool_calls passes
|
||||
tools=[...] without tool_choice). Verify the capability dispatch
|
||||
suppresses tool_choice for those models and sends it for chat."""
|
||||
|
||||
class _Sample(BaseModel):
|
||||
answer: str
|
||||
|
||||
def _client(self, model):
|
||||
return DeepSeekChatOpenAI(
|
||||
model=model, api_key="placeholder", base_url="https://api.deepseek.com",
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
class _Sample(BaseModel):
|
||||
answer: str
|
||||
def test_chat_sends_tool_choice(self):
|
||||
bound = self._client("deepseek-chat").with_structured_output(self._Sample)
|
||||
assert _bound_kwargs(bound).get("tool_choice") is not None
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
client.with_structured_output(_Sample)
|
||||
def test_reasoner_suppresses_tool_choice(self):
|
||||
bound = self._client("deepseek-reasoner").with_structured_output(self._Sample)
|
||||
# tool_choice is either absent or explicitly None — both are valid
|
||||
# signals that langchain's bind_tools will skip the parameter.
|
||||
assert _bound_kwargs(bound).get("tool_choice") in (None, ...) or \
|
||||
"tool_choice" not in _bound_kwargs(bound)
|
||||
|
||||
def test_with_structured_output_works_for_v4(self):
|
||||
"""V4 models (non-reasoner) accept tool_choice; structured output works."""
|
||||
def test_v4_flash_suppresses_tool_choice(self):
|
||||
bound = self._client("deepseek-v4-flash").with_structured_output(self._Sample)
|
||||
assert _bound_kwargs(bound).get("tool_choice") is None or \
|
||||
"tool_choice" not in _bound_kwargs(bound)
|
||||
|
||||
def test_v4_pro_suppresses_tool_choice(self):
|
||||
bound = self._client("deepseek-v4-pro").with_structured_output(self._Sample)
|
||||
assert _bound_kwargs(bound).get("tool_choice") is None or \
|
||||
"tool_choice" not in _bound_kwargs(bound)
|
||||
|
||||
def test_future_v_variant_via_regex(self):
|
||||
"""Forward-compat: unknown deepseek-v\\d-* IDs inherit V4 quirks."""
|
||||
bound = self._client("deepseek-v5-hypothetical").with_structured_output(self._Sample)
|
||||
assert _bound_kwargs(bound).get("tool_choice") is None or \
|
||||
"tool_choice" not in _bound_kwargs(bound)
|
||||
|
||||
def test_schema_is_still_bound_as_tool(self):
|
||||
"""tool_choice is suppressed, but the schema is still bound as a tool —
|
||||
exactly matching DeepSeek's official tool-calling examples."""
|
||||
bound = self._client("deepseek-reasoner").with_structured_output(self._Sample)
|
||||
kwargs = _bound_kwargs(bound)
|
||||
tools = kwargs.get("tools", [])
|
||||
assert any(
|
||||
t.get("function", {}).get("name") == "_Sample" for t in tools
|
||||
), f"schema not bound as a tool: {tools}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Live API: structured output round-trips against the real DeepSeek backend
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _has_real_deepseek_key():
|
||||
key = os.environ.get("DEEPSEEK_API_KEY", "")
|
||||
return bool(key) and key != "placeholder"
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
@pytest.mark.skipif(
|
||||
not _has_real_deepseek_key(),
|
||||
reason="DEEPSEEK_API_KEY not set (or placeholder); skipping live API call",
|
||||
)
|
||||
class TestDeepSeekLiveStructuredOutput:
|
||||
"""End-to-end: a real DeepSeek V4-flash call returns a typed instance.
|
||||
|
||||
Verifies the no-tool_choice path doesn't trigger the 400 reported in
|
||||
issue #678 and that the structured-output binding still parses to a
|
||||
Pydantic instance.
|
||||
"""
|
||||
|
||||
class _Pick(BaseModel):
|
||||
action: str
|
||||
confidence: float
|
||||
|
||||
def test_v4_flash_returns_structured_output(self):
|
||||
client = DeepSeekChatOpenAI(
|
||||
model="deepseek-v4-flash",
|
||||
api_key="placeholder",
|
||||
api_key=os.environ["DEEPSEEK_API_KEY"],
|
||||
base_url="https://api.deepseek.com",
|
||||
timeout=60,
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
class _Sample(BaseModel):
|
||||
answer: str
|
||||
|
||||
# Should return a Runnable, not raise. (The actual API call would
|
||||
# require a real key; we only assert binding succeeds.)
|
||||
wrapped = client.with_structured_output(_Sample)
|
||||
assert wrapped is not None
|
||||
bound = client.with_structured_output(self._Pick)
|
||||
result = bound.invoke(
|
||||
"Pick BUY or SELL or HOLD for a tech stock with strong earnings. "
|
||||
"Confidence is a float between 0 and 1."
|
||||
)
|
||||
assert isinstance(result, self._Pick)
|
||||
assert result.action in {"BUY", "SELL", "HOLD"}
|
||||
assert 0.0 <= result.confidence <= 1.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
95
tradingagents/llm_clients/capabilities.py
Normal file
95
tradingagents/llm_clients/capabilities.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Declarative per-model capability table for OpenAI-compatible providers.
|
||||
|
||||
This is the single place that knows which model IDs reject which API
|
||||
parameters or require which structured-output method. The LLM client
|
||||
subclasses consult ``get_capabilities(model_name)`` instead of hardcoding
|
||||
model-name ``if`` ladders, so adding a new model (or a new provider quirk)
|
||||
means editing this table — not the client code.
|
||||
|
||||
Pattern adapted from the per-model ``compat:`` flags DeepSeek themselves
|
||||
publish in their integration guides (e.g. the Oh My Pi config schema
|
||||
documents ``supportsToolChoice``, ``requiresReasoningContentForToolCalls``
|
||||
as declarative per-model fields).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal
|
||||
|
||||
|
||||
StructuredMethod = Literal[
|
||||
"function_calling", # uses tools; respects supports_tool_choice
|
||||
"json_mode", # uses response_format={"type":"json_object"}
|
||||
"json_schema", # uses response_format={"type":"json_schema",...}
|
||||
"none", # no structured output available; caller falls back to free-text
|
||||
]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ModelCapabilities:
|
||||
"""What an OpenAI-compatible model accepts at the API level."""
|
||||
|
||||
supports_tool_choice: bool
|
||||
supports_json_mode: bool
|
||||
supports_json_schema: bool
|
||||
preferred_structured_method: StructuredMethod
|
||||
# DeepSeek thinking-mode models 400 if reasoning_content from prior
|
||||
# assistant turns is not echoed back on the next request.
|
||||
requires_reasoning_content_roundtrip: bool = False
|
||||
|
||||
|
||||
# DeepSeek's thinking models accept the ``tools`` array but reject the
|
||||
# ``tool_choice`` parameter (official Oh My Pi integration guide and the
|
||||
# 400 response in issue #678). Their official tool-calling examples
|
||||
# (api-docs.deepseek.com/guides/tool_calls) pass ``tools=[...]`` without
|
||||
# ``tool_choice`` — we mirror that pattern by setting supports_tool_choice
|
||||
# to False and letting the client suppress the kwarg.
|
||||
_DEEPSEEK_THINKING = ModelCapabilities(
|
||||
supports_tool_choice=False,
|
||||
supports_json_mode=True,
|
||||
supports_json_schema=False,
|
||||
preferred_structured_method="function_calling",
|
||||
requires_reasoning_content_roundtrip=True,
|
||||
)
|
||||
|
||||
_DEEPSEEK_CHAT = ModelCapabilities(
|
||||
supports_tool_choice=True,
|
||||
supports_json_mode=True,
|
||||
supports_json_schema=False,
|
||||
preferred_structured_method="function_calling",
|
||||
)
|
||||
|
||||
_DEFAULT = ModelCapabilities(
|
||||
supports_tool_choice=True,
|
||||
supports_json_mode=True,
|
||||
supports_json_schema=True,
|
||||
preferred_structured_method="function_calling",
|
||||
)
|
||||
|
||||
|
||||
# Exact-ID matches take precedence over pattern matches.
|
||||
_BY_ID: dict[str, ModelCapabilities] = {
|
||||
"deepseek-chat": _DEEPSEEK_CHAT,
|
||||
"deepseek-reasoner": _DEEPSEEK_THINKING,
|
||||
"deepseek-v4-flash": _DEEPSEEK_THINKING,
|
||||
"deepseek-v4-pro": _DEEPSEEK_THINKING,
|
||||
}
|
||||
|
||||
# Forward-compat patterns. A new ``deepseek-v5-*`` or ``deepseek-reasoner-*``
|
||||
# variant inherits the thinking-mode quirks automatically.
|
||||
_BY_PATTERN: list[tuple[re.Pattern[str], ModelCapabilities]] = [
|
||||
(re.compile(r"^deepseek-v\d"), _DEEPSEEK_THINKING),
|
||||
(re.compile(r"^deepseek-reasoner"), _DEEPSEEK_THINKING),
|
||||
]
|
||||
|
||||
|
||||
def get_capabilities(model_name: str) -> ModelCapabilities:
|
||||
"""Resolve capabilities by exact ID, then pattern, then default."""
|
||||
if model_name in _BY_ID:
|
||||
return _BY_ID[model_name]
|
||||
for pattern, caps in _BY_PATTERN:
|
||||
if pattern.match(model_name):
|
||||
return caps
|
||||
return _DEFAULT
|
||||
@@ -5,30 +5,45 @@ from langchain_core.messages import AIMessage
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from .base_client import BaseLLMClient, normalize_content
|
||||
from .capabilities import get_capabilities
|
||||
from .validators import validate_model
|
||||
|
||||
|
||||
class NormalizedChatOpenAI(ChatOpenAI):
|
||||
"""ChatOpenAI with normalized content output.
|
||||
"""ChatOpenAI with normalized content output and capability-aware binding.
|
||||
|
||||
The Responses API returns content as a list of typed blocks
|
||||
(reasoning, text, etc.). ``invoke`` normalizes to string for
|
||||
consistent downstream handling. ``with_structured_output`` defaults
|
||||
to function-calling so the Responses-API parse path is avoided
|
||||
(langchain-openai's parse path emits noisy
|
||||
PydanticSerializationUnexpectedValue warnings per call without
|
||||
affecting correctness).
|
||||
consistent downstream handling.
|
||||
|
||||
Provider-specific quirks (e.g. DeepSeek's thinking mode) live in
|
||||
purpose-built subclasses below so this base class stays small.
|
||||
``with_structured_output`` consults the per-model capability table
|
||||
(``capabilities.get_capabilities``) to pick the method and to decide
|
||||
whether ``tool_choice`` may be sent. Models that reject ``tool_choice``
|
||||
(e.g. DeepSeek V4 and reasoner — per their official tool-calling
|
||||
guide) still bind the schema as a tool, but no ``tool_choice``
|
||||
parameter is sent.
|
||||
|
||||
Provider-specific quirks beyond structured-output (e.g. DeepSeek's
|
||||
reasoning_content roundtrip) live in subclasses so this base class
|
||||
stays small.
|
||||
"""
|
||||
|
||||
def invoke(self, input, config=None, **kwargs):
|
||||
return normalize_content(super().invoke(input, config, **kwargs))
|
||||
|
||||
def with_structured_output(self, schema, *, method=None, **kwargs):
|
||||
if method is None:
|
||||
method = "function_calling"
|
||||
caps = get_capabilities(self.model_name)
|
||||
if caps.preferred_structured_method == "none":
|
||||
raise NotImplementedError(
|
||||
f"{self.model_name} has no structured-output method available; "
|
||||
f"agent factories will fall back to free-text generation."
|
||||
)
|
||||
method = method or caps.preferred_structured_method
|
||||
# When the model rejects tool_choice, suppress langchain's hardcoded
|
||||
# value. The schema is still bound as a tool — exactly what
|
||||
# DeepSeek's official tool-calling examples do.
|
||||
if method == "function_calling" and not caps.supports_tool_choice:
|
||||
kwargs.setdefault("tool_choice", None)
|
||||
return super().with_structured_output(schema, method=method, **kwargs)
|
||||
|
||||
|
||||
@@ -52,18 +67,16 @@ def _input_to_messages(input_: Any) -> list:
|
||||
class DeepSeekChatOpenAI(NormalizedChatOpenAI):
|
||||
"""DeepSeek-specific overrides on top of the OpenAI-compatible client.
|
||||
|
||||
Two quirks that don't apply to other OpenAI-compatible providers:
|
||||
Thinking-mode round-trip is the only DeepSeek-specific behavior that
|
||||
stays here. When DeepSeek's thinking models return a response with
|
||||
``reasoning_content``, that field must be echoed back as part of the
|
||||
assistant message on the next turn or the API fails with HTTP 400.
|
||||
``_create_chat_result`` captures it on receive and
|
||||
``_get_request_payload`` re-attaches it on send.
|
||||
|
||||
1. **Thinking-mode round-trip.** When DeepSeek's thinking models return
|
||||
a response with ``reasoning_content``, that field must be echoed
|
||||
back as part of the assistant message on the next turn or the API
|
||||
fails with HTTP 400. ``_create_chat_result`` captures the field on
|
||||
receive and ``_get_request_payload`` re-attaches it on send.
|
||||
|
||||
2. **deepseek-reasoner has no tool_choice.** Structured output via
|
||||
function-calling is unavailable, so we raise NotImplementedError
|
||||
and let the agent factories fall back to free-text generation
|
||||
(see ``tradingagents/agents/utils/structured.py``).
|
||||
Tool-choice handling for V4 and reasoner — those models reject the
|
||||
``tool_choice`` parameter — is handled by the capability dispatch in
|
||||
``NormalizedChatOpenAI.with_structured_output``, not here.
|
||||
"""
|
||||
|
||||
def _get_request_payload(self, input_, *, stop=None, **kwargs):
|
||||
@@ -94,15 +107,6 @@ class DeepSeekChatOpenAI(NormalizedChatOpenAI):
|
||||
generation.message.additional_kwargs["reasoning_content"] = reasoning
|
||||
return chat_result
|
||||
|
||||
def with_structured_output(self, schema, *, method=None, **kwargs):
|
||||
if self.model_name == "deepseek-reasoner":
|
||||
raise NotImplementedError(
|
||||
"deepseek-reasoner does not support tool_choice; structured "
|
||||
"output is unavailable. Agent factories fall back to "
|
||||
"free-text generation automatically."
|
||||
)
|
||||
return super().with_structured_output(schema, method=method, **kwargs)
|
||||
|
||||
# Kwargs forwarded from user config to ChatOpenAI
|
||||
_PASSTHROUGH_KWARGS = (
|
||||
"timeout", "max_retries", "reasoning_effort",
|
||||
|
||||
Reference in New Issue
Block a user