[TRTLLM-11616][fix] Accept strict field in tools and store field in chat requests

JunyiXu-nv · JunyiXu-nv · commit 1085480d43b7 · 2026-03-24T04:40:34.000Z
- Add `strict: Optional[bool]` to `FunctionDefinition` in openai_protocol.py
  so OpenAI-compatible clients can pass `tools[].function.strict` without
  getting an HTTP 400 "extra_forbidden" error.
- Add `store: Optional[bool]` to `ChatCompletionRequest` so the `store`
  field is accepted (but not acted on) matching the OpenAI API spec.
- When `strict=True`, build structural tag guided decoding params to
  constrain tool call arguments to the function's JSON Schema via the
  existing `structural_tag` machinery.
- Add 14 unit tests covering the new fields and guided decoding logic.

Signed-off-by: JunyiXu-nv &lt;219237550+JunyiXu-nv@users.noreply.github.com&gt;
diff --git a/tensorrt_llm/serve/openai_protocol.py b/tensorrt_llm/serve/openai_protocol.py
@@ -618,6 +618,7 @@ class FunctionDefinition(OpenAIBaseModel):
     name: str
     description: Optional[str] = None
     parameters: Optional[Dict[str, Any]] = None
+    strict: Optional[bool] = None
 
 
 class ChatCompletionToolsParam(OpenAIBaseModel):
diff --git a/tensorrt_llm/serve/openai_server.py b/tensorrt_llm/serve/openai_server.py
@@ -44,6 +44,7 @@
 from tensorrt_llm.llmapi.llm import RequestOutput
 from tensorrt_llm.logger import logger
 from tensorrt_llm.metrics.collector import MetricsCollector
+from tensorrt_llm.sampling_params import GuidedDecodingParams
 from tensorrt_llm.serve.chat_utils import (load_chat_template,
                                            parse_chat_messages_coroutines)
 from tensorrt_llm.serve.cluster_storage import create_cluster_storage_client
@@ -62,6 +63,7 @@
                                                 ImageObject,
                                                 MemoryUpdateRequest, ModelCard,
                                                 ModelList, PromptTokensDetails,
+                                                ResponseFormat,
                                                 ResponsesRequest,
                                                 ResponsesResponse,
                                                 UpdateWeightsRequest, UsageInfo,
@@ -96,6 +98,78 @@
 TIMEOUT_KEEP_ALIVE = 5  # seconds.
 
 
+def _build_tool_strict_guided_decoding_params(tools, tool_parser_name):
+    """Build GuidedDecodingParams with structural tags for tools with strict=True.
+
+    When a tool has ``strict=True`` in its function definition, the server
+    should use constrained decoding to guarantee that the generated tool call
+    arguments exactly match the function's ``parameters`` JSON Schema.
+
+    This function builds structural tag items from each tool parser's
+    ``structure_info()`` and the tool's ``parameters`` schema, then returns
+    a ``GuidedDecodingParams`` with the structural tag format.
+
+    Returns None if no tool has strict=True or the parser doesn't support
+    structural tags.
+    """
+    if not tools or not tool_parser_name:
+        return None
+
+    # Check if any tool has strict=True
+    has_strict = any(tool.function.strict for tool in tools
+                     if tool.function.strict)
+    if not has_strict:
+        return None
+
+    tool_parser_cls = ToolParserFactory.parsers.get(tool_parser_name.lower())
+    if tool_parser_cls is None:
+        logger.warning(
+            "Tool parser '%s' not found, cannot enforce strict mode for tools.",
+            tool_parser_name)
+        return None
+
+    parser = tool_parser_cls()
+    if not parser.supports_structural_tag():
+        logger.warning(
+            "Tool parser '%s' does not support structural tags, "
+            "cannot enforce strict mode for tools.", tool_parser_name)
+        return None
+
+    get_info = parser.structure_info()
+
+    tags = []
+    triggers = set()
+    for tool in tools:
+        info = get_info(tool.function.name)
+        triggers.add(info.trigger)
+
+        if tool.function.strict and tool.function.parameters:
+            # Strict tool: constrain arguments to match the JSON Schema
+            content = {
+                "type": "json_schema",
+                "json_schema": tool.function.parameters,
+            }
+        else:
+            # Non-strict tool or no parameters: allow any text
+            content = {"type": "any_text"}
+
+        tags.append({
+            "begin": info.begin,
+            "content": content,
+            "end": info.end,
+        })
+
+    stag_format = {
+        "type": "triggered_tags",
+        "triggers": sorted(triggers),
+        "tags": tags,
+    }
+
+    resp_format = ResponseFormat(type="structural_tag", format=stag_format)
+    return GuidedDecodingParams(structural_tag=resp_format.model_dump_json(
+        by_alias=True, exclude_none=True))
+
+
 class OpenAIServer:
 
     def __init__(
@@ -856,6 +930,14 @@ async def chat_stream_generator(
                 if tool_parser_cls and getattr(
                         tool_parser_cls, 'needs_raw_special_tokens', False):
                     sampling_params.skip_special_tokens = False
+                # When strict=True on any tool, apply constrained decoding
+                # via structural tags (only if response_format doesn't already
+                # set guided decoding).
+                if sampling_params.guided_decoding is None:
+                    strict_guided = _build_tool_strict_guided_decoding_params(
+                        request.tools, self.tool_parser)
+                    if strict_guided is not None:
+                        sampling_params.guided_decoding = strict_guided
             postproc_args = ChatPostprocArgs.from_request(request)
             disaggregated_params = to_llm_disaggregated_params(
                 request.disaggregated_params)
diff --git a/tests/unittest/llmapi/apps/test_tool_parsers.py b/tests/unittest/llmapi/apps/test_tool_parsers.py