[None][fix] Handle unset attention_dp_relax in ADP routers

peihu-nv · peihu-nv · commit 4fca6c1be229 · 2026-05-18T13:47:45.000-07:00
OpenAI requests can carry a SchedulingParams object while leaving attention_dp_relax unset (None). The ADP routers used that field directly as the sort key, so a batch mixing None and False values failed with a TypeError when Python tried to compare None with a bool.

Treat only an explicit False as strict, and keep None aligned with the existing missing-scheduling-params behavior (the request is treated as relaxed). Apply the same logic to both DefaultADPRouter and KVCacheAwareADPRouter, with regression coverage for both paths.

Signed-off-by: peihengh <259410613+peihu-nv@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/pyexecutor/scheduler/adp_router.py b/tensorrt_llm/_torch/pyexecutor/scheduler/adp_router.py
@@ -304,7 +304,7 @@ def get_relax_value(req_item):
             scheduling_params = getattr(req_item.request, "py_scheduling_params", None)
             if scheduling_params is None:
                 return True
-            return scheduling_params.attention_dp_relax
+            return scheduling_params.attention_dp_relax is not False
 
         sorted_requests = sorted(new_requests, key=get_relax_value)
 
@@ -576,7 +576,7 @@ def get_relax_value(req_item):
             scheduling_params = getattr(req_item.request, "py_scheduling_params", None)
             if scheduling_params is None:
                 return True
-            return scheduling_params.attention_dp_relax
+            return scheduling_params.attention_dp_relax is not False
 
         sorted_requests = sorted(new_requests, key=get_relax_value)
 
diff --git a/tests/unittest/_torch/executor/test_adp_router.py b/tests/unittest/_torch/executor/test_adp_router.py
@@ -203,6 +203,22 @@ def test_target_dp_rank_at_capacity_falls_through(self):
         assert len(result[0]) == 1
         assert len(result[1]) == 0
 
+    def test_none_attention_dp_relax_is_relaxed(self):
+        router = DefaultADPRouter(dist=_mock_dist())
+        states = [
+            RankState(rank=0, num_active_requests=0, num_active_tokens=0),
+            RankState(rank=1, num_active_requests=0, num_active_tokens=0),
+        ]
+        req_relax = _make_request_item(1, target_dp_rank=0, attention_dp_relax=None)
+        req_strict = _make_request_item(2, target_dp_rank=0, attention_dp_relax=False)
+
+        result, _ = router.route_requests(
+            states, [req_relax, req_strict], max_num_active_requests=1
+        )
+
+        assert result[0] == [req_strict]
+        assert req_relax in result[1]
+
     def test_favors_less_loaded_rank(self):
         router = DefaultADPRouter(dist=_mock_dist())
         states = [
@@ -926,6 +942,18 @@ def test_cache_affinity_wins(self):
         assert result[0] == []
         assert result[1] == [req]
 
+    def test_none_attention_dp_relax_is_relaxed(self):
+        router = self._make_router(tp_size=2)
+        req_relax = _make_request_item(1, target_dp_rank=0, attention_dp_relax=None)
+        req_strict = _make_request_item(2, target_dp_rank=0, attention_dp_relax=False)
+
+        result, _ = router.route_requests(
+            self._rank_states(2), [req_relax, req_strict], max_num_active_requests=1
+        )
+
+        assert result[0] == [req_strict]
+        assert req_relax in result[1]
+
     def test_match_rate_threshold_gates_cache_affinity(self):
         """With rank 0 loaded but holding cache, and rank 1 idle with no
         cache: