Commit 5953ba7

feat(api): enable fast-mode in claude-opus-4-6
1 parent 7c42e4b commit 5953ba7

9 files changed: 87 additions & 7 deletions

.stats.yml

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 34
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic%2Fanthropic-267f913f89364cb8df3a758335a974b43eb98019a8ceef0a9b0a94ef34c2a3b5.yml
-openapi_spec_hash: aa708f3d3bc54992526cbf5894427446
-config_hash: d56fbaeeb3934b1a3b374590c9837ddd
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic%2Fanthropic-fee5dc365a4948e68639582c5301d4d0666c7d85a11628d7917e1477f76d3da1.yml
+openapi_spec_hash: d5543958074cd2bd74096cd69f3bb4f9
+config_hash: c4802b6c7f8ffae62f7d73b2ac61e635

src/anthropic/resources/beta/messages/messages.py

Lines changed: 56 additions & 0 deletions
Large diffs are not rendered by default.
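
The 56-line diff to this file is not rendered, but judging from the type changes below it presumably threads the new `speed` keyword through the beta `create()` and `count_tokens()` methods and into the request body. A minimal sketch of that plumbing; the simplified internals here are an assumption, not the SDK's actual code:

from typing import Any, Dict, Iterable, Optional
from typing_extensions import Literal


def create(
    *,
    model: str,
    max_tokens: int,
    messages: Iterable[Dict[str, Any]],
    speed: Optional[Literal["standard", "fast"]] = None,  # new in this commit
    **rest: Any,
) -> Dict[str, Any]:
    # Sketch only: assemble the JSON body the real method would POST to the
    # Messages API, forwarding `speed` alongside the existing parameters.
    body: Dict[str, Any] = {
        "model": model,
        "max_tokens": max_tokens,
        "messages": list(messages),
        **rest,
    }
    if speed is not None:
        body["speed"] = speed  # only sent when explicitly set
    return body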

src/anthropic/types/anthropic_beta_param.py

Lines changed: 1 addition & 0 deletions
Original file	Changed file
@@ -29,5 +29,6 @@
         "context-management-2025-06-27",
         "model-context-window-exceeded-2025-08-26",
         "skills-2025-10-02",
+        "fast-mode-2026-02-01",
     ],
 ]
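
Adding the literal means the type checker now accepts the flag in the `betas` list, which the SDK sends as the `anthropic-beta` header. A sketch of opting in; the model name comes from the commit title, and pairing it with this flag is an assumption:

from anthropic import Anthropic

client = Anthropic()

# "fast-mode-2026-02-01" now type-checks as an AnthropicBetaParam;
# arbitrary strings were already accepted at runtime via the str arm.
message = client.beta.messages.create(
    model="claude-opus-4-6",  # model named in the commit title
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello"}],
    betas=["fast-mode-2026-02-01"],
)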

src/anthropic/types/beta/beta_usage.py

Lines changed: 3 additions & 0 deletions
Original file	Changed file
@@ -46,3 +46,6 @@ class BetaUsage(BaseModel):
 
     service_tier: Optional[Literal["standard", "priority", "batch"]] = None
     """If the request used the priority, standard, or batch tier."""
+
+    speed: Optional[Literal["standard", "fast"]] = None
+    """The inference speed mode used for this request."""

src/anthropic/types/beta/message_count_tokens_params.py

Lines changed: 7 additions & 1 deletion
Original file	Changed file
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from typing import List, Union, Iterable, Optional
-from typing_extensions import Required, Annotated, TypeAlias, TypedDict
+from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
 
 from ..._utils import PropertyInfo
 from ..model_param import ModelParam
@@ -137,6 +137,12 @@ class MessageCountTokensParams(TypedDict, total=False):
     removed in a future release.
     """
 
+    speed: Optional[Literal["standard", "fast"]]
+    """The inference speed mode for this request.
+
+    `"fast"` enables high output-tokens-per-second inference.
+    """
+
     system: Union[str, Iterable[BetaTextBlockParam]]
     """System prompt.
 
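`count_tokens` gains the same optional literal, so a token count can be requested under the speed mode you intend to use; whether the mode affects the count is not stated anywhere in this diff. A minimal sketch:

from anthropic import Anthropic

client = Anthropic()

# speed accepts None (omit), "standard", or "fast".
count = client.beta.messages.count_tokens(
    model="claude-opus-4-6",
    messages=[{"role": "user", "content": "Hello"}],
    speed="fast",
)
print(count.input_tokens)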
src/anthropic/types/beta/message_create_params.py

Lines changed: 6 additions & 0 deletions
Original file	Changed file
@@ -161,6 +161,12 @@ class MessageCreateParamsBase(TypedDict, total=False):
     [service-tiers](https://docs.claude.com/en/api/service-tiers) for details.
     """
 
+    speed: Optional[Literal["standard", "fast"]]
+    """The inference speed mode for this request.
+
+    `"fast"` enables high output-tokens-per-second inference.
+    """
+
     stop_sequences: SequenceNotStr[str]
     """Custom text sequences that will cause the model to stop generating.
 
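Because `MessageCreateParamsBase` is a `TypedDict` with `total=False`, omitting `speed` leaves prior behavior untouched. A sketch combining the new parameter with the beta flag from above; that the flag is required to use `speed="fast"` is an assumption based on the feature being beta-gated:

from anthropic import Anthropic

client = Anthropic()

message = client.beta.messages.create(
    model="claude-opus-4-6",           # model named in the commit title
    max_tokens=1024,
    messages=[{"role": "user", "content": "Summarize this diff."}],
    betas=["fast-mode-2026-02-01"],    # beta flag added in this commit
    speed="fast",                      # high output-tokens-per-second inference
)
print(message.usage.speed)  # expected to echo "fast" per beta_usage.py above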
tests/api_resources/beta/messages/test_batches.py

Lines changed: 2 additions & 0 deletions
Original file	Changed file
@@ -119,6 +119,7 @@ def test_method_create_with_all_params(self, client: Anthropic) -> None:
                     "type": "json_schema",
                 },
                 "service_tier": "auto",
+                "speed": "standard",
                 "stop_sequences": ["string"],
                 "stream": False,
                 "system": [
@@ -550,6 +551,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncAnthropic) -> None:
                     "type": "json_schema",
                 },
                 "service_tier": "auto",
+                "speed": "standard",
                 "stop_sequences": ["string"],
                 "stream": False,
                 "system": [

tests/api_resources/beta/test_messages.py

Lines changed: 6 additions & 0 deletions
Original file	Changed file
@@ -105,6 +105,7 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> None:
                 "type": "json_schema",
             },
             service_tier="auto",
+            speed="standard",
             stop_sequences=["string"],
             stream=False,
             system=[
@@ -292,6 +293,7 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> None:
                 "type": "json_schema",
             },
             service_tier="auto",
+            speed="standard",
             stop_sequences=["string"],
             system=[
                 {
@@ -462,6 +464,7 @@ def test_method_count_tokens_with_all_params(self, client: Anthropic) -> None:
                 "schema": {"foo": "bar"},
                 "type": "json_schema",
             },
+            speed="fast",
             system=[
                 {
                     "text": "Today's date is 2024-06-01.",
@@ -663,6 +666,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: AsyncAnthropic) -> None:
                 "type": "json_schema",
             },
             service_tier="auto",
+            speed="standard",
             stop_sequences=["string"],
             stream=False,
             system=[
@@ -850,6 +854,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: AsyncAnthropic) -> None:
                 "type": "json_schema",
             },
             service_tier="auto",
+            speed="standard",
             stop_sequences=["string"],
             system=[
                 {
@@ -1020,6 +1025,7 @@ async def test_method_count_tokens_with_all_params(self, async_client: AsyncAnthropic) -> None:
                 "schema": {"foo": "bar"},
                 "type": "json_schema",
             },
+            speed="fast",
             system=[
                 {
                     "text": "Today's date is 2024-06-01.",

tests/lib/tools/test_runners.py

Lines changed: 3 additions & 3 deletions
Original file	Changed file
@@ -37,7 +37,7 @@
             ]
         ),
         "result": snapshot(
-            "ParsedBetaMessage(container=None, content=[ParsedBetaTextBlock(citations=None, parsed_output=None, text=\"The weather in San Francisco, CA is currently **68°F** and **Sunny**. It's a nice day! ☀️\", type='text')], context_management=None, id='msg_014x2Sxq2p6sewFyUbJp8Mg3', model='claude-haiku-4-5-20251001', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=BetaUsage(cache_creation=BetaCacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), cache_creation_input_tokens=0, cache_read_input_tokens=0, inference_geo=None, input_tokens=770, iterations=None, output_tokens=33, server_tool_use=None, service_tier='standard'))\n"
+            "ParsedBetaMessage(container=None, content=[ParsedBetaTextBlock(citations=None, parsed_output=None, text=\"The weather in San Francisco, CA is currently **68°F** and **Sunny**. It's a nice day! ☀️\", type='text')], context_management=None, id='msg_014x2Sxq2p6sewFyUbJp8Mg3', model='claude-haiku-4-5-20251001', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=BetaUsage(cache_creation=BetaCacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), cache_creation_input_tokens=0, cache_read_input_tokens=0, inference_geo=None, input_tokens=770, iterations=None, output_tokens=33, server_tool_use=None, service_tier='standard', speed=None))\n"
         ),
     },
     "custom": {
@@ -48,12 +48,12 @@
             ]
         ),
         "result": snapshot(
-            "ParsedBetaMessage(container=None, content=[ParsedBetaTextBlock(citations=None, parsed_output=None, text='The weather in San Francisco, CA is currently **20°C** and **Sunny**. Nice weather!', type='text')], context_management=None, id='msg_01DSPL7PHKQYTe9VAFkHzsA3', model='claude-haiku-4-5-20251001', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=BetaUsage(cache_creation=BetaCacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), cache_creation_input_tokens=0, cache_read_input_tokens=0, inference_geo=None, input_tokens=787, iterations=None, output_tokens=26, server_tool_use=None, service_tier='standard'))\n"
+            "ParsedBetaMessage(container=None, content=[ParsedBetaTextBlock(citations=None, parsed_output=None, text='The weather in San Francisco, CA is currently **20°C** and **Sunny**. Nice weather!', type='text')], context_management=None, id='msg_01DSPL7PHKQYTe9VAFkHzsA3', model='claude-haiku-4-5-20251001', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=BetaUsage(cache_creation=BetaCacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), cache_creation_input_tokens=0, cache_read_input_tokens=0, inference_geo=None, input_tokens=787, iterations=None, output_tokens=26, server_tool_use=None, service_tier='standard', speed=None))\n"
         ),
     },
     "streaming": {
         "result": snapshot(
-            "ParsedBetaMessage(container=None, content=[ParsedBetaTextBlock(citations=None, parsed_output=None, text='The weather in San Francisco, CA is currently **Sunny** with a temperature of **68°F**.', type='text')], context_management=None, id='msg_01Vm8Ddgc8qm4iuUSKbf6jku', model='claude-haiku-4-5-20251001', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=BetaUsage(cache_creation=BetaCacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), cache_creation_input_tokens=0, cache_read_input_tokens=0, inference_geo=None, input_tokens=781, iterations=None, output_tokens=25, server_tool_use=None, service_tier='standard'))\n"
+            "ParsedBetaMessage(container=None, content=[ParsedBetaTextBlock(citations=None, parsed_output=None, text='The weather in San Francisco, CA is currently **Sunny** with a temperature of **68°F**.', type='text')], context_management=None, id='msg_01Vm8Ddgc8qm4iuUSKbf6jku', model='claude-haiku-4-5-20251001', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=BetaUsage(cache_creation=BetaCacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), cache_creation_input_tokens=0, cache_read_input_tokens=0, inference_geo=None, input_tokens=781, iterations=None, output_tokens=25, server_tool_use=None, service_tier='standard', speed=None))\n"
         )
     },
     "tool_call": {
