Commit c5ede36c

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-04-17 00:42:28
feat(api): add o3 and o4-mini model IDs
1 parent bf6dd81
src/openai/resources/chat/completions/completions.py
@@ -99,7 +99,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -145,7 +145,7 @@ class Completions(SyncAPIResource):
               [images](https://platform.openai.com/docs/guides/vision), and
               [audio](https://platform.openai.com/docs/guides/audio).
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -201,7 +201,7 @@ class Completions(SyncAPIResource):
 
               This value is now deprecated in favor of `max_completion_tokens`, and is not
               compatible with
-              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+              [o-series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
@@ -270,12 +270,17 @@ class Completions(SyncAPIResource):
                 latency guarantee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
               - When not set, the default behavior is 'auto'.
 
               When this parameter is set, the response body will include the `service_tier`
               utilized.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -364,7 +369,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -409,7 +414,7 @@ class Completions(SyncAPIResource):
               [images](https://platform.openai.com/docs/guides/vision), and
               [audio](https://platform.openai.com/docs/guides/audio).
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -474,7 +479,7 @@ class Completions(SyncAPIResource):
 
               This value is now deprecated in favor of `max_completion_tokens`, and is not
               compatible with
-              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+              [o-series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
@@ -543,12 +548,17 @@ class Completions(SyncAPIResource):
                 latency guarantee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
               - When not set, the default behavior is 'auto'.
 
               When this parameter is set, the response body will include the `service_tier`
               utilized.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -628,7 +638,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -673,7 +683,7 @@ class Completions(SyncAPIResource):
               [images](https://platform.openai.com/docs/guides/vision), and
               [audio](https://platform.openai.com/docs/guides/audio).
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -738,7 +748,7 @@ class Completions(SyncAPIResource):
 
               This value is now deprecated in favor of `max_completion_tokens`, and is not
               compatible with
-              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+              [o-series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
@@ -807,12 +817,17 @@ class Completions(SyncAPIResource):
                 latency guarantee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
               - When not set, the default behavior is 'auto'.
 
               When this parameter is set, the response body will include the `service_tier`
               utilized.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -891,7 +906,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
@@ -1187,7 +1202,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -1233,7 +1248,7 @@ class AsyncCompletions(AsyncAPIResource):
               [images](https://platform.openai.com/docs/guides/vision), and
               [audio](https://platform.openai.com/docs/guides/audio).
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1289,7 +1304,7 @@ class AsyncCompletions(AsyncAPIResource):
 
               This value is now deprecated in favor of `max_completion_tokens`, and is not
               compatible with
-              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+              [o-series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
@@ -1358,12 +1373,17 @@ class AsyncCompletions(AsyncAPIResource):
                 latency guarantee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
               - When not set, the default behavior is 'auto'.
 
               When this parameter is set, the response body will include the `service_tier`
               utilized.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -1452,7 +1472,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1497,7 +1517,7 @@ class AsyncCompletions(AsyncAPIResource):
               [images](https://platform.openai.com/docs/guides/vision), and
               [audio](https://platform.openai.com/docs/guides/audio).
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1562,7 +1582,7 @@ class AsyncCompletions(AsyncAPIResource):
 
               This value is now deprecated in favor of `max_completion_tokens`, and is not
               compatible with
-              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+              [o-series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
@@ -1631,12 +1651,17 @@ class AsyncCompletions(AsyncAPIResource):
                 latency guarantee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
               - When not set, the default behavior is 'auto'.
 
               When this parameter is set, the response body will include the `service_tier`
               utilized.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -1716,7 +1741,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1761,7 +1786,7 @@ class AsyncCompletions(AsyncAPIResource):
               [images](https://platform.openai.com/docs/guides/vision), and
               [audio](https://platform.openai.com/docs/guides/audio).
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1826,7 +1851,7 @@ class AsyncCompletions(AsyncAPIResource):
 
               This value is now deprecated in favor of `max_completion_tokens`, and is not
               compatible with
-              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+              [o-series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
@@ -1895,12 +1920,17 @@ class AsyncCompletions(AsyncAPIResource):
                 latency guarantee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
               - When not set, the default behavior is 'auto'.
 
               When this parameter is set, the response body will include the `service_tier`
               utilized.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -1979,7 +2009,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
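The chat-completions changes above add `flex` to the `service_tier` literal and point the docstrings at the new `o3`/`o4-mini` model IDs. A minimal usage sketch of the new values (assuming `OPENAI_API_KEY` is set in the environment; the prompt text is illustrative):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Request the new Flex Processing tier with one of the new model IDs.
completion = client.chat.completions.create(
    model="o3",
    messages=[{"role": "user", "content": "Say hello."}],
    service_tier="flex",
)

# When service_tier is set on the request, the response reports the
# tier that actually processed it.
print(completion.service_tier)
```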
src/openai/resources/responses/responses.py
@@ -89,6 +89,7 @@ class Responses(SyncAPIResource):
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -130,7 +131,7 @@ class Responses(SyncAPIResource):
               - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
               - [Function calling](https://platform.openai.com/docs/guides/function-calling)
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -174,6 +175,24 @@ class Responses(SyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           store: Whether to store the generated model response for later retrieval via API.
 
           stream: If set to true, the model response data will be streamed to the client as it is
@@ -255,6 +274,7 @@ class Responses(SyncAPIResource):
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -295,7 +315,7 @@ class Responses(SyncAPIResource):
               - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
               - [Function calling](https://platform.openai.com/docs/guides/function-calling)
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -346,6 +366,24 @@ class Responses(SyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           store: Whether to store the generated model response for later retrieval via API.
 
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -420,6 +458,7 @@ class Responses(SyncAPIResource):
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -460,7 +499,7 @@ class Responses(SyncAPIResource):
               - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
               - [Function calling](https://platform.openai.com/docs/guides/function-calling)
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -511,6 +550,24 @@ class Responses(SyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           store: Whether to store the generated model response for later retrieval via API.
 
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -584,6 +641,7 @@ class Responses(SyncAPIResource):
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -613,6 +671,7 @@ class Responses(SyncAPIResource):
                     "parallel_tool_calls": parallel_tool_calls,
                     "previous_response_id": previous_response_id,
                     "reasoning": reasoning,
+                    "service_tier": service_tier,
                     "store": store,
                     "stream": stream,
                     "temperature": temperature,
@@ -903,6 +962,7 @@ class AsyncResponses(AsyncAPIResource):
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -944,7 +1004,7 @@ class AsyncResponses(AsyncAPIResource):
               - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
               - [Function calling](https://platform.openai.com/docs/guides/function-calling)
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -988,6 +1048,24 @@ class AsyncResponses(AsyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           store: Whether to store the generated model response for later retrieval via API.
 
           stream: If set to true, the model response data will be streamed to the client as it is
@@ -1069,6 +1147,7 @@ class AsyncResponses(AsyncAPIResource):
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -1109,7 +1188,7 @@ class AsyncResponses(AsyncAPIResource):
               - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
               - [Function calling](https://platform.openai.com/docs/guides/function-calling)
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1160,6 +1239,24 @@ class AsyncResponses(AsyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           store: Whether to store the generated model response for later retrieval via API.
 
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -1234,6 +1331,7 @@ class AsyncResponses(AsyncAPIResource):
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -1274,7 +1372,7 @@ class AsyncResponses(AsyncAPIResource):
               - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
               - [Function calling](https://platform.openai.com/docs/guides/function-calling)
 
-          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
               wide range of models with different capabilities, performance characteristics,
               and price points. Refer to the
               [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1325,6 +1423,24 @@ class AsyncResponses(AsyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarantee.
+              - If set to 'default', the request will be processed using the default service
+                tier with a lower uptime SLA and no latency guarantee.
+              - If set to 'flex', the request will be processed with the Flex Processing
+                service tier.
+                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+              - When not set, the default behavior is 'auto'.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           store: Whether to store the generated model response for later retrieval via API.
 
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -1398,6 +1514,7 @@ class AsyncResponses(AsyncAPIResource):
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1427,6 +1544,7 @@ class AsyncResponses(AsyncAPIResource):
                     "parallel_tool_calls": parallel_tool_calls,
                     "previous_response_id": previous_response_id,
                     "reasoning": reasoning,
+                    "service_tier": service_tier,
                     "store": store,
                     "stream": stream,
                     "temperature": temperature,
src/openai/resources/completions.py
@@ -159,7 +159,9 @@ class Completions(SyncAPIResource):
               Determinism is not guaranteed, and you should refer to the `system_fingerprint`
               response parameter to monitor changes in the backend.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           stream: Whether to stream back partial progress. If set, tokens will be sent as
@@ -319,7 +321,9 @@ class Completions(SyncAPIResource):
               Determinism is not guaranteed, and you should refer to the `system_fingerprint`
               response parameter to monitor changes in the backend.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -472,7 +476,9 @@ class Completions(SyncAPIResource):
               Determinism is not guaranteed, and you should refer to the `system_fingerprint`
               response parameter to monitor changes in the backend.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -703,7 +709,9 @@ class AsyncCompletions(AsyncAPIResource):
               Determinism is not guaranteed, and you should refer to the `system_fingerprint`
               response parameter to monitor changes in the backend.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           stream: Whether to stream back partial progress. If set, tokens will be sent as
@@ -863,7 +871,9 @@ class AsyncCompletions(AsyncAPIResource):
               Determinism is not guaranteed, and you should refer to the `system_fingerprint`
               response parameter to monitor changes in the backend.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1016,7 +1026,9 @@ class AsyncCompletions(AsyncAPIResource):
               Determinism is not guaranteed, and you should refer to the `system_fingerprint`
               response parameter to monitor changes in the backend.
 
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
+          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
+
+              Up to 4 sequences where the API will stop generating further tokens. The
               returned text will not contain the stop sequence.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
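The legacy completions docstrings pick up the same `stop` caveat. Behavior is unchanged for models that support stop sequences; a sketch (the model and prompt are illustrative, not part of this diff):

```python
from openai import OpenAI

client = OpenAI()

# Stop sequences still work on models that support them; per the updated
# docstrings they are not supported with the o3 and o4-mini reasoning models.
completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Count upward: one, two, three, ",
    stop=["six"],
    max_tokens=20,
)

# The returned text will not contain the stop sequence.
print(completion.choices[0].text)
```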
src/openai/types/chat/chat_completion.py
@@ -59,8 +59,26 @@ class ChatCompletion(BaseModel):
     object: Literal["chat.completion"]
     """The object type, which is always `chat.completion`."""
 
-    service_tier: Optional[Literal["scale", "default"]] = None
-    """The service tier used for processing the request."""
+    service_tier: Optional[Literal["auto", "default", "flex"]] = None
+    """Specifies the latency tier to use for processing the request.
+
+    This parameter is relevant for customers subscribed to the scale tier service:
+
+    - If set to 'auto', and the Project is Scale tier enabled, the system will
+      utilize scale tier credits until they are exhausted.
+    - If set to 'auto', and the Project is not Scale tier enabled, the request will
+      be processed using the default service tier with a lower uptime SLA and no
+      latency guarantee.
+    - If set to 'default', the request will be processed using the default service
+      tier with a lower uptime SLA and no latency guarantee.
+    - If set to 'flex', the request will be processed with the Flex Processing
+      service tier.
+      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    - When not set, the default behavior is 'auto'.
+
+    When this parameter is set, the response body will include the `service_tier`
+    utilized.
+    """
 
     system_fingerprint: Optional[str] = None
     """This fingerprint represents the backend configuration that the model runs with.
src/openai/types/chat/chat_completion_audio_param.py
@@ -9,7 +9,7 @@ __all__ = ["ChatCompletionAudioParam"]
 
 
 class ChatCompletionAudioParam(TypedDict, total=False):
-    format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]]
+    format: Required[Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]]
     """Specifies the output audio format.
 
-    Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.
+    Must be one of `wav`, `aac`, `mp3`, `flac`, `opus`, or `pcm16`.
@@ -22,6 +22,6 @@ class ChatCompletionAudioParam(TypedDict, total=False):
     ]
     """The voice the model uses to respond.
 
-    Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, and
-    `shimmer`.
+    Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`,
+    `onyx`, `sage`, and `shimmer`.
     """
src/openai/types/chat/chat_completion_chunk.py
@@ -128,8 +128,26 @@ class ChatCompletionChunk(BaseModel):
     object: Literal["chat.completion.chunk"]
     """The object type, which is always `chat.completion.chunk`."""
 
-    service_tier: Optional[Literal["scale", "default"]] = None
-    """The service tier used for processing the request."""
+    service_tier: Optional[Literal["auto", "default", "flex"]] = None
+    """Specifies the latency tier to use for processing the request.
+
+    This parameter is relevant for customers subscribed to the scale tier service:
+
+    - If set to 'auto', and the Project is Scale tier enabled, the system will
+      utilize scale tier credits until they are exhausted.
+    - If set to 'auto', and the Project is not Scale tier enabled, the request will
+      be processed using the default service tier with a lower uptime SLA and no
+      latency guarantee.
+    - If set to 'default', the request will be processed using the default service
+      tier with a lower uptime SLA and no latency guarantee.
+    - If set to 'flex', the request will be processed with the Flex Processing
+      service tier.
+      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    - When not set, the default behavior is 'auto'.
+
+    When this parameter is set, the response body will include the `service_tier`
+    utilized.
+    """
 
     system_fingerprint: Optional[str] = None
     """
src/openai/types/chat/completion_create_params.py
@@ -45,7 +45,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
 
     model: Required[Union[str, ChatModel]]
-    """Model ID used to generate the response, like `gpt-4o` or `o1`.
+    """Model ID used to generate the response, like `gpt-4o` or `o3`.
 
     OpenAI offers a wide range of models with different capabilities, performance
     characteristics, and price points. Refer to the
@@ -123,7 +123,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
 
     This value is now deprecated in favor of `max_completion_tokens`, and is not
     compatible with
-    [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+    [o-series models](https://platform.openai.com/docs/guides/reasoning).
     """
 
     metadata: Optional[Metadata]
@@ -208,7 +208,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     in the backend.
     """
 
-    service_tier: Optional[Literal["auto", "default"]]
+    service_tier: Optional[Literal["auto", "default", "flex"]]
     """Specifies the latency tier to use for processing the request.
 
     This parameter is relevant for customers subscribed to the scale tier service:
@@ -220,6 +220,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
       latency guarantee.
     - If set to 'default', the request will be processed using the default service
       tier with a lower uptime SLA and no latency guarantee.
+    - If set to 'flex', the request will be processed with the Flex Processing
+      service tier.
+      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
     - When not set, the default behavior is 'auto'.
 
     When this parameter is set, the response body will include the `service_tier`
@@ -227,9 +230,10 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
 
     stop: Union[Optional[str], List[str], None]
-    """Up to 4 sequences where the API will stop generating further tokens.
+    """Not supported with the latest reasoning models `o3` and `o4-mini`.
 
-    The returned text will not contain the stop sequence.
+    Up to 4 sequences where the API will stop generating further tokens. The
+    returned text will not contain the stop sequence.
     """
 
     store: Optional[bool]
src/openai/types/responses/response.py
@@ -62,7 +62,7 @@ class Response(BaseModel):
     """
 
     model: ResponsesModel
-    """Model ID used to generate the response, like `gpt-4o` or `o1`.
+    """Model ID used to generate the response, like `gpt-4o` or `o3`.
 
     OpenAI offers a wide range of models with different capabilities, performance
     characteristics, and price points. Refer to the
@@ -149,6 +149,27 @@ class Response(BaseModel):
     [reasoning models](https://platform.openai.com/docs/guides/reasoning).
     """
 
+    service_tier: Optional[Literal["auto", "default", "flex"]] = None
+    """Specifies the latency tier to use for processing the request.
+
+    This parameter is relevant for customers subscribed to the scale tier service:
+
+    - If set to 'auto', and the Project is Scale tier enabled, the system will
+      utilize scale tier credits until they are exhausted.
+    - If set to 'auto', and the Project is not Scale tier enabled, the request will
+      be processed using the default service tier with a lower uptime SLA and no
+      latency guarantee.
+    - If set to 'default', the request will be processed using the default service
+      tier with a lower uptime SLA and no latency guarantee.
+    - If set to 'flex', the request will be processed with the Flex Processing
+      service tier.
+      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    - When not set, the default behavior is 'auto'.
+
+    When this parameter is set, the response body will include the `service_tier`
+    utilized.
+    """
+
     status: Optional[ResponseStatus] = None
     """The status of the response generation.
 
src/openai/types/responses/response_create_params.py
@@ -38,7 +38,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     """
 
     model: Required[ResponsesModel]
-    """Model ID used to generate the response, like `gpt-4o` or `o1`.
+    """Model ID used to generate the response, like `gpt-4o` or `o3`.
 
     OpenAI offers a wide range of models with different capabilities, performance
     characteristics, and price points. Refer to the
@@ -102,6 +102,27 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     [reasoning models](https://platform.openai.com/docs/guides/reasoning).
     """
 
+    service_tier: Optional[Literal["auto", "default", "flex"]]
+    """Specifies the latency tier to use for processing the request.
+
+    This parameter is relevant for customers subscribed to the scale tier service:
+
+    - If set to 'auto', and the Project is Scale tier enabled, the system will
+      utilize scale tier credits until they are exhausted.
+    - If set to 'auto', and the Project is not Scale tier enabled, the request will
+      be processed using the default service tier with a lower uptime SLA and no
+      latency guarantee.
+    - If set to 'default', the request will be processed using the default service
+      tier with a lower uptime SLA and no latency guarantee.
+    - If set to 'flex', the request will be processed with the Flex Processing
+      service tier.
+      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    - When not set, the default behavior is 'auto'.
+
+    When this parameter is set, the response body will include the `service_tier`
+    utilized.
+    """
+
     store: Optional[bool]
     """Whether to store the generated model response for later retrieval via API."""
 
src/openai/types/shared/chat_model.py
@@ -11,6 +11,10 @@ ChatModel: TypeAlias = Literal[
     "gpt-4.1-2025-04-14",
     "gpt-4.1-mini-2025-04-14",
     "gpt-4.1-nano-2025-04-14",
+    "o4-mini",
+    "o4-mini-2025-04-16",
+    "o3",
+    "o3-2025-04-16",
     "o3-mini",
     "o3-mini-2025-01-31",
     "o1",
src/openai/types/shared/reasoning.py
@@ -19,10 +19,17 @@ class Reasoning(BaseModel):
     result in faster responses and fewer tokens used on reasoning in a response.
     """
 
-    generate_summary: Optional[Literal["concise", "detailed"]] = None
-    """**computer_use_preview only**
+    generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None
+    """**Deprecated:** use `summary` instead.
 
     A summary of the reasoning performed by the model. This can be useful for
-    debugging and understanding the model's reasoning process. One of `concise` or
-    `detailed`.
+    debugging and understanding the model's reasoning process. One of `auto`,
+    `concise`, or `detailed`.
+    """
+
+    summary: Optional[Literal["auto", "concise", "detailed"]] = None
+    """A summary of the reasoning performed by the model.
+
+    This can be useful for debugging and understanding the model's reasoning
+    process. One of `auto`, `concise`, or `detailed`.
     """
src/openai/types/shared_params/chat_model.py
@@ -13,6 +13,10 @@ ChatModel: TypeAlias = Literal[
     "gpt-4.1-2025-04-14",
     "gpt-4.1-mini-2025-04-14",
     "gpt-4.1-nano-2025-04-14",
+    "o4-mini",
+    "o4-mini-2025-04-16",
+    "o3",
+    "o3-2025-04-16",
     "o3-mini",
     "o3-mini-2025-01-31",
     "o1",
src/openai/types/shared_params/reasoning.py
@@ -20,10 +20,17 @@ class Reasoning(TypedDict, total=False):
     result in faster responses and fewer tokens used on reasoning in a response.
     """
 
-    generate_summary: Optional[Literal["concise", "detailed"]]
-    """**computer_use_preview only**
+    generate_summary: Optional[Literal["auto", "concise", "detailed"]]
+    """**Deprecated:** use `summary` instead.
 
     A summary of the reasoning performed by the model. This can be useful for
-    debugging and understanding the model's reasoning process. One of `concise` or
-    `detailed`.
+    debugging and understanding the model's reasoning process. One of `auto`,
+    `concise`, or `detailed`.
+    """
+
+    summary: Optional[Literal["auto", "concise", "detailed"]]
+    """A summary of the reasoning performed by the model.
+
+    This can be useful for debugging and understanding the model's reasoning
+    process. One of `auto`, `concise`, or `detailed`.
     """
src/openai/types/completion_create_params.py
@@ -120,9 +120,10 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
 
     stop: Union[Optional[str], List[str], None]
-    """Up to 4 sequences where the API will stop generating further tokens.
+    """Not supported with the latest reasoning models `o3` and `o4-mini`.
 
-    The returned text will not contain the stop sequence.
+    Up to 4 sequences where the API will stop generating further tokens. The
+    returned text will not contain the stop sequence.
     """
 
     stream_options: Optional[ChatCompletionStreamOptionsParam]
tests/api_resources/test_responses.py
@@ -38,8 +38,10 @@ class TestResponses:
             previous_response_id="previous_response_id",
             reasoning={
                 "effort": "low",
-                "generate_summary": "concise",
+                "generate_summary": "auto",
+                "summary": "auto",
             },
+            service_tier="auto",
             store=True,
             stream=False,
             temperature=1,
@@ -116,8 +118,10 @@ class TestResponses:
             previous_response_id="previous_response_id",
             reasoning={
                 "effort": "low",
-                "generate_summary": "concise",
+                "generate_summary": "auto",
+                "summary": "auto",
             },
+            service_tier="auto",
             store=True,
             temperature=1,
             text={"format": {"type": "text"}},
@@ -280,8 +284,10 @@ class TestAsyncResponses:
             previous_response_id="previous_response_id",
             reasoning={
                 "effort": "low",
-                "generate_summary": "concise",
+                "generate_summary": "auto",
+                "summary": "auto",
             },
+            service_tier="auto",
             store=True,
             stream=False,
             temperature=1,
@@ -358,8 +364,10 @@ class TestAsyncResponses:
             previous_response_id="previous_response_id",
             reasoning={
                 "effort": "low",
-                "generate_summary": "concise",
+                "generate_summary": "auto",
+                "summary": "auto",
             },
+            service_tier="auto",
             store=True,
             temperature=1,
             text={"format": {"type": "text"}},
.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 97
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a555f81249cb084f463dcefa4aba069f9341fdaf3dd6ac27d7f237fc90e8f488.yml
-openapi_spec_hash: 8e590296cd1a54b9508510b0c7a2c45a
-config_hash: 5ea32de61ff42fcf5e66cff8d9e247ea
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5633633cc38734869cf7d993f7b549bb8e4d10e0ec45381ec2cd91507cd8eb8f.yml
+openapi_spec_hash: c855121b2b2324b99499c9244c21d24d
+config_hash: d20837393b73efdb19cd08e04c1cc9a1