Commit c5ede36c
Changed files (16)
src/openai/resources/chat/completions/completions.py
@@ -99,7 +99,7 @@ class Completions(SyncAPIResource):
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -145,7 +145,7 @@ class Completions(SyncAPIResource):
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -201,7 +201,7 @@ class Completions(SyncAPIResource):
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -270,12 +270,17 @@ class Completions(SyncAPIResource):
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
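A minimal sketch of opting into the new tier from the sync client (the model and prompt are illustrative; assumes `OPENAI_API_KEY` is set in the environment):

```python
# Sketch: request the new Flex Processing tier on a chat completion.
# Assumes OPENAI_API_KEY is set; model and prompt are illustrative.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="o3",
    messages=[{"role": "user", "content": "Summarize Hamlet in one sentence."}],
    service_tier="flex",  # newly accepted literal from this commit
)
print(completion.choices[0].message.content)
```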
@@ -364,7 +369,7 @@ class Completions(SyncAPIResource):
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -409,7 +414,7 @@ class Completions(SyncAPIResource):
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -474,7 +479,7 @@ class Completions(SyncAPIResource):
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -543,12 +548,17 @@ class Completions(SyncAPIResource):
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -628,7 +638,7 @@ class Completions(SyncAPIResource):
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -673,7 +683,7 @@ class Completions(SyncAPIResource):
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -738,7 +748,7 @@ class Completions(SyncAPIResource):
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -807,12 +817,17 @@ class Completions(SyncAPIResource):
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -891,7 +906,7 @@ class Completions(SyncAPIResource):
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
@@ -1187,7 +1202,7 @@ class AsyncCompletions(AsyncAPIResource):
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -1233,7 +1248,7 @@ class AsyncCompletions(AsyncAPIResource):
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1289,7 +1304,7 @@ class AsyncCompletions(AsyncAPIResource):
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -1358,12 +1373,17 @@ class AsyncCompletions(AsyncAPIResource):
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
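The async client mirrors the same signature; a sketch under the same assumptions:

```python
# Sketch: the identical opt-in on the async client.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    completion = await client.chat.completions.create(
        model="o3",
        messages=[{"role": "user", "content": "Name three prime numbers."}],
        service_tier="flex",
    )
    print(completion.choices[0].message.content)


asyncio.run(main())
```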
@@ -1452,7 +1472,7 @@ class AsyncCompletions(AsyncAPIResource):
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1497,7 +1517,7 @@ class AsyncCompletions(AsyncAPIResource):
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1562,7 +1582,7 @@ class AsyncCompletions(AsyncAPIResource):
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -1631,12 +1651,17 @@ class AsyncCompletions(AsyncAPIResource):
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -1716,7 +1741,7 @@ class AsyncCompletions(AsyncAPIResource):
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1761,7 +1786,7 @@ class AsyncCompletions(AsyncAPIResource):
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1826,7 +1851,7 @@ class AsyncCompletions(AsyncAPIResource):
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -1895,12 +1920,17 @@ class AsyncCompletions(AsyncAPIResource):
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -1979,7 +2009,7 @@ class AsyncCompletions(AsyncAPIResource):
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
src/openai/resources/responses/responses.py
@@ -89,6 +89,7 @@ class Responses(SyncAPIResource):
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -130,7 +131,7 @@ class Responses(SyncAPIResource):
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -174,6 +175,24 @@ class Responses(SyncAPIResource):
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
stream: If set to true, the model response data will be streamed to the client as it is
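A sketch of the newly threaded parameter on the Responses API (the input string is illustrative):

```python
# Sketch: the Responses API now accepts service_tier as well.
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="o3",
    input="Write a haiku about request queues.",
    service_tier="flex",
)
print(response.output_text)
```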
@@ -255,6 +274,7 @@ class Responses(SyncAPIResource):
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -295,7 +315,7 @@ class Responses(SyncAPIResource):
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -346,6 +366,24 @@ class Responses(SyncAPIResource):
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -420,6 +458,7 @@ class Responses(SyncAPIResource):
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -460,7 +499,7 @@ class Responses(SyncAPIResource):
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -511,6 +550,24 @@ class Responses(SyncAPIResource):
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -584,6 +641,7 @@ class Responses(SyncAPIResource):
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -613,6 +671,7 @@ class Responses(SyncAPIResource):
"parallel_tool_calls": parallel_tool_calls,
"previous_response_id": previous_response_id,
"reasoning": reasoning,
+ "service_tier": service_tier,
"store": store,
"stream": stream,
"temperature": temperature,
@@ -903,6 +962,7 @@ class AsyncResponses(AsyncAPIResource):
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -944,7 +1004,7 @@ class AsyncResponses(AsyncAPIResource):
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -988,6 +1048,24 @@ class AsyncResponses(AsyncAPIResource):
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
stream: If set to true, the model response data will be streamed to the client as it is
@@ -1069,6 +1147,7 @@ class AsyncResponses(AsyncAPIResource):
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -1109,7 +1188,7 @@ class AsyncResponses(AsyncAPIResource):
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1160,6 +1239,24 @@ class AsyncResponses(AsyncAPIResource):
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -1234,6 +1331,7 @@ class AsyncResponses(AsyncAPIResource):
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -1274,7 +1372,7 @@ class AsyncResponses(AsyncAPIResource):
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1325,6 +1423,24 @@ class AsyncResponses(AsyncAPIResource):
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -1398,6 +1514,7 @@ class AsyncResponses(AsyncAPIResource):
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1427,6 +1544,7 @@ class AsyncResponses(AsyncAPIResource):
"parallel_tool_calls": parallel_tool_calls,
"previous_response_id": previous_response_id,
"reasoning": reasoning,
+ "service_tier": service_tier,
"store": store,
"stream": stream,
"temperature": temperature,
src/openai/resources/completions.py
@@ -159,7 +159,9 @@ class Completions(SyncAPIResource):
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream: Whether to stream back partial progress. If set, tokens will be sent as
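A sketch of the stop parameter's semantics on the legacy completions endpoint; the sequences here are illustrative:

```python
# Sketch: up to four stop sequences; generation halts at the first match
# and the matched sequence itself is not included in the returned text.
from openai import OpenAI

client = OpenAI()

completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="List three fruits, one per line:\n",
    stop=["\n\n", "END"],  # illustrative sequences
    max_tokens=50,
)
print(completion.choices[0].text)
```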
@@ -319,7 +321,9 @@ class Completions(SyncAPIResource):
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -472,7 +476,9 @@ class Completions(SyncAPIResource):
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -703,7 +709,9 @@ class AsyncCompletions(AsyncAPIResource):
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream: Whether to stream back partial progress. If set, tokens will be sent as
@@ -863,7 +871,9 @@ class AsyncCompletions(AsyncAPIResource):
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1016,7 +1026,9 @@ class AsyncCompletions(AsyncAPIResource):
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream_options: Options for streaming response. Only set this when you set `stream: true`.
src/openai/types/chat/chat_completion.py
@@ -59,8 +59,26 @@ class ChatCompletion(BaseModel):
object: Literal["chat.completion"]
"""The object type, which is always `chat.completion`."""
- service_tier: Optional[Literal["scale", "default"]] = None
- """The service tier used for processing the request."""
+ service_tier: Optional[Literal["auto", "default", "flex"]] = None
+ """Specifies the latency tier to use for processing the request.
+
+ This parameter is relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+ """
system_fingerprint: Optional[str] = None
"""This fingerprint represents the backend configuration that the model runs with.
src/openai/types/chat/chat_completion_audio_param.py
@@ -9,7 +9,7 @@ __all__ = ["ChatCompletionAudioParam"]
class ChatCompletionAudioParam(TypedDict, total=False):
- format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]]
+ format: Required[Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]]
"""Specifies the output audio format.
Must be one of `wav`, `aac`, `mp3`, `flac`, `opus`, or `pcm16`.
@@ -22,6 +22,6 @@ class ChatCompletionAudioParam(TypedDict, total=False):
]
"""The voice the model uses to respond.
- Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, and
- `shimmer`.
+ Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`,
+ `onyx`, `sage`, and `shimmer`.
"""
src/openai/types/chat/chat_completion_chunk.py
@@ -128,8 +128,26 @@ class ChatCompletionChunk(BaseModel):
object: Literal["chat.completion.chunk"]
"""The object type, which is always `chat.completion.chunk`."""
- service_tier: Optional[Literal["scale", "default"]] = None
- """The service tier used for processing the request."""
+ service_tier: Optional[Literal["auto", "default", "flex"]] = None
+ """Specifies the latency tier to use for processing the request.
+
+ This parameter is relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+ """
system_fingerprint: Optional[str] = None
"""
src/openai/types/chat/completion_create_params.py
@@ -45,7 +45,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
model: Required[Union[str, ChatModel]]
- """Model ID used to generate the response, like `gpt-4o` or `o1`.
+ """Model ID used to generate the response, like `gpt-4o` or `o3`.
OpenAI offers a wide range of models with different capabilities, performance
characteristics, and price points. Refer to the
@@ -123,7 +123,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
"""
metadata: Optional[Metadata]
@@ -208,7 +208,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
in the backend.
"""
- service_tier: Optional[Literal["auto", "default"]]
+ service_tier: Optional[Literal["auto", "default", "flex"]]
"""Specifies the latency tier to use for processing the request.
This parameter is relevant for customers subscribed to the scale tier service:
@@ -220,6 +220,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
@@ -227,9 +230,10 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
stop: Union[Optional[str], List[str], None]
- """Up to 4 sequences where the API will stop generating further tokens.
+ """Not supported with latest reasoning models `o3` and `o4-mini`.
- The returned text will not contain the stop sequence.
+ Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
"""
store: Optional[bool]
src/openai/types/responses/response.py
@@ -62,7 +62,7 @@ class Response(BaseModel):
"""
model: ResponsesModel
- """Model ID used to generate the response, like `gpt-4o` or `o1`.
+ """Model ID used to generate the response, like `gpt-4o` or `o3`.
OpenAI offers a wide range of models with different capabilities, performance
characteristics, and price points. Refer to the
@@ -149,6 +149,27 @@ class Response(BaseModel):
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
"""
+ service_tier: Optional[Literal["auto", "default", "flex"]] = None
+ """Specifies the latency tier to use for processing the request.
+
+ This parameter is relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+ """
+
status: Optional[ResponseStatus] = None
"""The status of the response generation.
src/openai/types/responses/response_create_params.py
@@ -38,7 +38,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
"""
model: Required[ResponsesModel]
- """Model ID used to generate the response, like `gpt-4o` or `o1`.
+ """Model ID used to generate the response, like `gpt-4o` or `o3`.
OpenAI offers a wide range of models with different capabilities, performance
characteristics, and price points. Refer to the
@@ -102,6 +102,27 @@ class ResponseCreateParamsBase(TypedDict, total=False):
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
"""
+ service_tier: Optional[Literal["auto", "default", "flex"]]
+ """Specifies the latency tier to use for processing the request.
+
+ This parameter is relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+ """
+
store: Optional[bool]
"""Whether to store the generated model response for later retrieval via API."""
src/openai/types/completion_create_params.py
@@ -120,9 +120,10 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
stop: Union[Optional[str], List[str], None]
- """Up to 4 sequences where the API will stop generating further tokens.
+ """Not supported with latest reasoning models `o3` and `o4-mini`.
- The returned text will not contain the stop sequence.
+ Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
"""
stream_options: Optional[ChatCompletionStreamOptionsParam]
tests/api_resources/test_responses.py
@@ -38,8 +38,10 @@ class TestResponses:
previous_response_id="previous_response_id",
reasoning={
"effort": "low",
- "generate_summary": "concise",
+ "generate_summary": "auto",
+ "summary": "auto",
},
+ service_tier="auto",
store=True,
stream=False,
temperature=1,
@@ -116,8 +118,10 @@ class TestResponses:
previous_response_id="previous_response_id",
reasoning={
"effort": "low",
- "generate_summary": "concise",
+ "generate_summary": "auto",
+ "summary": "auto",
},
+ service_tier="auto",
store=True,
temperature=1,
text={"format": {"type": "text"}},
@@ -280,8 +284,10 @@ class TestAsyncResponses:
previous_response_id="previous_response_id",
reasoning={
"effort": "low",
- "generate_summary": "concise",
+ "generate_summary": "auto",
+ "summary": "auto",
},
+ service_tier="auto",
store=True,
stream=False,
temperature=1,
@@ -358,8 +364,10 @@ class TestAsyncResponses:
previous_response_id="previous_response_id",
reasoning={
"effort": "low",
- "generate_summary": "concise",
+ "generate_summary": "auto",
+ "summary": "auto",
},
+ service_tier="auto",
store=True,
temperature=1,
text={"format": {"type": "text"}},
.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 97
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a555f81249cb084f463dcefa4aba069f9341fdaf3dd6ac27d7f237fc90e8f488.yml
-openapi_spec_hash: 8e590296cd1a54b9508510b0c7a2c45a
-config_hash: 5ea32de61ff42fcf5e66cff8d9e247ea
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5633633cc38734869cf7d993f7b549bb8e4d10e0ec45381ec2cd91507cd8eb8f.yml
+openapi_spec_hash: c855121b2b2324b99499c9244c21d24d
+config_hash: d20837393b73efdb19cd08e04c1cc9a1