Commit 1039d563
Changed files (65)
  src/openai/
    lib/_parsing/
    resources/
      beta/threads/runs/
      chat/completions/
      containers/
      realtime/
      responses/
    types/
      evals/
      realtime/
      responses/
      shared_params/
  tests/api_resources/
src/openai/lib/_parsing/_responses.py
@@ -103,6 +103,7 @@ def parse_response(
or output.type == "file_search_call"
or output.type == "web_search_call"
or output.type == "reasoning"
+ or output.type == "compaction"
or output.type == "mcp_call"
or output.type == "mcp_approval_request"
or output.type == "image_generation_call"
src/openai/resources/beta/threads/runs/runs.py
@@ -169,9 +169,9 @@ class Runs(SyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -179,6 +179,7 @@ class Runs(SyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -330,9 +331,9 @@ class Runs(SyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -340,6 +341,7 @@ class Runs(SyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -487,9 +489,9 @@ class Runs(SyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -497,6 +499,7 @@ class Runs(SyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1620,9 +1623,9 @@ class AsyncRuns(AsyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1630,6 +1633,7 @@ class AsyncRuns(AsyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1781,9 +1785,9 @@ class AsyncRuns(AsyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1791,6 +1795,7 @@ class AsyncRuns(AsyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1938,9 +1943,9 @@ class AsyncRuns(AsyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1948,6 +1953,7 @@ class AsyncRuns(AsyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
src/openai/resources/beta/assistants.py
@@ -98,9 +98,9 @@ class Assistants(SyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -108,6 +108,7 @@ class Assistants(SyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -312,9 +313,9 @@ class Assistants(SyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -322,6 +323,7 @@ class Assistants(SyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -565,9 +567,9 @@ class AsyncAssistants(AsyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -575,6 +577,7 @@ class AsyncAssistants(AsyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -779,9 +782,9 @@ class AsyncAssistants(AsyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -789,6 +792,7 @@ class AsyncAssistants(AsyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
src/openai/resources/chat/completions/completions.py
@@ -411,9 +411,9 @@ class Completions(SyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -421,6 +421,7 @@ class Completions(SyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -721,9 +722,9 @@ class Completions(SyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -731,6 +732,7 @@ class Completions(SyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -1022,9 +1024,9 @@ class Completions(SyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1032,6 +1034,7 @@ class Completions(SyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -1894,9 +1897,9 @@ class AsyncCompletions(AsyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1904,6 +1907,7 @@ class AsyncCompletions(AsyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -2204,9 +2208,9 @@ class AsyncCompletions(AsyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -2214,6 +2218,7 @@ class AsyncCompletions(AsyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -2505,9 +2510,9 @@ class AsyncCompletions(AsyncAPIResource):
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -2515,6 +2520,7 @@ class AsyncCompletions(AsyncAPIResource):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
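Across these docstrings the substantive change is the same: `xhigh` joins the accepted `reasoning_effort` values and, per the added bullet, is currently limited to `gpt-5.1-codex-max`. A minimal sketch of passing it through Chat Completions (the prompt is illustrative):

```python
from openai import OpenAI

client = OpenAI()

# Minimal sketch: `xhigh` is documented as supported only for `gpt-5.1-codex-max`;
# other models keep using `none` / `minimal` / `low` / `medium` / `high`.
completion = client.chat.completions.create(
    model="gpt-5.1-codex-max",
    reasoning_effort="xhigh",
    messages=[{"role": "user", "content": "Refactor this function for clarity."}],
)
print(completion.choices[0].message.content)
```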
src/openai/resources/containers/containers.py
@@ -60,6 +60,7 @@ class Containers(SyncAPIResource):
name: str,
expires_after: container_create_params.ExpiresAfter | Omit = omit,
file_ids: SequenceNotStr[str] | Omit = omit,
+ memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -77,6 +78,8 @@ class Containers(SyncAPIResource):
file_ids: IDs of files to copy to the container.
+ memory_limit: Optional memory limit for the container. Defaults to "1g".
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -92,6 +95,7 @@ class Containers(SyncAPIResource):
"name": name,
"expires_after": expires_after,
"file_ids": file_ids,
+ "memory_limit": memory_limit,
},
container_create_params.ContainerCreateParams,
),
@@ -256,6 +260,7 @@ class AsyncContainers(AsyncAPIResource):
name: str,
expires_after: container_create_params.ExpiresAfter | Omit = omit,
file_ids: SequenceNotStr[str] | Omit = omit,
+ memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -273,6 +278,8 @@ class AsyncContainers(AsyncAPIResource):
file_ids: IDs of files to copy to the container.
+ memory_limit: Optional memory limit for the container. Defaults to "1g".
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -288,6 +295,7 @@ class AsyncContainers(AsyncAPIResource):
"name": name,
"expires_after": expires_after,
"file_ids": file_ids,
+ "memory_limit": memory_limit,
},
container_create_params.ContainerCreateParams,
),
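The new `memory_limit` keyword mirrors the API parameter: one of the four literal sizes, defaulting to `"1g"` when omitted. A minimal sketch (the container name is illustrative):

```python
from openai import OpenAI

client = OpenAI()

# Minimal sketch: request a larger sandbox for memory-heavy workloads.
# Allowed values per the new parameter: "1g", "4g", "16g", "64g" (server default "1g").
container = client.containers.create(
    name="data-analysis-sandbox",
    memory_limit="4g",
)
print(container.id)
```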
src/openai/resources/realtime/calls.py
@@ -199,15 +199,20 @@ class Calls(SyncAPIResource):
                limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
extra_headers: Send extra headers
@@ -519,15 +524,20 @@ class AsyncCalls(AsyncAPIResource):
                limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
extra_headers: Send extra headers
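The reflowed docstring separates the three truncation behaviours: a lower max-token limit, retaining a fraction of the maximum context, and disabling truncation outright. A hedged sketch of what those values could look like; the parameter name `truncation` and the `retention_ratio` shape are assumptions drawn from the surrounding realtime types, not something shown in this hunk:

```python
# Hedged sketch of the three behaviours described above (shapes are assumed).
truncation_auto = "auto"            # server truncates at the model's default limit
truncation_disabled = "disabled"    # never truncate; error once the input limit is exceeded
truncation_retain_half = {
    "type": "retention_ratio",
    "retention_ratio": 0.5,         # keep roughly half the max context after truncating
}

# e.g. client.realtime.calls.accept(call_id=..., truncation=truncation_retain_half, ...)
```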
src/openai/resources/realtime/realtime.py
@@ -829,7 +829,7 @@ class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
def clear(self, *, event_id: str | Omit = omit) -> None:
- """**WebRTC Only:** Emit to cut off the current audio response.
+ """**WebRTC/SIP Only:** Emit to cut off the current audio response.
This will trigger the server to
stop generating audio and emit a `output_audio_buffer.cleared` event. This
@@ -1066,7 +1066,7 @@ class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource)
class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
async def clear(self, *, event_id: str | Omit = omit) -> None:
- """**WebRTC Only:** Emit to cut off the current audio response.
+ """**WebRTC/SIP Only:** Emit to cut off the current audio response.
This will trigger the server to
stop generating audio and emit a `output_audio_buffer.cleared` event. This
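The only change here is scope: the note now covers SIP as well as WebRTC transports. A minimal sketch of emitting the event from within a realtime connection, assuming the `client.realtime.connect(...)` helper defined in this module (the model name is illustrative):

```python
from openai import OpenAI

client = OpenAI()

# Minimal sketch: cut off the audio the server is currently sending.
# Per the docstring, this only applies to WebRTC/SIP transports.
with client.realtime.connect(model="gpt-realtime") as connection:
    connection.output_audio_buffer.clear()
```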
src/openai/resources/responses/responses.py
@@ -34,11 +34,10 @@ from .input_tokens import (
AsyncInputTokensWithStreamingResponse,
)
from ..._base_client import make_request_options
-from ...types.responses import response_create_params, response_retrieve_params
-from ...lib._parsing._responses import (
- TextFormatT,
- parse_response,
- type_to_text_format_param as _type_to_text_format_param,
+from ...types.responses import (
+ response_create_params,
+ response_compact_params,
+ response_retrieve_params,
)
from ...types.responses.response import Response
from ...types.responses.tool_param import ToolParam, ParseableToolParam
@@ -46,11 +45,13 @@ from ...types.shared_params.metadata import Metadata
from ...types.shared_params.reasoning import Reasoning
from ...types.responses.parsed_response import ParsedResponse
from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
+from ...types.responses.compacted_response import CompactedResponse
from ...types.responses.response_includable import ResponseIncludable
from ...types.shared_params.responses_model import ResponsesModel
from ...types.responses.response_input_param import ResponseInputParam
from ...types.responses.response_prompt_param import ResponsePromptParam
from ...types.responses.response_stream_event import ResponseStreamEvent
+from ...types.responses.response_input_item_param import ResponseInputItemParam
from ...types.responses.response_text_config_param import ResponseTextConfigParam
__all__ = ["Responses", "AsyncResponses"]
@@ -1517,6 +1518,154 @@ class Responses(SyncAPIResource):
cast_to=Response,
)
+ def compact(
+ self,
+ *,
+ input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+ instructions: Optional[str] | Omit = omit,
+ model: Union[
+ Literal[
+ "gpt-5.1",
+ "gpt-5.1-2025-11-13",
+ "gpt-5.1-codex",
+ "gpt-5.1-mini",
+ "gpt-5.1-chat-latest",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-2025-08-07",
+ "gpt-5-mini-2025-08-07",
+ "gpt-5-nano-2025-08-07",
+ "gpt-5-chat-latest",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "o1-preview",
+ "o1-preview-2024-09-12",
+ "o1-mini",
+ "o1-mini-2024-09-12",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-audio-preview",
+ "gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-audio-preview-2025-06-03",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
+ "gpt-4o-search-preview",
+ "gpt-4o-mini-search-preview",
+ "gpt-4o-search-preview-2025-03-11",
+ "gpt-4o-mini-search-preview-2025-03-11",
+ "chatgpt-4o-latest",
+ "codex-mini-latest",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ "o1-pro",
+ "o1-pro-2025-03-19",
+ "o3-pro",
+ "o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
+ "computer-use-preview",
+ "computer-use-preview-2025-03-11",
+ "gpt-5-codex",
+ "gpt-5-pro",
+ "gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
+ ],
+ str,
+ None,
+ ]
+ | Omit = omit,
+ previous_response_id: Optional[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> CompactedResponse:
+ """
+ Compact conversation
+
+ Args:
+ input: Text, image, or file inputs to the model, used to generate a response
+
+ instructions: A system (or developer) message inserted into the model's context. When used
+ along with `previous_response_id`, the instructions from a previous response
+ will not be carried over to the next response. This makes it simple to swap out
+ system (or developer) messages in new responses.
+
+ model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
+ wide range of models with different capabilities, performance characteristics,
+ and price points. Refer to the
+ [model guide](https://platform.openai.com/docs/models) to browse and compare
+ available models.
+
+ previous_response_id: The unique ID of the previous response to the model. Use this to create
+ multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+ Cannot be used in conjunction with `conversation`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/responses/compact",
+ body=maybe_transform(
+ {
+ "input": input,
+ "instructions": instructions,
+ "model": model,
+ "previous_response_id": previous_response_id,
+ },
+ response_compact_params.ResponseCompactParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompactedResponse,
+ )
+
class AsyncResponses(AsyncAPIResource):
@cached_property
@@ -2983,6 +3132,154 @@ class AsyncResponses(AsyncAPIResource):
cast_to=Response,
)
+ async def compact(
+ self,
+ *,
+ input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+ instructions: Optional[str] | Omit = omit,
+ model: Union[
+ Literal[
+ "gpt-5.1",
+ "gpt-5.1-2025-11-13",
+ "gpt-5.1-codex",
+ "gpt-5.1-mini",
+ "gpt-5.1-chat-latest",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-2025-08-07",
+ "gpt-5-mini-2025-08-07",
+ "gpt-5-nano-2025-08-07",
+ "gpt-5-chat-latest",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "o1-preview",
+ "o1-preview-2024-09-12",
+ "o1-mini",
+ "o1-mini-2024-09-12",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-audio-preview",
+ "gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-audio-preview-2025-06-03",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
+ "gpt-4o-search-preview",
+ "gpt-4o-mini-search-preview",
+ "gpt-4o-search-preview-2025-03-11",
+ "gpt-4o-mini-search-preview-2025-03-11",
+ "chatgpt-4o-latest",
+ "codex-mini-latest",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ "o1-pro",
+ "o1-pro-2025-03-19",
+ "o3-pro",
+ "o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
+ "computer-use-preview",
+ "computer-use-preview-2025-03-11",
+ "gpt-5-codex",
+ "gpt-5-pro",
+ "gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
+ ],
+ str,
+ None,
+ ]
+ | Omit = omit,
+ previous_response_id: Optional[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> CompactedResponse:
+ """
+ Compact conversation
+
+ Args:
+ input: Text, image, or file inputs to the model, used to generate a response
+
+ instructions: A system (or developer) message inserted into the model's context. When used
+ along with `previous_response_id`, the instructions from a previous response
+ will not be carried over to the next response. This makes it simple to swap out
+ system (or developer) messages in new responses.
+
+ model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
+ wide range of models with different capabilities, performance characteristics,
+ and price points. Refer to the
+ [model guide](https://platform.openai.com/docs/models) to browse and compare
+ available models.
+
+ previous_response_id: The unique ID of the previous response to the model. Use this to create
+ multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+ Cannot be used in conjunction with `conversation`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/responses/compact",
+ body=await async_maybe_transform(
+ {
+ "input": input,
+ "instructions": instructions,
+ "model": model,
+ "previous_response_id": previous_response_id,
+ },
+ response_compact_params.ResponseCompactParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompactedResponse,
+ )
+
class ResponsesWithRawResponse:
def __init__(self, responses: Responses) -> None:
@@ -3000,9 +3297,6 @@ class ResponsesWithRawResponse:
self.cancel = _legacy_response.to_raw_response_wrapper(
responses.cancel,
)
- self.parse = _legacy_response.to_raw_response_wrapper(
- responses.parse,
- )
@cached_property
def input_items(self) -> InputItemsWithRawResponse:
@@ -3029,9 +3323,6 @@ class AsyncResponsesWithRawResponse:
self.cancel = _legacy_response.async_to_raw_response_wrapper(
responses.cancel,
)
- self.parse = _legacy_response.async_to_raw_response_wrapper(
- responses.parse,
- )
@cached_property
def input_items(self) -> AsyncInputItemsWithRawResponse:
@@ -3058,6 +3349,9 @@ class ResponsesWithStreamingResponse:
self.cancel = to_streamed_response_wrapper(
responses.cancel,
)
+ self.compact = to_streamed_response_wrapper(
+ responses.compact,
+ )
@cached_property
def input_items(self) -> InputItemsWithStreamingResponse:
@@ -3084,6 +3378,9 @@ class AsyncResponsesWithStreamingResponse:
self.cancel = async_to_streamed_response_wrapper(
responses.cancel,
)
+ self.compact = async_to_streamed_response_wrapper(
+ responses.compact,
+ )
@cached_property
def input_items(self) -> AsyncInputItemsWithStreamingResponse:
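The new `compact` method posts to `/responses/compact` with the same `input` / `instructions` / `model` / `previous_response_id` shape shown above and returns a `CompactedResponse`. A minimal sketch of compacting an ongoing conversation by its previous response ID (the prompt is illustrative):

```python
from openai import OpenAI

client = OpenAI()

# Start (or continue) a conversation as usual.
response = client.responses.create(
    model="gpt-5.1",
    input="Walk me through the trade-offs of the caching strategies we discussed.",
)

# Minimal sketch: compact the accumulated conversation state so that later turns
# start from a smaller context. Only parameters shown in this diff are used.
compacted = client.responses.compact(
    model="gpt-5.1",
    previous_response_id=response.id,
)
print(compacted)
```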
src/openai/resources/videos.py
@@ -84,11 +84,13 @@ class Videos(SyncAPIResource):
input_reference: Optional image reference that guides generation.
- model: The video generation model to use. Defaults to `sora-2`.
+ model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults
+ to `sora-2`.
- seconds: Clip duration in seconds. Defaults to 4 seconds.
+ seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.
- size: Output resolution formatted as width x height. Defaults to 720x1280.
+ size: Output resolution formatted as width x height (allowed values: 720x1280,
+ 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
extra_headers: Send extra headers
@@ -437,11 +439,13 @@ class AsyncVideos(AsyncAPIResource):
input_reference: Optional image reference that guides generation.
- model: The video generation model to use. Defaults to `sora-2`.
+ model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults
+ to `sora-2`.
- seconds: Clip duration in seconds. Defaults to 4 seconds.
+ seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.
- size: Output resolution formatted as width x height. Defaults to 720x1280.
+ size: Output resolution formatted as width x height (allowed values: 720x1280,
+ 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
extra_headers: Send extra headers
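These docstrings now spell out the accepted values for each parameter. A minimal sketch of a create call using them; the `prompt` argument is assumed from the existing signature and is not part of this hunk:

```python
from openai import OpenAI

client = OpenAI()

# Minimal sketch using the documented allowed values (other arguments assumed).
video = client.videos.create(
    prompt="A timelapse of clouds rolling over a mountain ridge",
    model="sora-2-pro",   # allowed: sora-2, sora-2-pro (default sora-2)
    seconds="8",          # allowed: 4, 8, 12 (default 4)
    size="1280x720",      # allowed: 720x1280, 1280x720, 1024x1792, 1792x1024 (default 720x1280)
)
print(video.id)
```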
src/openai/types/beta/threads/run_create_params.py
@@ -111,9 +111,9 @@ class RunCreateParamsBase(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -121,6 +121,7 @@ class RunCreateParamsBase(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[AssistantResponseFormatOptionParam]
src/openai/types/beta/assistant_create_params.py
@@ -62,9 +62,9 @@ class AssistantCreateParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -72,6 +72,7 @@ class AssistantCreateParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[AssistantResponseFormatOptionParam]
src/openai/types/beta/assistant_update_params.py
@@ -97,9 +97,9 @@ class AssistantUpdateParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -107,6 +107,7 @@ class AssistantUpdateParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[AssistantResponseFormatOptionParam]
src/openai/types/chat/completion_create_params.py
@@ -197,9 +197,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -207,6 +207,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: ResponseFormat
src/openai/types/evals/create_eval_completions_run_data_source.py
@@ -172,9 +172,9 @@ class SamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -182,6 +182,7 @@ class SamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[SamplingParamsResponseFormat] = None
src/openai/types/evals/create_eval_completions_run_data_source_param.py
@@ -168,9 +168,9 @@ class SamplingParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -178,6 +178,7 @@ class SamplingParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: SamplingParamsResponseFormat
src/openai/types/evals/run_cancel_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
src/openai/types/evals/run_create_params.py
@@ -116,9 +116,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -126,6 +126,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float]
@@ -263,9 +264,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -273,6 +274,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: int
src/openai/types/evals/run_create_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
src/openai/types/evals/run_list_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
src/openai/types/evals/run_retrieve_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
src/openai/types/graders/score_model_grader.py
@@ -67,9 +67,9 @@ class SamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -77,6 +77,7 @@ class SamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
src/openai/types/graders/score_model_grader_param.py
@@ -73,9 +73,9 @@ class SamplingParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -83,6 +83,7 @@ class SamplingParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int]
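For context on the new `xhigh` value, a minimal sketch of passing it through the Responses API is below; it assumes `xhigh` is only accepted alongside `gpt-5.1-codex-max`, as the updated docstrings state.

```python
from openai import OpenAI

client = OpenAI()

# `xhigh` is documented above as only supported for `gpt-5.1-codex-max`,
# so the two are paired here.
response = client.responses.create(
    model="gpt-5.1-codex-max",
    reasoning={"effort": "xhigh"},
    input="Summarize the trade-offs of the proposed refactor.",
)
print(response.output_text)
```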
src/openai/types/realtime/__init__.py
@@ -175,6 +175,9 @@ from .realtime_response_usage_input_token_details import (
from .response_function_call_arguments_done_event import (
ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent,
)
+from .input_audio_buffer_dtmf_event_received_event import (
+ InputAudioBufferDtmfEventReceivedEvent as InputAudioBufferDtmfEventReceivedEvent,
+)
from .realtime_conversation_item_assistant_message import (
RealtimeConversationItemAssistantMessage as RealtimeConversationItemAssistantMessage,
)
src/openai/types/realtime/call_accept_params.py
@@ -110,13 +110,18 @@ class CallAcceptParams(TypedDict, total=False):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferDtmfEventReceivedEvent"]
+
+
+class InputAudioBufferDtmfEventReceivedEvent(BaseModel):
+ event: str
+    """The telephone keypad digit that was pressed by the user."""
+
+ received_at: int
+    """Unix timestamp (UTC, in seconds) when the DTMF event was received by the server."""
+
+ type: Literal["input_audio_buffer.dtmf_event_received"]
+ """The event type, must be `input_audio_buffer.dtmf_event_received`."""
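A hedged sketch of consuming the new DTMF event from a realtime connection. The `connect()` usage mirrors the SDK's realtime websocket helper; whether DTMF events are actually emitted depends on the call transport (e.g. SIP), so treat this as illustrative only.

```python
from openai import OpenAI

client = OpenAI()

with client.realtime.connect(model="gpt-realtime") as connection:
    for event in connection:
        # New event type added in this change.
        if event.type == "input_audio_buffer.dtmf_event_received":
            print(f"Caller pressed {event.event} at {event.received_at}")
```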
src/openai/types/realtime/realtime_audio_input_turn_detection.py
@@ -14,9 +14,14 @@ class ServerVad(BaseModel):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: Optional[bool] = None
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false` this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int] = None
@@ -37,9 +42,13 @@ class ServerVad(BaseModel):
interrupt_response: Optional[bool] = None
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: Optional[int] = None
src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
@@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: bool
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false` this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int]
@@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False):
interrupt_response: bool
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: int
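The interaction between `create_response` and `interrupt_response` documented above can be summarized with a small turn-detection payload; the field names come straight from the `ServerVad` shapes in this diff, while the surrounding session usage is illustrative.

```python
# Emit VAD events but never respond or interrupt automatically; the client
# decides when to trigger response.create itself.
turn_detection = {
    "type": "server_vad",
    "create_response": False,
    "interrupt_response": False,
}
```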
src/openai/types/realtime/realtime_server_event.py
@@ -42,6 +42,7 @@ from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaE
from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent
from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent
from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent
+from .input_audio_buffer_dtmf_event_received_event import InputAudioBufferDtmfEventReceivedEvent
from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent
from .conversation_item_input_audio_transcription_segment import ConversationItemInputAudioTranscriptionSegment
from .conversation_item_input_audio_transcription_delta_event import ConversationItemInputAudioTranscriptionDeltaEvent
@@ -116,6 +117,7 @@ RealtimeServerEvent: TypeAlias = Annotated[
RealtimeErrorEvent,
InputAudioBufferClearedEvent,
InputAudioBufferCommittedEvent,
+ InputAudioBufferDtmfEventReceivedEvent,
InputAudioBufferSpeechStartedEvent,
InputAudioBufferSpeechStoppedEvent,
RateLimitsUpdatedEvent,
src/openai/types/realtime/realtime_session_create_request.py
@@ -110,13 +110,18 @@ class RealtimeSessionCreateRequest(BaseModel):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
src/openai/types/realtime/realtime_session_create_request_param.py
@@ -110,13 +110,18 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
src/openai/types/realtime/realtime_session_create_response.py
@@ -53,9 +53,14 @@ class AudioInputTurnDetectionServerVad(BaseModel):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: Optional[bool] = None
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false` this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int] = None
@@ -76,9 +81,13 @@ class AudioInputTurnDetectionServerVad(BaseModel):
interrupt_response: Optional[bool] = None
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: Optional[int] = None
@@ -463,13 +472,18 @@ class RealtimeSessionCreateResponse(BaseModel):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
@@ -14,9 +14,14 @@ class ServerVad(BaseModel):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: Optional[bool] = None
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false` this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int] = None
@@ -37,9 +42,13 @@ class ServerVad(BaseModel):
interrupt_response: Optional[bool] = None
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: Optional[int] = None
src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py
@@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: bool
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false` this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int]
@@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False):
interrupt_response: bool
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: int
src/openai/types/responses/__init__.py
@@ -28,6 +28,7 @@ from .file_search_tool import FileSearchTool as FileSearchTool
from .custom_tool_param import CustomToolParam as CustomToolParam
from .tool_choice_shell import ToolChoiceShell as ToolChoiceShell
from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes
+from .compacted_response import CompactedResponse as CompactedResponse
from .easy_input_message import EasyInputMessage as EasyInputMessage
from .response_item_list import ResponseItemList as ResponseItemList
from .tool_choice_custom import ToolChoiceCustom as ToolChoiceCustom
@@ -60,6 +61,7 @@ from .input_item_list_params import InputItemListParams as InputItemListParams
from .response_create_params import ResponseCreateParams as ResponseCreateParams
from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent
from .response_input_content import ResponseInputContent as ResponseInputContent
+from .response_compact_params import ResponseCompactParams as ResponseCompactParams
from .response_output_message import ResponseOutputMessage as ResponseOutputMessage
from .response_output_refusal import ResponseOutputRefusal as ResponseOutputRefusal
from .response_reasoning_item import ResponseReasoningItem as ResponseReasoningItem
@@ -69,6 +71,7 @@ from .tool_choice_types_param import ToolChoiceTypesParam as ToolChoiceTypesPara
from .web_search_preview_tool import WebSearchPreviewTool as WebSearchPreviewTool
from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam
from .input_token_count_params import InputTokenCountParams as InputTokenCountParams
+from .response_compaction_item import ResponseCompactionItem as ResponseCompactionItem
from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent
from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams
from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent
@@ -108,6 +111,7 @@ from .response_reasoning_item_param import ResponseReasoningItemParam as Respons
from .tool_choice_apply_patch_param import ToolChoiceApplyPatchParam as ToolChoiceApplyPatchParam
from .web_search_preview_tool_param import WebSearchPreviewToolParam as WebSearchPreviewToolParam
from .response_apply_patch_tool_call import ResponseApplyPatchToolCall as ResponseApplyPatchToolCall
+from .response_compaction_item_param import ResponseCompactionItemParam as ResponseCompactionItemParam
from .response_file_search_tool_call import ResponseFileSearchToolCall as ResponseFileSearchToolCall
from .response_mcp_call_failed_event import ResponseMcpCallFailedEvent as ResponseMcpCallFailedEvent
from .response_custom_tool_call_param import ResponseCustomToolCallParam as ResponseCustomToolCallParam
@@ -133,6 +137,7 @@ from .response_input_message_content_list import ResponseInputMessageContentList
from .response_mcp_call_in_progress_event import ResponseMcpCallInProgressEvent as ResponseMcpCallInProgressEvent
from .response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent as ResponseReasoningTextDeltaEvent
from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam as ResponseCompactionItemParamParam
from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam as ResponseFileSearchToolCallParam
from .response_mcp_list_tools_failed_event import ResponseMcpListToolsFailedEvent as ResponseMcpListToolsFailedEvent
from .response_apply_patch_tool_call_output import ResponseApplyPatchToolCallOutput as ResponseApplyPatchToolCallOutput
src/openai/types/responses/compacted_response.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .response_usage import ResponseUsage
+from .response_output_item import ResponseOutputItem
+
+__all__ = ["CompactedResponse"]
+
+
+class CompactedResponse(BaseModel):
+ id: str
+ """The unique identifier for the compacted response."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the compacted conversation was created."""
+
+ object: Literal["response.compaction"]
+ """The object type. Always `response.compaction`."""
+
+ output: List[ResponseOutputItem]
+ """The compacted list of output items.
+
+ This is a list of all user messages, followed by a single compaction item.
+ """
+
+ usage: ResponseUsage
+ """
+ Token accounting for the compaction pass, including cached, reasoning, and total
+ tokens.
+ """
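A minimal sketch of calling the new compaction endpoint and reading the `CompactedResponse` fields defined above; the response ID is a placeholder.

```python
from openai import OpenAI

client = OpenAI()

compacted = client.responses.compact(previous_response_id="resp_123")

print(compacted.object)              # "response.compaction"
print(compacted.usage.total_tokens)  # token accounting for the compaction pass

# Per the docstring above, output is the retained user messages followed by
# a single compaction item.
for item in compacted.output:
    if item.type == "compaction":
        print("compaction item:", item.id)
```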
src/openai/types/responses/parsed_response.py
@@ -6,7 +6,6 @@ from typing_extensions import Annotated, TypeAlias
from ..._utils import PropertyInfo
from .response import Response
from ..._models import GenericModel
-from ..._utils._transform import PropertyInfo
from .response_output_item import (
McpCall,
McpListTools,
@@ -19,6 +18,7 @@ from .response_output_text import ResponseOutputText
from .response_output_message import ResponseOutputMessage
from .response_output_refusal import ResponseOutputRefusal
from .response_reasoning_item import ResponseReasoningItem
+from .response_compaction_item import ResponseCompactionItem
from .response_custom_tool_call import ResponseCustomToolCall
from .response_computer_tool_call import ResponseComputerToolCall
from .response_function_tool_call import ResponseFunctionToolCall
@@ -79,6 +79,7 @@ ParsedResponseOutputItem: TypeAlias = Annotated[
McpListTools,
ResponseCodeInterpreterToolCall,
ResponseCustomToolCall,
+ ResponseCompactionItem,
ResponseFunctionShellToolCall,
ResponseFunctionShellToolCallOutput,
ResponseApplyPatchToolCall,
src/openai/types/responses/response_compact_params.py
@@ -0,0 +1,126 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal, TypedDict
+
+from .response_input_item_param import ResponseInputItemParam
+
+__all__ = ["ResponseCompactParams"]
+
+
+class ResponseCompactParams(TypedDict, total=False):
+ input: Union[str, Iterable[ResponseInputItemParam], None]
+    """Text, image, or file inputs to the model, used to generate a response."""
+
+ instructions: Optional[str]
+ """
+ A system (or developer) message inserted into the model's context. When used
+ along with `previous_response_id`, the instructions from a previous response
+ will not be carried over to the next response. This makes it simple to swap out
+ system (or developer) messages in new responses.
+ """
+
+ model: Union[
+ Literal[
+ "gpt-5.1",
+ "gpt-5.1-2025-11-13",
+ "gpt-5.1-codex",
+ "gpt-5.1-mini",
+ "gpt-5.1-chat-latest",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-2025-08-07",
+ "gpt-5-mini-2025-08-07",
+ "gpt-5-nano-2025-08-07",
+ "gpt-5-chat-latest",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "o1-preview",
+ "o1-preview-2024-09-12",
+ "o1-mini",
+ "o1-mini-2024-09-12",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-audio-preview",
+ "gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-audio-preview-2025-06-03",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
+ "gpt-4o-search-preview",
+ "gpt-4o-mini-search-preview",
+ "gpt-4o-search-preview-2025-03-11",
+ "gpt-4o-mini-search-preview-2025-03-11",
+ "chatgpt-4o-latest",
+ "codex-mini-latest",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ "o1-pro",
+ "o1-pro-2025-03-19",
+ "o3-pro",
+ "o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
+ "computer-use-preview",
+ "computer-use-preview-2025-03-11",
+ "gpt-5-codex",
+ "gpt-5-pro",
+ "gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
+ ],
+ str,
+ None,
+ ]
+ """Model ID used to generate the response, like `gpt-5` or `o3`.
+
+ OpenAI offers a wide range of models with different capabilities, performance
+ characteristics, and price points. Refer to the
+ [model guide](https://platform.openai.com/docs/models) to browse and compare
+ available models.
+ """
+
+ previous_response_id: Optional[str]
+ """The unique ID of the previous response to the model.
+
+ Use this to create multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+ Cannot be used in conjunction with `conversation`.
+ """
src/openai/types/responses/response_compaction_item.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompactionItem"]
+
+
+class ResponseCompactionItem(BaseModel):
+ id: str
+ """The unique ID of the compaction item."""
+
+ encrypted_content: str
+
+ type: Literal["compaction"]
+ """The type of the item. Always `compaction`."""
+
+ created_by: Optional[str] = None
src/openai/types/responses/response_compaction_item_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompactionItemParam"]
+
+
+class ResponseCompactionItemParam(BaseModel):
+ encrypted_content: str
+
+ type: Literal["compaction"]
+ """The type of the item. Always `compaction`."""
+
+ id: Optional[str] = None
+ """The ID of the compaction item."""
src/openai/types/responses/response_compaction_item_param_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ResponseCompactionItemParamParam"]
+
+
+class ResponseCompactionItemParamParam(TypedDict, total=False):
+ encrypted_content: Required[str]
+
+ type: Required[Literal["compaction"]]
+ """The type of the item. Always `compaction`."""
+
+ id: Optional[str]
+ """The ID of the compaction item."""
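Because this change also adds the compaction item to the input unions below, a previously returned compaction item can plausibly be replayed as input on a later turn. The field values here are placeholders and the replay pattern is an assumption, not something this diff demonstrates.

```python
from openai import OpenAI

client = OpenAI()

follow_up = client.responses.create(
    model="gpt-5.1",
    input=[
        # Compaction item carried over from a prior compaction pass.
        {"type": "compaction", "encrypted_content": "opaque-compaction-token"},
        {"role": "user", "content": "Continue from the compacted context."},
    ],
)
print(follow_up.output_text)
```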
src/openai/types/responses/response_function_shell_call_output_content.py
@@ -27,10 +27,10 @@ Outcome: TypeAlias = Annotated[Union[OutcomeTimeout, OutcomeExit], PropertyInfo(
class ResponseFunctionShellCallOutputContent(BaseModel):
outcome: Outcome
- """The exit or timeout outcome associated with this chunk."""
+ """The exit or timeout outcome associated with this shell call."""
stderr: str
- """Captured stderr output for this chunk of the shell call."""
+ """Captured stderr output for the shell call."""
stdout: str
- """Captured stdout output for this chunk of the shell call."""
+ """Captured stdout output for the shell call."""
src/openai/types/responses/response_function_shell_call_output_content_param.py
@@ -26,10 +26,10 @@ Outcome: TypeAlias = Union[OutcomeTimeout, OutcomeExit]
class ResponseFunctionShellCallOutputContentParam(TypedDict, total=False):
outcome: Required[Outcome]
- """The exit or timeout outcome associated with this chunk."""
+ """The exit or timeout outcome associated with this shell call."""
stderr: Required[str]
- """Captured stderr output for this chunk of the shell call."""
+ """Captured stderr output for the shell call."""
stdout: Required[str]
- """Captured stdout output for this chunk of the shell call."""
+ """Captured stdout output for the shell call."""
src/openai/types/responses/response_function_shell_tool_call.py
@@ -20,7 +20,7 @@ class Action(BaseModel):
class ResponseFunctionShellToolCall(BaseModel):
id: str
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -29,7 +29,7 @@ class ResponseFunctionShellToolCall(BaseModel):
"""The shell commands and limits that describe how to run the tool call."""
call_id: str
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
status: Literal["in_progress", "completed", "incomplete"]
"""The status of the shell call.
src/openai/types/responses/response_input_item.py
@@ -12,6 +12,7 @@ from .response_custom_tool_call import ResponseCustomToolCall
from .response_computer_tool_call import ResponseComputerToolCall
from .response_function_tool_call import ResponseFunctionToolCall
from .response_function_web_search import ResponseFunctionWebSearch
+from .response_compaction_item_param import ResponseCompactionItemParam
from .response_file_search_tool_call import ResponseFileSearchToolCall
from .response_custom_tool_call_output import ResponseCustomToolCallOutput
from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall
@@ -215,13 +216,13 @@ class ShellCall(BaseModel):
"""The shell commands and limits that describe how to run the tool call."""
call_id: str
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
type: Literal["shell_call"]
- """The type of the item. Always `function_shell_call`."""
+ """The type of the item. Always `shell_call`."""
id: Optional[str] = None
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -235,7 +236,7 @@ class ShellCall(BaseModel):
class ShellCallOutput(BaseModel):
call_id: str
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
output: List[ResponseFunctionShellCallOutputContent]
"""
@@ -244,10 +245,10 @@ class ShellCallOutput(BaseModel):
"""
type: Literal["shell_call_output"]
- """The type of the item. Always `function_shell_call_output`."""
+ """The type of the item. Always `shell_call_output`."""
id: Optional[str] = None
- """The unique ID of the function shell tool call output.
+ """The unique ID of the shell tool call output.
Populated when this item is returned via API.
"""
@@ -462,6 +463,7 @@ ResponseInputItem: TypeAlias = Annotated[
ResponseFunctionToolCall,
FunctionCallOutput,
ResponseReasoningItem,
+ ResponseCompactionItemParam,
ImageGenerationCall,
ResponseCodeInterpreterToolCall,
LocalShellCall,
src/openai/types/responses/response_input_item_param.py
@@ -13,6 +13,7 @@ from .response_custom_tool_call_param import ResponseCustomToolCallParam
from .response_computer_tool_call_param import ResponseComputerToolCallParam
from .response_function_tool_call_param import ResponseFunctionToolCallParam
from .response_function_web_search_param import ResponseFunctionWebSearchParam
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam
from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam
from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam
from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam
@@ -216,13 +217,13 @@ class ShellCall(TypedDict, total=False):
"""The shell commands and limits that describe how to run the tool call."""
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
type: Required[Literal["shell_call"]]
- """The type of the item. Always `function_shell_call`."""
+ """The type of the item. Always `shell_call`."""
id: Optional[str]
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -236,7 +237,7 @@ class ShellCall(TypedDict, total=False):
class ShellCallOutput(TypedDict, total=False):
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]]
"""
@@ -245,10 +246,10 @@ class ShellCallOutput(TypedDict, total=False):
"""
type: Required[Literal["shell_call_output"]]
- """The type of the item. Always `function_shell_call_output`."""
+ """The type of the item. Always `shell_call_output`."""
id: Optional[str]
- """The unique ID of the function shell tool call output.
+ """The unique ID of the shell tool call output.
Populated when this item is returned via API.
"""
@@ -461,6 +462,7 @@ ResponseInputItemParam: TypeAlias = Union[
ResponseFunctionToolCallParam,
FunctionCallOutput,
ResponseReasoningItemParam,
+ ResponseCompactionItemParamParam,
ImageGenerationCall,
ResponseCodeInterpreterToolCallParam,
LocalShellCall,
src/openai/types/responses/response_input_param.py
@@ -13,6 +13,7 @@ from .response_custom_tool_call_param import ResponseCustomToolCallParam
from .response_computer_tool_call_param import ResponseComputerToolCallParam
from .response_function_tool_call_param import ResponseFunctionToolCallParam
from .response_function_web_search_param import ResponseFunctionWebSearchParam
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam
from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam
from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam
from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam
@@ -217,13 +218,13 @@ class ShellCall(TypedDict, total=False):
"""The shell commands and limits that describe how to run the tool call."""
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
type: Required[Literal["shell_call"]]
- """The type of the item. Always `function_shell_call`."""
+ """The type of the item. Always `shell_call`."""
id: Optional[str]
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -237,7 +238,7 @@ class ShellCall(TypedDict, total=False):
class ShellCallOutput(TypedDict, total=False):
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]]
"""
@@ -246,10 +247,10 @@ class ShellCallOutput(TypedDict, total=False):
"""
type: Required[Literal["shell_call_output"]]
- """The type of the item. Always `function_shell_call_output`."""
+ """The type of the item. Always `shell_call_output`."""
id: Optional[str]
- """The unique ID of the function shell tool call output.
+ """The unique ID of the shell tool call output.
Populated when this item is returned via API.
"""
@@ -462,6 +463,7 @@ ResponseInputItemParam: TypeAlias = Union[
ResponseFunctionToolCallParam,
FunctionCallOutput,
ResponseReasoningItemParam,
+ ResponseCompactionItemParamParam,
ImageGenerationCall,
ResponseCodeInterpreterToolCallParam,
LocalShellCall,
src/openai/types/responses/response_output_item.py
@@ -7,6 +7,7 @@ from ..._utils import PropertyInfo
from ..._models import BaseModel
from .response_output_message import ResponseOutputMessage
from .response_reasoning_item import ResponseReasoningItem
+from .response_compaction_item import ResponseCompactionItem
from .response_custom_tool_call import ResponseCustomToolCall
from .response_computer_tool_call import ResponseComputerToolCall
from .response_function_tool_call import ResponseFunctionToolCall
@@ -173,6 +174,7 @@ ResponseOutputItem: TypeAlias = Annotated[
ResponseFunctionWebSearch,
ResponseComputerToolCall,
ResponseReasoningItem,
+ ResponseCompactionItem,
ImageGenerationCall,
ResponseCodeInterpreterToolCall,
LocalShellCall,
src/openai/types/responses/tool.py
@@ -174,7 +174,7 @@ class CodeInterpreter(BaseModel):
"""The code interpreter container.
Can be a container ID or an object that specifies uploaded file IDs to make
- available to your code.
+ available to your code, along with an optional `memory_limit` setting.
"""
type: Literal["code_interpreter"]
src/openai/types/responses/tool_param.py
@@ -174,7 +174,7 @@ class CodeInterpreter(TypedDict, total=False):
"""The code interpreter container.
Can be a container ID or an object that specifies uploaded file IDs to make
- available to your code.
+ available to your code, along with an optional `memory_limit` setting.
"""
type: Required[Literal["code_interpreter"]]
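The updated docstring implies the auto-container object accepts a `memory_limit` key; the placement below is inferred from that wording and from the new `ContainerCreateParams` field, not shown explicitly in this diff.

```python
# Assumed shape: auto container with uploaded files and a larger memory limit.
tools = [
    {
        "type": "code_interpreter",
        "container": {
            "type": "auto",
            "file_ids": ["file_123"],
            "memory_limit": "4g",
        },
    }
]
```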
src/openai/types/container_create_params.py
@@ -19,6 +19,9 @@ class ContainerCreateParams(TypedDict, total=False):
file_ids: SequenceNotStr[str]
"""IDs of files to copy to the container."""
+ memory_limit: Literal["1g", "4g", "16g", "64g"]
+ """Optional memory limit for the container. Defaults to "1g"."""
+
class ExpiresAfter(TypedDict, total=False):
anchor: Required[Literal["last_active_at"]]
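The new `memory_limit` parameter can be exercised directly through the containers resource, as the updated tests further down also do; the container name and file ID here are placeholders.

```python
from openai import OpenAI

client = OpenAI()

container = client.containers.create(
    name="analysis-sandbox",
    file_ids=["file_123"],
    memory_limit="4g",  # defaults to "1g" when omitted
)
print(container.memory_limit, container.last_active_at)
```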
src/openai/types/container_create_response.py
@@ -38,3 +38,9 @@ class ContainerCreateResponse(BaseModel):
point for the expiration. The minutes is the number of minutes after the anchor
before the container expires.
"""
+
+ last_active_at: Optional[int] = None
+ """Unix timestamp (in seconds) when the container was last active."""
+
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+ """The memory limit configured for the container."""
src/openai/types/container_list_response.py
@@ -38,3 +38,9 @@ class ContainerListResponse(BaseModel):
point for the expiration. The minutes is the number of minutes after the anchor
before the container expires.
"""
+
+ last_active_at: Optional[int] = None
+ """Unix timestamp (in seconds) when the container was last active."""
+
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+ """The memory limit configured for the container."""
src/openai/types/container_retrieve_response.py
@@ -38,3 +38,9 @@ class ContainerRetrieveResponse(BaseModel):
point for the expiration. The minutes is the number of minutes after the anchor
before the container expires.
"""
+
+ last_active_at: Optional[int] = None
+ """Unix timestamp (in seconds) when the container was last active."""
+
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+ """The memory limit configured for the container."""
src/openai/types/video_create_params.py
@@ -20,10 +20,16 @@ class VideoCreateParams(TypedDict, total=False):
"""Optional image reference that guides generation."""
model: VideoModel
- """The video generation model to use. Defaults to `sora-2`."""
+ """The video generation model to use (allowed values: sora-2, sora-2-pro).
+
+ Defaults to `sora-2`.
+ """
seconds: VideoSeconds
- """Clip duration in seconds. Defaults to 4 seconds."""
+ """Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds."""
size: VideoSize
- """Output resolution formatted as width x height. Defaults to 720x1280."""
+ """
+ Output resolution formatted as width x height (allowed values: 720x1280,
+ 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
+ """
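A short sketch of a video request using the allowed values now spelled out in the docstrings; `seconds` and `size` are passed as string literals per the `VideoSeconds`/`VideoSize` types.

```python
from openai import OpenAI

client = OpenAI()

video = client.videos.create(
    prompt="A slow pan across a foggy harbor at dawn",
    model="sora-2",
    seconds="8",
    size="1280x720",
)
print(video.id, video.status)
```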
tests/api_resources/test_containers.py
@@ -38,6 +38,7 @@ class TestContainers:
"minutes": 0,
},
file_ids=["string"],
+ memory_limit="1g",
)
assert_matches_type(ContainerCreateResponse, container, path=["response"])
@@ -197,6 +198,7 @@ class TestAsyncContainers:
"minutes": 0,
},
file_ids=["string"],
+ memory_limit="1g",
)
assert_matches_type(ContainerCreateResponse, container, path=["response"])
tests/api_resources/test_responses.py
@@ -12,6 +12,7 @@ from tests.utils import assert_matches_type
from openai._utils import assert_signatures_in_sync
from openai.types.responses import (
Response,
+ CompactedResponse,
)
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -36,7 +37,7 @@ class TestResponses:
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -117,7 +118,7 @@ class TestResponses:
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -358,6 +359,41 @@ class TestResponses:
"",
)
+ @parametrize
+ def test_method_compact(self, client: OpenAI) -> None:
+ response = client.responses.compact()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ def test_method_compact_with_all_params(self, client: OpenAI) -> None:
+ response = client.responses.compact(
+ input="string",
+ instructions="instructions",
+ model="gpt-5.1",
+ previous_response_id="resp_123",
+ )
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ def test_raw_response_compact(self, client: OpenAI) -> None:
+ http_response = client.responses.with_raw_response.compact()
+
+ assert http_response.is_closed is True
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+ response = http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ def test_streaming_response_compact(self, client: OpenAI) -> None:
+ with client.responses.with_streaming_response.compact() as http_response:
+ assert not http_response.is_closed
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ response = http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ assert cast(Any, http_response.is_closed) is True
+
@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"])
def test_parse_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None:
@@ -391,7 +427,7 @@ class TestAsyncResponses:
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -472,7 +508,7 @@ class TestAsyncResponses:
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -712,3 +748,38 @@ class TestAsyncResponses:
await async_client.responses.with_raw_response.cancel(
"",
)
+
+ @parametrize
+ async def test_method_compact(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.responses.compact()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ async def test_method_compact_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.responses.compact(
+ input="string",
+ instructions="instructions",
+ model="gpt-5.1",
+ previous_response_id="resp_123",
+ )
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ async def test_raw_response_compact(self, async_client: AsyncOpenAI) -> None:
+ http_response = await async_client.responses.with_raw_response.compact()
+
+ assert http_response.is_closed is True
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+ response = http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_compact(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.responses.with_streaming_response.compact() as http_response:
+ assert not http_response.is_closed
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ response = await http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ assert cast(Any, http_response.is_closed) is True
.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 136
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a7e92d12ebe89ca019a7ac5b29759064eefa2c38fe08d03516f2620e66abb32b.yml
-openapi_spec_hash: acbc703b2739447abc6312b2d753631c
-config_hash: b876221dfb213df9f0a999e75d38a65e
+configured_endpoints: 137
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-fe8a79e6fd407e6c9afec60971f03076b65f711ccd6ea16457933b0e24fb1f6d.yml
+openapi_spec_hash: 38c0a73f4e08843732c5f8002a809104
+config_hash: 2c350086d87a4b4532077363087840e7
api.md
@@ -733,6 +733,7 @@ Types:
```python
from openai.types.responses import (
ApplyPatchTool,
+ CompactedResponse,
ComputerTool,
CustomTool,
EasyInputMessage,
@@ -752,6 +753,8 @@ from openai.types.responses import (
ResponseCodeInterpreterCallInProgressEvent,
ResponseCodeInterpreterCallInterpretingEvent,
ResponseCodeInterpreterToolCall,
+ ResponseCompactionItem,
+ ResponseCompactionItemParam,
ResponseCompletedEvent,
ResponseComputerToolCall,
ResponseComputerToolCallOutputItem,
@@ -861,6 +864,7 @@ Methods:
- <code title="get /responses/{response_id}">client.responses.<a href="./src/openai/resources/responses/responses.py">retrieve</a>(response_id, \*\*<a href="src/openai/types/responses/response_retrieve_params.py">params</a>) -> <a href="./src/openai/types/responses/response.py">Response</a></code>
- <code title="delete /responses/{response_id}">client.responses.<a href="./src/openai/resources/responses/responses.py">delete</a>(response_id) -> None</code>
- <code title="post /responses/{response_id}/cancel">client.responses.<a href="./src/openai/resources/responses/responses.py">cancel</a>(response_id) -> <a href="./src/openai/types/responses/response.py">Response</a></code>
+- <code title="post /responses/compact">client.responses.<a href="./src/openai/resources/responses/responses.py">compact</a>(\*\*<a href="src/openai/types/responses/response_compact_params.py">params</a>) -> <a href="./src/openai/types/responses/compacted_response.py">CompactedResponse</a></code>
## InputItems
@@ -914,6 +918,7 @@ from openai.types.realtime import (
InputAudioBufferClearedEvent,
InputAudioBufferCommitEvent,
InputAudioBufferCommittedEvent,
+ InputAudioBufferDtmfEventReceivedEvent,
InputAudioBufferSpeechStartedEvent,
InputAudioBufferSpeechStoppedEvent,
InputAudioBufferTimeoutTriggered,