Commit 1039d563

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-12-05 01:50:57
feat(api): gpt-5.1-codex-max and responses/compact
1 parent bd98847
Changed files (65)
src/openai/lib/_parsing/_responses.py
@@ -103,6 +103,7 @@ def parse_response(
             or output.type == "file_search_call"
             or output.type == "web_search_call"
             or output.type == "reasoning"
+            or output.type == "compaction"
             or output.type == "mcp_call"
             or output.type == "mcp_approval_request"
             or output.type == "image_generation_call"
src/openai/resources/beta/threads/runs/runs.py
@@ -169,9 +169,9 @@ class Runs(SyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -179,6 +179,7 @@ class Runs(SyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -330,9 +331,9 @@ class Runs(SyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -340,6 +341,7 @@ class Runs(SyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -487,9 +489,9 @@ class Runs(SyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -497,6 +499,7 @@ class Runs(SyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1620,9 +1623,9 @@ class AsyncRuns(AsyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1630,6 +1633,7 @@ class AsyncRuns(AsyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1781,9 +1785,9 @@ class AsyncRuns(AsyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1791,6 +1795,7 @@ class AsyncRuns(AsyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1938,9 +1943,9 @@ class AsyncRuns(AsyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1948,6 +1953,7 @@ class AsyncRuns(AsyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
src/openai/resources/beta/assistants.py
@@ -98,9 +98,9 @@ class Assistants(SyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -108,6 +108,7 @@ class Assistants(SyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -312,9 +313,9 @@ class Assistants(SyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -322,6 +323,7 @@ class Assistants(SyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -565,9 +567,9 @@ class AsyncAssistants(AsyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -575,6 +577,7 @@ class AsyncAssistants(AsyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -779,9 +782,9 @@ class AsyncAssistants(AsyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -789,6 +792,7 @@ class AsyncAssistants(AsyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
src/openai/resources/chat/completions/completions.py
@@ -411,9 +411,9 @@ class Completions(SyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -421,6 +421,7 @@ class Completions(SyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: An object specifying the format that the model must output.
 
@@ -721,9 +722,9 @@ class Completions(SyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -731,6 +732,7 @@ class Completions(SyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: An object specifying the format that the model must output.
 
@@ -1022,9 +1024,9 @@ class Completions(SyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1032,6 +1034,7 @@ class Completions(SyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: An object specifying the format that the model must output.
 
@@ -1894,9 +1897,9 @@ class AsyncCompletions(AsyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1904,6 +1907,7 @@ class AsyncCompletions(AsyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: An object specifying the format that the model must output.
 
@@ -2204,9 +2208,9 @@ class AsyncCompletions(AsyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -2214,6 +2218,7 @@ class AsyncCompletions(AsyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: An object specifying the format that the model must output.
 
@@ -2505,9 +2510,9 @@ class AsyncCompletions(AsyncAPIResource):
 
           reasoning_effort: Constrains effort on reasoning for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-              supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-              reasoning effort can result in faster responses and fewer tokens used on
-              reasoning in a response.
+              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+              Reducing reasoning effort can result in faster responses and fewer tokens used
+              on reasoning in a response.
 
               - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -2515,6 +2520,7 @@ class AsyncCompletions(AsyncAPIResource):
               - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                 support `none`.
               - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 
           response_format: An object specifying the format that the model must output.
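These docstrings now list `xhigh` across the sync and async overloads and note that it is limited to `gpt-5.1-codex-max`. A minimal usage sketch through Chat Completions; the prompt is illustrative only.

```python
from openai import OpenAI

client = OpenAI()

# Per the docstring above, `xhigh` is currently only accepted by
# `gpt-5.1-codex-max`; other models will reject the value.
completion = client.chat.completions.create(
    model="gpt-5.1-codex-max",
    reasoning_effort="xhigh",
    messages=[{"role": "user", "content": "Refactor this function for readability."}],
)
print(completion.choices[0].message.content)
```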
 
src/openai/resources/containers/containers.py
@@ -60,6 +60,7 @@ class Containers(SyncAPIResource):
         name: str,
         expires_after: container_create_params.ExpiresAfter | Omit = omit,
         file_ids: SequenceNotStr[str] | Omit = omit,
+        memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -77,6 +78,8 @@ class Containers(SyncAPIResource):
 
           file_ids: IDs of files to copy to the container.
 
+          memory_limit: Optional memory limit for the container. Defaults to "1g".
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -92,6 +95,7 @@ class Containers(SyncAPIResource):
                     "name": name,
                     "expires_after": expires_after,
                     "file_ids": file_ids,
+                    "memory_limit": memory_limit,
                 },
                 container_create_params.ContainerCreateParams,
             ),
@@ -256,6 +260,7 @@ class AsyncContainers(AsyncAPIResource):
         name: str,
         expires_after: container_create_params.ExpiresAfter | Omit = omit,
         file_ids: SequenceNotStr[str] | Omit = omit,
+        memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -273,6 +278,8 @@ class AsyncContainers(AsyncAPIResource):
 
           file_ids: IDs of files to copy to the container.
 
+          memory_limit: Optional memory limit for the container. Defaults to "1g".
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -288,6 +295,7 @@ class AsyncContainers(AsyncAPIResource):
                     "name": name,
                     "expires_after": expires_after,
                     "file_ids": file_ids,
+                    "memory_limit": memory_limit,
                 },
                 container_create_params.ContainerCreateParams,
             ),
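Both `Containers.create` and `AsyncContainers.create` now accept a `memory_limit` keyword. A short sketch requesting a larger sandbox than the documented `"1g"` default; the container name is made up.

```python
from openai import OpenAI

client = OpenAI()

# Request a larger sandbox than the default "1g"; the allowed values per
# the signature above are "1g", "4g", "16g", and "64g".
container = client.containers.create(
    name="data-analysis-sandbox",
    memory_limit="4g",
)
print(container.id)
```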
src/openai/resources/realtime/calls.py
@@ -199,15 +199,20 @@ class Calls(SyncAPIResource):
              limit, the conversation will be truncated, meaning messages (starting from the
               oldest) will not be included in the model's context. A 32k context model with
               4,096 max output tokens can only include 28,224 tokens in the context before
-              truncation occurs. Clients can configure truncation behavior to truncate with a
-              lower max token limit, which is an effective way to control token usage and
-              cost. Truncation will reduce the number of cached tokens on the next turn
-              (busting the cache), since messages are dropped from the beginning of the
-              context. However, clients can also configure truncation to retain messages up to
-              a fraction of the maximum context size, which will reduce the need for future
-              truncations and thus improve the cache rate. Truncation can be disabled
-              entirely, which means the server will never truncate but would instead return an
-              error if the conversation exceeds the model's input token limit.
+              truncation occurs.
+
+              Clients can configure truncation behavior to truncate with a lower max token
+              limit, which is an effective way to control token usage and cost.
+
+              Truncation will reduce the number of cached tokens on the next turn (busting the
+              cache), since messages are dropped from the beginning of the context. However,
+              clients can also configure truncation to retain messages up to a fraction of the
+              maximum context size, which will reduce the need for future truncations and thus
+              improve the cache rate.
+
+              Truncation can be disabled entirely, which means the server will never truncate
+              but would instead return an error if the conversation exceeds the model's input
+              token limit.
 
           extra_headers: Send extra headers
 
@@ -519,15 +524,20 @@ class AsyncCalls(AsyncAPIResource):
              limit, the conversation will be truncated, meaning messages (starting from the
               oldest) will not be included in the model's context. A 32k context model with
               4,096 max output tokens can only include 28,224 tokens in the context before
-              truncation occurs. Clients can configure truncation behavior to truncate with a
-              lower max token limit, which is an effective way to control token usage and
-              cost. Truncation will reduce the number of cached tokens on the next turn
-              (busting the cache), since messages are dropped from the beginning of the
-              context. However, clients can also configure truncation to retain messages up to
-              a fraction of the maximum context size, which will reduce the need for future
-              truncations and thus improve the cache rate. Truncation can be disabled
-              entirely, which means the server will never truncate but would instead return an
-              error if the conversation exceeds the model's input token limit.
+              truncation occurs.
+
+              Clients can configure truncation behavior to truncate with a lower max token
+              limit, which is an effective way to control token usage and cost.
+
+              Truncation will reduce the number of cached tokens on the next turn (busting the
+              cache), since messages are dropped from the beginning of the context. However,
+              clients can also configure truncation to retain messages up to a fraction of the
+              maximum context size, which will reduce the need for future truncations and thus
+              improve the cache rate.
+
+              Truncation can be disabled entirely, which means the server will never truncate
+              but would instead return an error if the conversation exceeds the model's input
+              token limit.
 
           extra_headers: Send extra headers
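The reflowed docstring describes three truncation strategies for realtime calls: a lower max token limit, retaining a fraction of the context, and disabling truncation entirely. Below is a hedged sketch of the retention-ratio form, assuming `calls.accept` takes a `truncation` session field shaped like the Realtime API's retention-ratio object; the call ID and model are placeholders.

```python
from openai import OpenAI

client = OpenAI()

# Placeholder call ID, e.g. taken from an incoming-call webhook.
call_id = "rtc_example_call"

# Retain roughly half of the context window after each truncation pass,
# trading a smaller context for a better prompt-cache hit rate, as the
# docstring above describes.
client.realtime.calls.accept(
    call_id,
    type="realtime",
    model="gpt-realtime",
    truncation={"type": "retention_ratio", "retention_ratio": 0.5},
)
```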
 
src/openai/resources/realtime/realtime.py
@@ -829,7 +829,7 @@ class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
 
 class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
     def clear(self, *, event_id: str | Omit = omit) -> None:
-        """**WebRTC Only:** Emit to cut off the current audio response.
+        """**WebRTC/SIP Only:** Emit to cut off the current audio response.
 
         This will trigger the server to
         stop generating audio and emit a `output_audio_buffer.cleared` event. This
@@ -1066,7 +1066,7 @@ class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource)
 
 class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
     async def clear(self, *, event_id: str | Omit = omit) -> None:
-        """**WebRTC Only:** Emit to cut off the current audio response.
+        """**WebRTC/SIP Only:** Emit to cut off the current audio response.
 
         This will trigger the server to
         stop generating audio and emit a `output_audio_buffer.cleared` event. This
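The docstring now covers SIP alongside WebRTC. A minimal sketch of emitting the clear event from a realtime connection; the model name is illustrative, and the event is only meaningful on WebRTC/SIP transports.

```python
from openai import OpenAI

client = OpenAI()

with client.realtime.connect(model="gpt-realtime") as connection:
    # Cut off the audio currently being played back to the caller; the
    # server answers with an `output_audio_buffer.cleared` event.
    connection.output_audio_buffer.clear()
```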
src/openai/resources/responses/responses.py
@@ -34,11 +34,10 @@ from .input_tokens import (
     AsyncInputTokensWithStreamingResponse,
 )
 from ..._base_client import make_request_options
-from ...types.responses import response_create_params, response_retrieve_params
-from ...lib._parsing._responses import (
-    TextFormatT,
-    parse_response,
-    type_to_text_format_param as _type_to_text_format_param,
+from ...types.responses import (
+    response_create_params,
+    response_compact_params,
+    response_retrieve_params,
 )
 from ...types.responses.response import Response
 from ...types.responses.tool_param import ToolParam, ParseableToolParam
@@ -46,11 +45,13 @@ from ...types.shared_params.metadata import Metadata
 from ...types.shared_params.reasoning import Reasoning
 from ...types.responses.parsed_response import ParsedResponse
 from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
+from ...types.responses.compacted_response import CompactedResponse
 from ...types.responses.response_includable import ResponseIncludable
 from ...types.shared_params.responses_model import ResponsesModel
 from ...types.responses.response_input_param import ResponseInputParam
 from ...types.responses.response_prompt_param import ResponsePromptParam
 from ...types.responses.response_stream_event import ResponseStreamEvent
+from ...types.responses.response_input_item_param import ResponseInputItemParam
 from ...types.responses.response_text_config_param import ResponseTextConfigParam
 
 __all__ = ["Responses", "AsyncResponses"]
@@ -1517,6 +1518,154 @@ class Responses(SyncAPIResource):
             cast_to=Response,
         )
 
+    def compact(
+        self,
+        *,
+        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+        instructions: Optional[str] | Omit = omit,
+        model: Union[
+            Literal[
+                "gpt-5.1",
+                "gpt-5.1-2025-11-13",
+                "gpt-5.1-codex",
+                "gpt-5.1-mini",
+                "gpt-5.1-chat-latest",
+                "gpt-5",
+                "gpt-5-mini",
+                "gpt-5-nano",
+                "gpt-5-2025-08-07",
+                "gpt-5-mini-2025-08-07",
+                "gpt-5-nano-2025-08-07",
+                "gpt-5-chat-latest",
+                "gpt-4.1",
+                "gpt-4.1-mini",
+                "gpt-4.1-nano",
+                "gpt-4.1-2025-04-14",
+                "gpt-4.1-mini-2025-04-14",
+                "gpt-4.1-nano-2025-04-14",
+                "o4-mini",
+                "o4-mini-2025-04-16",
+                "o3",
+                "o3-2025-04-16",
+                "o3-mini",
+                "o3-mini-2025-01-31",
+                "o1",
+                "o1-2024-12-17",
+                "o1-preview",
+                "o1-preview-2024-09-12",
+                "o1-mini",
+                "o1-mini-2024-09-12",
+                "gpt-4o",
+                "gpt-4o-2024-11-20",
+                "gpt-4o-2024-08-06",
+                "gpt-4o-2024-05-13",
+                "gpt-4o-audio-preview",
+                "gpt-4o-audio-preview-2024-10-01",
+                "gpt-4o-audio-preview-2024-12-17",
+                "gpt-4o-audio-preview-2025-06-03",
+                "gpt-4o-mini-audio-preview",
+                "gpt-4o-mini-audio-preview-2024-12-17",
+                "gpt-4o-search-preview",
+                "gpt-4o-mini-search-preview",
+                "gpt-4o-search-preview-2025-03-11",
+                "gpt-4o-mini-search-preview-2025-03-11",
+                "chatgpt-4o-latest",
+                "codex-mini-latest",
+                "gpt-4o-mini",
+                "gpt-4o-mini-2024-07-18",
+                "gpt-4-turbo",
+                "gpt-4-turbo-2024-04-09",
+                "gpt-4-0125-preview",
+                "gpt-4-turbo-preview",
+                "gpt-4-1106-preview",
+                "gpt-4-vision-preview",
+                "gpt-4",
+                "gpt-4-0314",
+                "gpt-4-0613",
+                "gpt-4-32k",
+                "gpt-4-32k-0314",
+                "gpt-4-32k-0613",
+                "gpt-3.5-turbo",
+                "gpt-3.5-turbo-16k",
+                "gpt-3.5-turbo-0301",
+                "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
+                "gpt-3.5-turbo-0125",
+                "gpt-3.5-turbo-16k-0613",
+                "o1-pro",
+                "o1-pro-2025-03-19",
+                "o3-pro",
+                "o3-pro-2025-06-10",
+                "o3-deep-research",
+                "o3-deep-research-2025-06-26",
+                "o4-mini-deep-research",
+                "o4-mini-deep-research-2025-06-26",
+                "computer-use-preview",
+                "computer-use-preview-2025-03-11",
+                "gpt-5-codex",
+                "gpt-5-pro",
+                "gpt-5-pro-2025-10-06",
+                "gpt-5.1-codex-max",
+            ],
+            str,
+            None,
+        ]
+        | Omit = omit,
+        previous_response_id: Optional[str] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> CompactedResponse:
+        """
+        Compact conversation
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response
+
+          instructions: A system (or developer) message inserted into the model's context. When used
+              along with `previous_response_id`, the instructions from a previous response
+              will not be carried over to the next response. This makes it simple to swap out
+              system (or developer) messages in new responses.
+
+          model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+              Cannot be used in conjunction with `conversation`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/responses/compact",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "instructions": instructions,
+                    "model": model,
+                    "previous_response_id": previous_response_id,
+                },
+                response_compact_params.ResponseCompactParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=CompactedResponse,
+        )
+
 
 class AsyncResponses(AsyncAPIResource):
     @cached_property
@@ -2983,6 +3132,154 @@ class AsyncResponses(AsyncAPIResource):
             cast_to=Response,
         )
 
+    async def compact(
+        self,
+        *,
+        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+        instructions: Optional[str] | Omit = omit,
+        model: Union[
+            Literal[
+                "gpt-5.1",
+                "gpt-5.1-2025-11-13",
+                "gpt-5.1-codex",
+                "gpt-5.1-mini",
+                "gpt-5.1-chat-latest",
+                "gpt-5",
+                "gpt-5-mini",
+                "gpt-5-nano",
+                "gpt-5-2025-08-07",
+                "gpt-5-mini-2025-08-07",
+                "gpt-5-nano-2025-08-07",
+                "gpt-5-chat-latest",
+                "gpt-4.1",
+                "gpt-4.1-mini",
+                "gpt-4.1-nano",
+                "gpt-4.1-2025-04-14",
+                "gpt-4.1-mini-2025-04-14",
+                "gpt-4.1-nano-2025-04-14",
+                "o4-mini",
+                "o4-mini-2025-04-16",
+                "o3",
+                "o3-2025-04-16",
+                "o3-mini",
+                "o3-mini-2025-01-31",
+                "o1",
+                "o1-2024-12-17",
+                "o1-preview",
+                "o1-preview-2024-09-12",
+                "o1-mini",
+                "o1-mini-2024-09-12",
+                "gpt-4o",
+                "gpt-4o-2024-11-20",
+                "gpt-4o-2024-08-06",
+                "gpt-4o-2024-05-13",
+                "gpt-4o-audio-preview",
+                "gpt-4o-audio-preview-2024-10-01",
+                "gpt-4o-audio-preview-2024-12-17",
+                "gpt-4o-audio-preview-2025-06-03",
+                "gpt-4o-mini-audio-preview",
+                "gpt-4o-mini-audio-preview-2024-12-17",
+                "gpt-4o-search-preview",
+                "gpt-4o-mini-search-preview",
+                "gpt-4o-search-preview-2025-03-11",
+                "gpt-4o-mini-search-preview-2025-03-11",
+                "chatgpt-4o-latest",
+                "codex-mini-latest",
+                "gpt-4o-mini",
+                "gpt-4o-mini-2024-07-18",
+                "gpt-4-turbo",
+                "gpt-4-turbo-2024-04-09",
+                "gpt-4-0125-preview",
+                "gpt-4-turbo-preview",
+                "gpt-4-1106-preview",
+                "gpt-4-vision-preview",
+                "gpt-4",
+                "gpt-4-0314",
+                "gpt-4-0613",
+                "gpt-4-32k",
+                "gpt-4-32k-0314",
+                "gpt-4-32k-0613",
+                "gpt-3.5-turbo",
+                "gpt-3.5-turbo-16k",
+                "gpt-3.5-turbo-0301",
+                "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
+                "gpt-3.5-turbo-0125",
+                "gpt-3.5-turbo-16k-0613",
+                "o1-pro",
+                "o1-pro-2025-03-19",
+                "o3-pro",
+                "o3-pro-2025-06-10",
+                "o3-deep-research",
+                "o3-deep-research-2025-06-26",
+                "o4-mini-deep-research",
+                "o4-mini-deep-research-2025-06-26",
+                "computer-use-preview",
+                "computer-use-preview-2025-03-11",
+                "gpt-5-codex",
+                "gpt-5-pro",
+                "gpt-5-pro-2025-10-06",
+                "gpt-5.1-codex-max",
+            ],
+            str,
+            None,
+        ]
+        | Omit = omit,
+        previous_response_id: Optional[str] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> CompactedResponse:
+        """
+        Compact conversation
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response
+
+          instructions: A system (or developer) message inserted into the model's context. When used
+              along with `previous_response_id`, the instructions from a previous response
+              will not be carried over to the next response. This makes it simple to swap out
+              system (or developer) messages in new responses.
+
+          model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+              Cannot be used in conjunction with `conversation`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/responses/compact",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "instructions": instructions,
+                    "model": model,
+                    "previous_response_id": previous_response_id,
+                },
+                response_compact_params.ResponseCompactParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=CompactedResponse,
+        )
+
 
 class ResponsesWithRawResponse:
     def __init__(self, responses: Responses) -> None:
@@ -3000,9 +3297,6 @@ class ResponsesWithRawResponse:
         self.cancel = _legacy_response.to_raw_response_wrapper(
             responses.cancel,
         )
-        self.parse = _legacy_response.to_raw_response_wrapper(
-            responses.parse,
-        )
 
     @cached_property
     def input_items(self) -> InputItemsWithRawResponse:
@@ -3029,9 +3323,6 @@ class AsyncResponsesWithRawResponse:
         self.cancel = _legacy_response.async_to_raw_response_wrapper(
             responses.cancel,
         )
-        self.parse = _legacy_response.async_to_raw_response_wrapper(
-            responses.parse,
-        )
 
     @cached_property
     def input_items(self) -> AsyncInputItemsWithRawResponse:
@@ -3058,6 +3349,9 @@ class ResponsesWithStreamingResponse:
         self.cancel = to_streamed_response_wrapper(
             responses.cancel,
         )
+        self.compact = to_streamed_response_wrapper(
+            responses.compact,
+        )
 
     @cached_property
     def input_items(self) -> InputItemsWithStreamingResponse:
@@ -3084,6 +3378,9 @@ class AsyncResponsesWithStreamingResponse:
         self.cancel = async_to_streamed_response_wrapper(
             responses.cancel,
         )
+        self.compact = async_to_streamed_response_wrapper(
+            responses.compact,
+        )
 
     @cached_property
     def input_items(self) -> AsyncInputItemsWithStreamingResponse:
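The new sync and async `compact()` methods post to `/responses/compact` and return a `CompactedResponse`. A hedged usage sketch; the previous response ID is a placeholder.

```python
from openai import OpenAI

client = OpenAI()

# Compact an earlier turn so the follow-up request starts from a smaller
# context; `previous_response_id` is a placeholder here.
compacted = client.responses.compact(
    model="gpt-5.1-codex-max",
    previous_response_id="resp_abc123",
    instructions="You are a concise coding assistant.",
)
print(compacted)
```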
src/openai/resources/videos.py
@@ -84,11 +84,13 @@ class Videos(SyncAPIResource):
 
           input_reference: Optional image reference that guides generation.
 
-          model: The video generation model to use. Defaults to `sora-2`.
+          model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults
+              to `sora-2`.
 
-          seconds: Clip duration in seconds. Defaults to 4 seconds.
+          seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.
 
-          size: Output resolution formatted as width x height. Defaults to 720x1280.
+          size: Output resolution formatted as width x height (allowed values: 720x1280,
+              1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
 
           extra_headers: Send extra headers
 
@@ -437,11 +439,13 @@ class AsyncVideos(AsyncAPIResource):
 
           input_reference: Optional image reference that guides generation.
 
-          model: The video generation model to use. Defaults to `sora-2`.
+          model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults
+              to `sora-2`.
 
-          seconds: Clip duration in seconds. Defaults to 4 seconds.
+          seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.
 
-          size: Output resolution formatted as width x height. Defaults to 720x1280.
+          size: Output resolution formatted as width x height (allowed values: 720x1280,
+              1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
 
           extra_headers: Send extra headers
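The `videos.create` docstrings now spell out the allowed model, duration, and size values. A short sketch passing the documented defaults explicitly; note that `seconds` and `size` are string-valued in this SDK, and the prompt is illustrative.

```python
from openai import OpenAI

client = OpenAI()

# Documented defaults made explicit; other allowed values are sora-2-pro,
# seconds of "8" or "12", and the 1024x1792 / 1792x1024 sizes.
video = client.videos.create(
    prompt="A slow pan across a foggy harbor at dawn",
    model="sora-2",
    seconds="4",
    size="720x1280",
)
print(video.id, video.status)
```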
 
src/openai/types/beta/threads/run_create_params.py
@@ -111,9 +111,9 @@ class RunCreateParamsBase(TypedDict, total=False):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -121,6 +121,7 @@ class RunCreateParamsBase(TypedDict, total=False):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     response_format: Optional[AssistantResponseFormatOptionParam]
src/openai/types/beta/assistant_create_params.py
@@ -62,9 +62,9 @@ class AssistantCreateParams(TypedDict, total=False):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -72,6 +72,7 @@ class AssistantCreateParams(TypedDict, total=False):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     response_format: Optional[AssistantResponseFormatOptionParam]
src/openai/types/beta/assistant_update_params.py
@@ -97,9 +97,9 @@ class AssistantUpdateParams(TypedDict, total=False):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -107,6 +107,7 @@ class AssistantUpdateParams(TypedDict, total=False):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     response_format: Optional[AssistantResponseFormatOptionParam]
src/openai/types/chat/completion_create_params.py
@@ -197,9 +197,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -207,6 +207,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     response_format: ResponseFormat
src/openai/types/evals/create_eval_completions_run_data_source.py
@@ -172,9 +172,9 @@ class SamplingParams(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -182,6 +182,7 @@ class SamplingParams(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     response_format: Optional[SamplingParamsResponseFormat] = None
src/openai/types/evals/create_eval_completions_run_data_source_param.py
@@ -168,9 +168,9 @@ class SamplingParams(TypedDict, total=False):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -178,6 +178,7 @@ class SamplingParams(TypedDict, total=False):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     response_format: SamplingParamsResponseFormat
src/openai/types/evals/run_cancel_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     seed: Optional[int] = None
src/openai/types/evals/run_create_params.py
@@ -116,9 +116,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -126,6 +126,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     temperature: Optional[float]
@@ -263,9 +264,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -273,6 +274,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     seed: int
src/openai/types/evals/run_create_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     seed: Optional[int] = None
src/openai/types/evals/run_list_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     seed: Optional[int] = None
src/openai/types/evals/run_retrieve_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     seed: Optional[int] = None
src/openai/types/graders/score_model_grader.py
@@ -67,9 +67,9 @@ class SamplingParams(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -77,6 +77,7 @@ class SamplingParams(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     seed: Optional[int] = None
src/openai/types/graders/score_model_grader_param.py
@@ -73,9 +73,9 @@ class SamplingParams(TypedDict, total=False):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -83,6 +83,7 @@ class SamplingParams(TypedDict, total=False):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     seed: Optional[int]
src/openai/types/realtime/__init__.py
@@ -175,6 +175,9 @@ from .realtime_response_usage_input_token_details import (
 from .response_function_call_arguments_done_event import (
     ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent,
 )
+from .input_audio_buffer_dtmf_event_received_event import (
+    InputAudioBufferDtmfEventReceivedEvent as InputAudioBufferDtmfEventReceivedEvent,
+)
 from .realtime_conversation_item_assistant_message import (
     RealtimeConversationItemAssistantMessage as RealtimeConversationItemAssistantMessage,
 )
src/openai/types/realtime/call_accept_params.py
@@ -110,13 +110,18 @@ class CallAcceptParams(TypedDict, total=False):
     limit, the conversation will be truncated, meaning messages (starting from the
     oldest) will not be included in the model's context. A 32k context model with
     4,096 max output tokens can only include 28,224 tokens in the context before
-    truncation occurs. Clients can configure truncation behavior to truncate with a
-    lower max token limit, which is an effective way to control token usage and
-    cost. Truncation will reduce the number of cached tokens on the next turn
-    (busting the cache), since messages are dropped from the beginning of the
-    context. However, clients can also configure truncation to retain messages up to
-    a fraction of the maximum context size, which will reduce the need for future
-    truncations and thus improve the cache rate. Truncation can be disabled
-    entirely, which means the server will never truncate but would instead return an
-    error if the conversation exceeds the model's input token limit.
+    truncation occurs.
+
+    Clients can configure truncation behavior to truncate with a lower max token
+    limit, which is an effective way to control token usage and cost.
+
+    Truncation will reduce the number of cached tokens on the next turn (busting the
+    cache), since messages are dropped from the beginning of the context. However,
+    clients can also configure truncation to retain messages up to a fraction of the
+    maximum context size, which will reduce the need for future truncations and thus
+    improve the cache rate.
+
+    Truncation can be disabled entirely, which means the server will never truncate
+    but would instead return an error if the conversation exceeds the model's input
+    token limit.
     """
src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferDtmfEventReceivedEvent"]
+
+
+class InputAudioBufferDtmfEventReceivedEvent(BaseModel):
+    event: str
+    """The telephone keypad that was pressed by the user."""
+
+    received_at: int
+    """UTC Unix Timestamp when DTMF Event was received by server."""
+
+    type: Literal["input_audio_buffer.dtmf_event_received"]
+    """The event type, must be `input_audio_buffer.dtmf_event_received`."""
src/openai/types/realtime/realtime_audio_input_turn_detection.py
@@ -14,9 +14,14 @@ class ServerVad(BaseModel):
     """Type of turn detection, `server_vad` to turn on simple Server VAD."""
 
     create_response: Optional[bool] = None
-    """
-    Whether or not to automatically generate a response when a VAD stop event
+    """Whether or not to automatically generate a response when a VAD stop event
     occurs.
+
+    If `interrupt_response` is set to `false`, this may fail to create a response if
+    the model is already responding.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     idle_timeout_ms: Optional[int] = None
@@ -37,9 +42,13 @@ class ServerVad(BaseModel):
 
     interrupt_response: Optional[bool] = None
     """
-    Whether or not to automatically interrupt any ongoing response with output to
-    the default conversation (i.e. `conversation` of `auto`) when a VAD start event
-    occurs.
+    Whether or not to automatically interrupt (cancel) any ongoing response with
+    output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+    start event occurs. If `true` then the response will be cancelled, otherwise it
+    will continue until complete.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     prefix_padding_ms: Optional[int] = None
src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
@@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False):
     """Type of turn detection, `server_vad` to turn on simple Server VAD."""
 
     create_response: bool
-    """
-    Whether or not to automatically generate a response when a VAD stop event
+    """Whether or not to automatically generate a response when a VAD stop event
     occurs.
+
+    If `interrupt_response` is set to `false`, this may fail to create a response if
+    the model is already responding.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     idle_timeout_ms: Optional[int]
@@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False):
 
     interrupt_response: bool
     """
-    Whether or not to automatically interrupt any ongoing response with output to
-    the default conversation (i.e. `conversation` of `auto`) when a VAD start event
-    occurs.
+    Whether or not to automatically interrupt (cancel) any ongoing response with
+    output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+    start event occurs. If `true` then the response will be cancelled, otherwise it
+    will continue until complete.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     prefix_padding_ms: int
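A hedged sketch of the two flags documented above; the surrounding session shape is an assumption, but the semantics follow the new docstrings (with both flags off the model never responds automatically, while VAD events are still emitted):

turn_detection = {
    "type": "server_vad",
    "create_response": False,     # do not auto-create a response when VAD detects speech stop
    "interrupt_response": False,  # do not cancel an in-flight response when VAD detects speech start
}
# Assumed placement within a session config:
session = {"type": "realtime", "audio": {"input": {"turn_detection": turn_detection}}}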
src/openai/types/realtime/realtime_server_event.py
@@ -42,6 +42,7 @@ from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaE
 from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent
 from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent
 from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent
+from .input_audio_buffer_dtmf_event_received_event import InputAudioBufferDtmfEventReceivedEvent
 from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent
 from .conversation_item_input_audio_transcription_segment import ConversationItemInputAudioTranscriptionSegment
 from .conversation_item_input_audio_transcription_delta_event import ConversationItemInputAudioTranscriptionDeltaEvent
@@ -116,6 +117,7 @@ RealtimeServerEvent: TypeAlias = Annotated[
         RealtimeErrorEvent,
         InputAudioBufferClearedEvent,
         InputAudioBufferCommittedEvent,
+        InputAudioBufferDtmfEventReceivedEvent,
         InputAudioBufferSpeechStartedEvent,
         InputAudioBufferSpeechStoppedEvent,
         RateLimitsUpdatedEvent,
src/openai/types/realtime/realtime_session_create_request.py
@@ -110,13 +110,18 @@ class RealtimeSessionCreateRequest(BaseModel):
     limit, the conversation will be truncated, meaning messages (starting from the
     oldest) will not be included in the model's context. A 32k context model with
     4,096 max output tokens can only include 28,224 tokens in the context before
-    truncation occurs. Clients can configure truncation behavior to truncate with a
-    lower max token limit, which is an effective way to control token usage and
-    cost. Truncation will reduce the number of cached tokens on the next turn
-    (busting the cache), since messages are dropped from the beginning of the
-    context. However, clients can also configure truncation to retain messages up to
-    a fraction of the maximum context size, which will reduce the need for future
-    truncations and thus improve the cache rate. Truncation can be disabled
-    entirely, which means the server will never truncate but would instead return an
-    error if the conversation exceeds the model's input token limit.
+    truncation occurs.
+
+    Clients can configure truncation behavior to truncate with a lower max token
+    limit, which is an effective way to control token usage and cost.
+
+    Truncation will reduce the number of cached tokens on the next turn (busting the
+    cache), since messages are dropped from the beginning of the context. However,
+    clients can also configure truncation to retain messages up to a fraction of the
+    maximum context size, which will reduce the need for future truncations and thus
+    improve the cache rate.
+
+    Truncation can be disabled entirely, which means the server will never truncate
+    but would instead return an error if the conversation exceeds the model's input
+    token limit.
     """
src/openai/types/realtime/realtime_session_create_request_param.py
@@ -110,13 +110,18 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False):
     limit, the conversation will be truncated, meaning messages (starting from the
     oldest) will not be included in the model's context. A 32k context model with
     4,096 max output tokens can only include 28,224 tokens in the context before
-    truncation occurs. Clients can configure truncation behavior to truncate with a
-    lower max token limit, which is an effective way to control token usage and
-    cost. Truncation will reduce the number of cached tokens on the next turn
-    (busting the cache), since messages are dropped from the beginning of the
-    context. However, clients can also configure truncation to retain messages up to
-    a fraction of the maximum context size, which will reduce the need for future
-    truncations and thus improve the cache rate. Truncation can be disabled
-    entirely, which means the server will never truncate but would instead return an
-    error if the conversation exceeds the model's input token limit.
+    truncation occurs.
+
+    Clients can configure truncation behavior to truncate with a lower max token
+    limit, which is an effective way to control token usage and cost.
+
+    Truncation will reduce the number of cached tokens on the next turn (busting the
+    cache), since messages are dropped from the beginning of the context. However,
+    clients can also configure truncation to retain messages up to a fraction of the
+    maximum context size, which will reduce the need for future truncations and thus
+    improve the cache rate.
+
+    Truncation can be disabled entirely, which means the server will never truncate
+    but would instead return an error if the conversation exceeds the model's input
+    token limit.
     """
src/openai/types/realtime/realtime_session_create_response.py
@@ -53,9 +53,14 @@ class AudioInputTurnDetectionServerVad(BaseModel):
     """Type of turn detection, `server_vad` to turn on simple Server VAD."""
 
     create_response: Optional[bool] = None
-    """
-    Whether or not to automatically generate a response when a VAD stop event
+    """Whether or not to automatically generate a response when a VAD stop event
     occurs.
+
+    If `interrupt_response` is set to `false`, this may fail to create a response if
+    the model is already responding.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     idle_timeout_ms: Optional[int] = None
@@ -76,9 +81,13 @@ class AudioInputTurnDetectionServerVad(BaseModel):
 
     interrupt_response: Optional[bool] = None
     """
-    Whether or not to automatically interrupt any ongoing response with output to
-    the default conversation (i.e. `conversation` of `auto`) when a VAD start event
-    occurs.
+    Whether or not to automatically interrupt (cancel) any ongoing response with
+    output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+    start event occurs. If `true` then the response will be cancelled, otherwise it
+    will continue until complete.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     prefix_padding_ms: Optional[int] = None
@@ -463,13 +472,18 @@ class RealtimeSessionCreateResponse(BaseModel):
     limit, the conversation will be truncated, meaning messages (starting from the
     oldest) will not be included in the model's context. A 32k context model with
     4,096 max output tokens can only include 28,224 tokens in the context before
-    truncation occurs. Clients can configure truncation behavior to truncate with a
-    lower max token limit, which is an effective way to control token usage and
-    cost. Truncation will reduce the number of cached tokens on the next turn
-    (busting the cache), since messages are dropped from the beginning of the
-    context. However, clients can also configure truncation to retain messages up to
-    a fraction of the maximum context size, which will reduce the need for future
-    truncations and thus improve the cache rate. Truncation can be disabled
-    entirely, which means the server will never truncate but would instead return an
-    error if the conversation exceeds the model's input token limit.
+    truncation occurs.
+
+    Clients can configure truncation behavior to truncate with a lower max token
+    limit, which is an effective way to control token usage and cost.
+
+    Truncation will reduce the number of cached tokens on the next turn (busting the
+    cache), since messages are dropped from the beginning of the context. However,
+    clients can also configure truncation to retain messages up to a fraction of the
+    maximum context size, which will reduce the need for future truncations and thus
+    improve the cache rate.
+
+    Truncation can be disabled entirely, which means the server will never truncate
+    but would instead return an error if the conversation exceeds the model's input
+    token limit.
     """
src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
@@ -14,9 +14,14 @@ class ServerVad(BaseModel):
     """Type of turn detection, `server_vad` to turn on simple Server VAD."""
 
     create_response: Optional[bool] = None
-    """
-    Whether or not to automatically generate a response when a VAD stop event
+    """Whether or not to automatically generate a response when a VAD stop event
     occurs.
+
+    If `interrupt_response` is set to `false`, this may fail to create a response if
+    the model is already responding.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     idle_timeout_ms: Optional[int] = None
@@ -37,9 +42,13 @@ class ServerVad(BaseModel):
 
     interrupt_response: Optional[bool] = None
     """
-    Whether or not to automatically interrupt any ongoing response with output to
-    the default conversation (i.e. `conversation` of `auto`) when a VAD start event
-    occurs.
+    Whether or not to automatically interrupt (cancel) any ongoing response with
+    output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+    start event occurs. If `true` then the response will be cancelled, otherwise it
+    will continue until complete.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     prefix_padding_ms: Optional[int] = None
src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py
@@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False):
     """Type of turn detection, `server_vad` to turn on simple Server VAD."""
 
     create_response: bool
-    """
-    Whether or not to automatically generate a response when a VAD stop event
+    """Whether or not to automatically generate a response when a VAD stop event
     occurs.
+
+    If `interrupt_response` is set to `false`, this may fail to create a response if
+    the model is already responding.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     idle_timeout_ms: Optional[int]
@@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False):
 
     interrupt_response: bool
     """
-    Whether or not to automatically interrupt any ongoing response with output to
-    the default conversation (i.e. `conversation` of `auto`) when a VAD start event
-    occurs.
+    Whether or not to automatically interrupt (cancel) any ongoing response with
+    output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+    start event occurs. If `true` then the response will be cancelled, otherwise it
+    will continue until complete.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
     """
 
     prefix_padding_ms: int
src/openai/types/responses/__init__.py
@@ -28,6 +28,7 @@ from .file_search_tool import FileSearchTool as FileSearchTool
 from .custom_tool_param import CustomToolParam as CustomToolParam
 from .tool_choice_shell import ToolChoiceShell as ToolChoiceShell
 from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes
+from .compacted_response import CompactedResponse as CompactedResponse
 from .easy_input_message import EasyInputMessage as EasyInputMessage
 from .response_item_list import ResponseItemList as ResponseItemList
 from .tool_choice_custom import ToolChoiceCustom as ToolChoiceCustom
@@ -60,6 +61,7 @@ from .input_item_list_params import InputItemListParams as InputItemListParams
 from .response_create_params import ResponseCreateParams as ResponseCreateParams
 from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent
 from .response_input_content import ResponseInputContent as ResponseInputContent
+from .response_compact_params import ResponseCompactParams as ResponseCompactParams
 from .response_output_message import ResponseOutputMessage as ResponseOutputMessage
 from .response_output_refusal import ResponseOutputRefusal as ResponseOutputRefusal
 from .response_reasoning_item import ResponseReasoningItem as ResponseReasoningItem
@@ -69,6 +71,7 @@ from .tool_choice_types_param import ToolChoiceTypesParam as ToolChoiceTypesPara
 from .web_search_preview_tool import WebSearchPreviewTool as WebSearchPreviewTool
 from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam
 from .input_token_count_params import InputTokenCountParams as InputTokenCountParams
+from .response_compaction_item import ResponseCompactionItem as ResponseCompactionItem
 from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent
 from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams
 from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent
@@ -108,6 +111,7 @@ from .response_reasoning_item_param import ResponseReasoningItemParam as Respons
 from .tool_choice_apply_patch_param import ToolChoiceApplyPatchParam as ToolChoiceApplyPatchParam
 from .web_search_preview_tool_param import WebSearchPreviewToolParam as WebSearchPreviewToolParam
 from .response_apply_patch_tool_call import ResponseApplyPatchToolCall as ResponseApplyPatchToolCall
+from .response_compaction_item_param import ResponseCompactionItemParam as ResponseCompactionItemParam
 from .response_file_search_tool_call import ResponseFileSearchToolCall as ResponseFileSearchToolCall
 from .response_mcp_call_failed_event import ResponseMcpCallFailedEvent as ResponseMcpCallFailedEvent
 from .response_custom_tool_call_param import ResponseCustomToolCallParam as ResponseCustomToolCallParam
@@ -133,6 +137,7 @@ from .response_input_message_content_list import ResponseInputMessageContentList
 from .response_mcp_call_in_progress_event import ResponseMcpCallInProgressEvent as ResponseMcpCallInProgressEvent
 from .response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent as ResponseReasoningTextDeltaEvent
 from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam as ResponseCompactionItemParamParam
 from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam as ResponseFileSearchToolCallParam
 from .response_mcp_list_tools_failed_event import ResponseMcpListToolsFailedEvent as ResponseMcpListToolsFailedEvent
 from .response_apply_patch_tool_call_output import ResponseApplyPatchToolCallOutput as ResponseApplyPatchToolCallOutput
src/openai/types/responses/compacted_response.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .response_usage import ResponseUsage
+from .response_output_item import ResponseOutputItem
+
+__all__ = ["CompactedResponse"]
+
+
+class CompactedResponse(BaseModel):
+    id: str
+    """The unique identifier for the compacted response."""
+
+    created_at: int
+    """Unix timestamp (in seconds) when the compacted conversation was created."""
+
+    object: Literal["response.compaction"]
+    """The object type. Always `response.compaction`."""
+
+    output: List[ResponseOutputItem]
+    """The compacted list of output items.
+
+    This is a list of all user messages, followed by a single compaction item.
+    """
+
+    usage: ResponseUsage
+    """
+    Token accounting for the compaction pass, including cached, reasoning, and total
+    tokens.
+    """
src/openai/types/responses/parsed_response.py
@@ -6,7 +6,6 @@ from typing_extensions import Annotated, TypeAlias
 from ..._utils import PropertyInfo
 from .response import Response
 from ..._models import GenericModel
-from ..._utils._transform import PropertyInfo
 from .response_output_item import (
     McpCall,
     McpListTools,
@@ -19,6 +18,7 @@ from .response_output_text import ResponseOutputText
 from .response_output_message import ResponseOutputMessage
 from .response_output_refusal import ResponseOutputRefusal
 from .response_reasoning_item import ResponseReasoningItem
+from .response_compaction_item import ResponseCompactionItem
 from .response_custom_tool_call import ResponseCustomToolCall
 from .response_computer_tool_call import ResponseComputerToolCall
 from .response_function_tool_call import ResponseFunctionToolCall
@@ -79,6 +79,7 @@ ParsedResponseOutputItem: TypeAlias = Annotated[
         McpListTools,
         ResponseCodeInterpreterToolCall,
         ResponseCustomToolCall,
+        ResponseCompactionItem,
         ResponseFunctionShellToolCall,
         ResponseFunctionShellToolCallOutput,
         ResponseApplyPatchToolCall,
src/openai/types/responses/response_compact_params.py
@@ -0,0 +1,126 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal, TypedDict
+
+from .response_input_item_param import ResponseInputItemParam
+
+__all__ = ["ResponseCompactParams"]
+
+
+class ResponseCompactParams(TypedDict, total=False):
+    input: Union[str, Iterable[ResponseInputItemParam], None]
+    """Text, image, or file inputs to the model, used to generate a response"""
+
+    instructions: Optional[str]
+    """
+    A system (or developer) message inserted into the model's context. When used
+    along with `previous_response_id`, the instructions from a previous response
+    will not be carried over to the next response. This makes it simple to swap out
+    system (or developer) messages in new responses.
+    """
+
+    model: Union[
+        Literal[
+            "gpt-5.1",
+            "gpt-5.1-2025-11-13",
+            "gpt-5.1-codex",
+            "gpt-5.1-mini",
+            "gpt-5.1-chat-latest",
+            "gpt-5",
+            "gpt-5-mini",
+            "gpt-5-nano",
+            "gpt-5-2025-08-07",
+            "gpt-5-mini-2025-08-07",
+            "gpt-5-nano-2025-08-07",
+            "gpt-5-chat-latest",
+            "gpt-4.1",
+            "gpt-4.1-mini",
+            "gpt-4.1-nano",
+            "gpt-4.1-2025-04-14",
+            "gpt-4.1-mini-2025-04-14",
+            "gpt-4.1-nano-2025-04-14",
+            "o4-mini",
+            "o4-mini-2025-04-16",
+            "o3",
+            "o3-2025-04-16",
+            "o3-mini",
+            "o3-mini-2025-01-31",
+            "o1",
+            "o1-2024-12-17",
+            "o1-preview",
+            "o1-preview-2024-09-12",
+            "o1-mini",
+            "o1-mini-2024-09-12",
+            "gpt-4o",
+            "gpt-4o-2024-11-20",
+            "gpt-4o-2024-08-06",
+            "gpt-4o-2024-05-13",
+            "gpt-4o-audio-preview",
+            "gpt-4o-audio-preview-2024-10-01",
+            "gpt-4o-audio-preview-2024-12-17",
+            "gpt-4o-audio-preview-2025-06-03",
+            "gpt-4o-mini-audio-preview",
+            "gpt-4o-mini-audio-preview-2024-12-17",
+            "gpt-4o-search-preview",
+            "gpt-4o-mini-search-preview",
+            "gpt-4o-search-preview-2025-03-11",
+            "gpt-4o-mini-search-preview-2025-03-11",
+            "chatgpt-4o-latest",
+            "codex-mini-latest",
+            "gpt-4o-mini",
+            "gpt-4o-mini-2024-07-18",
+            "gpt-4-turbo",
+            "gpt-4-turbo-2024-04-09",
+            "gpt-4-0125-preview",
+            "gpt-4-turbo-preview",
+            "gpt-4-1106-preview",
+            "gpt-4-vision-preview",
+            "gpt-4",
+            "gpt-4-0314",
+            "gpt-4-0613",
+            "gpt-4-32k",
+            "gpt-4-32k-0314",
+            "gpt-4-32k-0613",
+            "gpt-3.5-turbo",
+            "gpt-3.5-turbo-16k",
+            "gpt-3.5-turbo-0301",
+            "gpt-3.5-turbo-0613",
+            "gpt-3.5-turbo-1106",
+            "gpt-3.5-turbo-0125",
+            "gpt-3.5-turbo-16k-0613",
+            "o1-pro",
+            "o1-pro-2025-03-19",
+            "o3-pro",
+            "o3-pro-2025-06-10",
+            "o3-deep-research",
+            "o3-deep-research-2025-06-26",
+            "o4-mini-deep-research",
+            "o4-mini-deep-research-2025-06-26",
+            "computer-use-preview",
+            "computer-use-preview-2025-03-11",
+            "gpt-5-codex",
+            "gpt-5-pro",
+            "gpt-5-pro-2025-10-06",
+            "gpt-5.1-codex-max",
+        ],
+        str,
+        None,
+    ]
+    """Model ID used to generate the response, like `gpt-5` or `o3`.
+
+    OpenAI offers a wide range of models with different capabilities, performance
+    characteristics, and price points. Refer to the
+    [model guide](https://platform.openai.com/docs/models) to browse and compare
+    available models.
+    """
+
+    previous_response_id: Optional[str]
+    """The unique ID of the previous response to the model.
+
+    Use this to create multi-turn conversations. Learn more about
+    [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+    Cannot be used in conjunction with `conversation`.
+    """
src/openai/types/responses/response_compaction_item.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompactionItem"]
+
+
+class ResponseCompactionItem(BaseModel):
+    id: str
+    """The unique ID of the compaction item."""
+
+    encrypted_content: str
+
+    type: Literal["compaction"]
+    """The type of the item. Always `compaction`."""
+
+    created_by: Optional[str] = None
src/openai/types/responses/response_compaction_item_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompactionItemParam"]
+
+
+class ResponseCompactionItemParam(BaseModel):
+    encrypted_content: str
+
+    type: Literal["compaction"]
+    """The type of the item. Always `compaction`."""
+
+    id: Optional[str] = None
+    """The ID of the compaction item."""
src/openai/types/responses/response_compaction_item_param_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ResponseCompactionItemParamParam"]
+
+
+class ResponseCompactionItemParamParam(TypedDict, total=False):
+    encrypted_content: Required[str]
+
+    type: Required[Literal["compaction"]]
+    """The type of the item. Always `compaction`."""
+
+    id: Optional[str]
+    """The ID of the compaction item."""
src/openai/types/responses/response_function_shell_call_output_content.py
@@ -27,10 +27,10 @@ Outcome: TypeAlias = Annotated[Union[OutcomeTimeout, OutcomeExit], PropertyInfo(
 
 class ResponseFunctionShellCallOutputContent(BaseModel):
     outcome: Outcome
-    """The exit or timeout outcome associated with this chunk."""
+    """The exit or timeout outcome associated with this shell call."""
 
     stderr: str
-    """Captured stderr output for this chunk of the shell call."""
+    """Captured stderr output for the shell call."""
 
     stdout: str
-    """Captured stdout output for this chunk of the shell call."""
+    """Captured stdout output for the shell call."""
src/openai/types/responses/response_function_shell_call_output_content_param.py
@@ -26,10 +26,10 @@ Outcome: TypeAlias = Union[OutcomeTimeout, OutcomeExit]
 
 class ResponseFunctionShellCallOutputContentParam(TypedDict, total=False):
     outcome: Required[Outcome]
-    """The exit or timeout outcome associated with this chunk."""
+    """The exit or timeout outcome associated with this shell call."""
 
     stderr: Required[str]
-    """Captured stderr output for this chunk of the shell call."""
+    """Captured stderr output for the shell call."""
 
     stdout: Required[str]
-    """Captured stdout output for this chunk of the shell call."""
+    """Captured stdout output for the shell call."""
src/openai/types/responses/response_function_shell_tool_call.py
@@ -20,7 +20,7 @@ class Action(BaseModel):
 
 class ResponseFunctionShellToolCall(BaseModel):
     id: str
-    """The unique ID of the function shell tool call.
+    """The unique ID of the shell tool call.
 
     Populated when this item is returned via API.
     """
@@ -29,7 +29,7 @@ class ResponseFunctionShellToolCall(BaseModel):
     """The shell commands and limits that describe how to run the tool call."""
 
     call_id: str
-    """The unique ID of the function shell tool call generated by the model."""
+    """The unique ID of the shell tool call generated by the model."""
 
     status: Literal["in_progress", "completed", "incomplete"]
     """The status of the shell call.
src/openai/types/responses/response_input_item.py
@@ -12,6 +12,7 @@ from .response_custom_tool_call import ResponseCustomToolCall
 from .response_computer_tool_call import ResponseComputerToolCall
 from .response_function_tool_call import ResponseFunctionToolCall
 from .response_function_web_search import ResponseFunctionWebSearch
+from .response_compaction_item_param import ResponseCompactionItemParam
 from .response_file_search_tool_call import ResponseFileSearchToolCall
 from .response_custom_tool_call_output import ResponseCustomToolCallOutput
 from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall
@@ -215,13 +216,13 @@ class ShellCall(BaseModel):
     """The shell commands and limits that describe how to run the tool call."""
 
     call_id: str
-    """The unique ID of the function shell tool call generated by the model."""
+    """The unique ID of the shell tool call generated by the model."""
 
     type: Literal["shell_call"]
-    """The type of the item. Always `function_shell_call`."""
+    """The type of the item. Always `shell_call`."""
 
     id: Optional[str] = None
-    """The unique ID of the function shell tool call.
+    """The unique ID of the shell tool call.
 
     Populated when this item is returned via API.
     """
@@ -235,7 +236,7 @@ class ShellCall(BaseModel):
 
 class ShellCallOutput(BaseModel):
     call_id: str
-    """The unique ID of the function shell tool call generated by the model."""
+    """The unique ID of the shell tool call generated by the model."""
 
     output: List[ResponseFunctionShellCallOutputContent]
     """
@@ -244,10 +245,10 @@ class ShellCallOutput(BaseModel):
     """
 
     type: Literal["shell_call_output"]
-    """The type of the item. Always `function_shell_call_output`."""
+    """The type of the item. Always `shell_call_output`."""
 
     id: Optional[str] = None
-    """The unique ID of the function shell tool call output.
+    """The unique ID of the shell tool call output.
 
     Populated when this item is returned via API.
     """
@@ -462,6 +463,7 @@ ResponseInputItem: TypeAlias = Annotated[
         ResponseFunctionToolCall,
         FunctionCallOutput,
         ResponseReasoningItem,
+        ResponseCompactionItemParam,
         ImageGenerationCall,
         ResponseCodeInterpreterToolCall,
         LocalShellCall,
src/openai/types/responses/response_input_item_param.py
@@ -13,6 +13,7 @@ from .response_custom_tool_call_param import ResponseCustomToolCallParam
 from .response_computer_tool_call_param import ResponseComputerToolCallParam
 from .response_function_tool_call_param import ResponseFunctionToolCallParam
 from .response_function_web_search_param import ResponseFunctionWebSearchParam
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam
 from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam
 from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam
 from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam
@@ -216,13 +217,13 @@ class ShellCall(TypedDict, total=False):
     """The shell commands and limits that describe how to run the tool call."""
 
     call_id: Required[str]
-    """The unique ID of the function shell tool call generated by the model."""
+    """The unique ID of the shell tool call generated by the model."""
 
     type: Required[Literal["shell_call"]]
-    """The type of the item. Always `function_shell_call`."""
+    """The type of the item. Always `shell_call`."""
 
     id: Optional[str]
-    """The unique ID of the function shell tool call.
+    """The unique ID of the shell tool call.
 
     Populated when this item is returned via API.
     """
@@ -236,7 +237,7 @@ class ShellCall(TypedDict, total=False):
 
 class ShellCallOutput(TypedDict, total=False):
     call_id: Required[str]
-    """The unique ID of the function shell tool call generated by the model."""
+    """The unique ID of the shell tool call generated by the model."""
 
     output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]]
     """
@@ -245,10 +246,10 @@ class ShellCallOutput(TypedDict, total=False):
     """
 
     type: Required[Literal["shell_call_output"]]
-    """The type of the item. Always `function_shell_call_output`."""
+    """The type of the item. Always `shell_call_output`."""
 
     id: Optional[str]
-    """The unique ID of the function shell tool call output.
+    """The unique ID of the shell tool call output.
 
     Populated when this item is returned via API.
     """
@@ -461,6 +462,7 @@ ResponseInputItemParam: TypeAlias = Union[
     ResponseFunctionToolCallParam,
     FunctionCallOutput,
     ResponseReasoningItemParam,
+    ResponseCompactionItemParamParam,
     ImageGenerationCall,
     ResponseCodeInterpreterToolCallParam,
     LocalShellCall,
src/openai/types/responses/response_input_param.py
@@ -13,6 +13,7 @@ from .response_custom_tool_call_param import ResponseCustomToolCallParam
 from .response_computer_tool_call_param import ResponseComputerToolCallParam
 from .response_function_tool_call_param import ResponseFunctionToolCallParam
 from .response_function_web_search_param import ResponseFunctionWebSearchParam
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam
 from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam
 from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam
 from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam
@@ -217,13 +218,13 @@ class ShellCall(TypedDict, total=False):
     """The shell commands and limits that describe how to run the tool call."""
 
     call_id: Required[str]
-    """The unique ID of the function shell tool call generated by the model."""
+    """The unique ID of the shell tool call generated by the model."""
 
     type: Required[Literal["shell_call"]]
-    """The type of the item. Always `function_shell_call`."""
+    """The type of the item. Always `shell_call`."""
 
     id: Optional[str]
-    """The unique ID of the function shell tool call.
+    """The unique ID of the shell tool call.
 
     Populated when this item is returned via API.
     """
@@ -237,7 +238,7 @@ class ShellCall(TypedDict, total=False):
 
 class ShellCallOutput(TypedDict, total=False):
     call_id: Required[str]
-    """The unique ID of the function shell tool call generated by the model."""
+    """The unique ID of the shell tool call generated by the model."""
 
     output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]]
     """
@@ -246,10 +247,10 @@ class ShellCallOutput(TypedDict, total=False):
     """
 
     type: Required[Literal["shell_call_output"]]
-    """The type of the item. Always `function_shell_call_output`."""
+    """The type of the item. Always `shell_call_output`."""
 
     id: Optional[str]
-    """The unique ID of the function shell tool call output.
+    """The unique ID of the shell tool call output.
 
     Populated when this item is returned via API.
     """
@@ -462,6 +463,7 @@ ResponseInputItemParam: TypeAlias = Union[
     ResponseFunctionToolCallParam,
     FunctionCallOutput,
     ResponseReasoningItemParam,
+    ResponseCompactionItemParamParam,
     ImageGenerationCall,
     ResponseCodeInterpreterToolCallParam,
     LocalShellCall,
src/openai/types/responses/response_output_item.py
@@ -7,6 +7,7 @@ from ..._utils import PropertyInfo
 from ..._models import BaseModel
 from .response_output_message import ResponseOutputMessage
 from .response_reasoning_item import ResponseReasoningItem
+from .response_compaction_item import ResponseCompactionItem
 from .response_custom_tool_call import ResponseCustomToolCall
 from .response_computer_tool_call import ResponseComputerToolCall
 from .response_function_tool_call import ResponseFunctionToolCall
@@ -173,6 +174,7 @@ ResponseOutputItem: TypeAlias = Annotated[
         ResponseFunctionWebSearch,
         ResponseComputerToolCall,
         ResponseReasoningItem,
+        ResponseCompactionItem,
         ImageGenerationCall,
         ResponseCodeInterpreterToolCall,
         LocalShellCall,
src/openai/types/responses/tool.py
@@ -174,7 +174,7 @@ class CodeInterpreter(BaseModel):
     """The code interpreter container.
 
     Can be a container ID or an object that specifies uploaded file IDs to make
-    available to your code.
+    available to your code, along with an optional `memory_limit` setting.
     """
 
     type: Literal["code_interpreter"]
src/openai/types/responses/tool_param.py
@@ -174,7 +174,7 @@ class CodeInterpreter(TypedDict, total=False):
     """The code interpreter container.
 
     Can be a container ID or an object that specifies uploaded file IDs to make
-    available to your code.
+    available to your code, along with an optional `memory_limit` setting.
     """
 
     type: Required[Literal["code_interpreter"]]
src/openai/types/shared/all_models.py
@@ -24,5 +24,6 @@ AllModels: TypeAlias = Union[
         "gpt-5-codex",
         "gpt-5-pro",
         "gpt-5-pro-2025-10-06",
+        "gpt-5.1-codex-max",
     ],
 ]
src/openai/types/shared/reasoning.py
@@ -14,9 +14,9 @@ class Reasoning(BaseModel):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -24,6 +24,7 @@ class Reasoning(BaseModel):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None
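A short sketch of the new effort level in practice; per the docstring above, `xhigh` is currently only supported for `gpt-5.1-codex-max`:

from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-5.1-codex-max",
    reasoning={"effort": "xhigh"},  # new value added in this change
    input="Refactor this function to remove the duplicated branches.",
)
print(response.output_text)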
src/openai/types/shared/reasoning_effort.py
@@ -5,4 +5,4 @@ from typing_extensions import Literal, TypeAlias
 
 __all__ = ["ReasoningEffort"]
 
-ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high"]]
+ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]]
src/openai/types/shared/responses_model.py
@@ -24,5 +24,6 @@ ResponsesModel: TypeAlias = Union[
         "gpt-5-codex",
         "gpt-5-pro",
         "gpt-5-pro-2025-10-06",
+        "gpt-5.1-codex-max",
     ],
 ]
src/openai/types/shared_params/reasoning.py
@@ -15,9 +15,9 @@ class Reasoning(TypedDict, total=False):
     """
     Constrains effort on reasoning for
     [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
-    supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
-    reasoning effort can result in faster responses and fewer tokens used on
-    reasoning in a response.
+    supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+    Reducing reasoning effort can result in faster responses and fewer tokens used
+    on reasoning in a response.
 
     - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
       reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -25,6 +25,7 @@ class Reasoning(TypedDict, total=False):
     - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
       support `none`.
     - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
     """
 
     generate_summary: Optional[Literal["auto", "concise", "detailed"]]
src/openai/types/shared_params/reasoning_effort.py
@@ -7,4 +7,4 @@ from typing_extensions import Literal, TypeAlias
 
 __all__ = ["ReasoningEffort"]
 
-ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high"]]
+ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]]
src/openai/types/shared_params/responses_model.py
@@ -26,5 +26,6 @@ ResponsesModel: TypeAlias = Union[
         "gpt-5-codex",
         "gpt-5-pro",
         "gpt-5-pro-2025-10-06",
+        "gpt-5.1-codex-max",
     ],
 ]
src/openai/types/container_create_params.py
@@ -19,6 +19,9 @@ class ContainerCreateParams(TypedDict, total=False):
     file_ids: SequenceNotStr[str]
     """IDs of files to copy to the container."""
 
+    memory_limit: Literal["1g", "4g", "16g", "64g"]
+    """Optional memory limit for the container. Defaults to "1g"."""
+
 
 class ExpiresAfter(TypedDict, total=False):
     anchor: Required[Literal["last_active_at"]]
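The new `memory_limit` field is an optional create-time parameter alongside the existing `name`, `expires_after`, and `file_ids`. A hedged sketch of requesting a larger sandbox (the container name and expiry values are placeholders):

```python
from openai import OpenAI

client = OpenAI()

# Sketch: ask for more memory than the default "1g".
container = client.containers.create(
    name="analysis-sandbox",  # placeholder name
    memory_limit="4g",        # one of "1g", "4g", "16g", "64g"
    expires_after={"anchor": "last_active_at", "minutes": 20},
)
print(container.id)
```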
src/openai/types/container_create_response.py
@@ -38,3 +38,9 @@ class ContainerCreateResponse(BaseModel):
     point for the expiration. The minutes is the number of minutes after the anchor
     before the container expires.
     """
+
+    last_active_at: Optional[int] = None
+    """Unix timestamp (in seconds) when the container was last active."""
+
+    memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+    """The memory limit configured for the container."""
src/openai/types/container_list_response.py
@@ -38,3 +38,9 @@ class ContainerListResponse(BaseModel):
     point for the expiration. The minutes is the number of minutes after the anchor
     before the container expires.
     """
+
+    last_active_at: Optional[int] = None
+    """Unix timestamp (in seconds) when the container was last active."""
+
+    memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+    """The memory limit configured for the container."""
src/openai/types/container_retrieve_response.py
@@ -38,3 +38,9 @@ class ContainerRetrieveResponse(BaseModel):
     point for the expiration. The minutes is the number of minutes after the anchor
     before the container expires.
     """
+
+    last_active_at: Optional[int] = None
+    """Unix timestamp (in seconds) when the container was last active."""
+
+    memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+    """The memory limit configured for the container."""
src/openai/types/video_create_params.py
@@ -20,10 +20,16 @@ class VideoCreateParams(TypedDict, total=False):
     """Optional image reference that guides generation."""
 
     model: VideoModel
-    """The video generation model to use. Defaults to `sora-2`."""
+    """The video generation model to use (allowed values: sora-2, sora-2-pro).
+
+    Defaults to `sora-2`.
+    """
 
     seconds: VideoSeconds
-    """Clip duration in seconds. Defaults to 4 seconds."""
+    """Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds."""
 
     size: VideoSize
-    """Output resolution formatted as width x height. Defaults to 720x1280."""
+    """
+    Output resolution formatted as width x height (allowed values: 720x1280,
+    1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
+    """
tests/api_resources/test_containers.py
@@ -38,6 +38,7 @@ class TestContainers:
                 "minutes": 0,
             },
             file_ids=["string"],
+            memory_limit="1g",
         )
         assert_matches_type(ContainerCreateResponse, container, path=["response"])
 
@@ -197,6 +198,7 @@ class TestAsyncContainers:
                 "minutes": 0,
             },
             file_ids=["string"],
+            memory_limit="1g",
         )
         assert_matches_type(ContainerCreateResponse, container, path=["response"])
 
tests/api_resources/test_responses.py
@@ -12,6 +12,7 @@ from tests.utils import assert_matches_type
 from openai._utils import assert_signatures_in_sync
 from openai.types.responses import (
     Response,
+    CompactedResponse,
 )
 
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -36,7 +37,7 @@ class TestResponses:
             max_output_tokens=0,
             max_tool_calls=0,
             metadata={"foo": "string"},
-            model="gpt-4o",
+            model="gpt-5.1",
             parallel_tool_calls=True,
             previous_response_id="previous_response_id",
             prompt={
@@ -117,7 +118,7 @@ class TestResponses:
             max_output_tokens=0,
             max_tool_calls=0,
             metadata={"foo": "string"},
-            model="gpt-4o",
+            model="gpt-5.1",
             parallel_tool_calls=True,
             previous_response_id="previous_response_id",
             prompt={
@@ -358,6 +359,41 @@ class TestResponses:
                 "",
             )
 
+    @parametrize
+    def test_method_compact(self, client: OpenAI) -> None:
+        response = client.responses.compact()
+        assert_matches_type(CompactedResponse, response, path=["response"])
+
+    @parametrize
+    def test_method_compact_with_all_params(self, client: OpenAI) -> None:
+        response = client.responses.compact(
+            input="string",
+            instructions="instructions",
+            model="gpt-5.1",
+            previous_response_id="resp_123",
+        )
+        assert_matches_type(CompactedResponse, response, path=["response"])
+
+    @parametrize
+    def test_raw_response_compact(self, client: OpenAI) -> None:
+        http_response = client.responses.with_raw_response.compact()
+
+        assert http_response.is_closed is True
+        assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+        response = http_response.parse()
+        assert_matches_type(CompactedResponse, response, path=["response"])
+
+    @parametrize
+    def test_streaming_response_compact(self, client: OpenAI) -> None:
+        with client.responses.with_streaming_response.compact() as http_response:
+            assert not http_response.is_closed
+            assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            response = http_response.parse()
+            assert_matches_type(CompactedResponse, response, path=["response"])
+
+        assert cast(Any, http_response.is_closed) is True
+
 
 @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"])
 def test_parse_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None:
@@ -391,7 +427,7 @@ class TestAsyncResponses:
             max_output_tokens=0,
             max_tool_calls=0,
             metadata={"foo": "string"},
-            model="gpt-4o",
+            model="gpt-5.1",
             parallel_tool_calls=True,
             previous_response_id="previous_response_id",
             prompt={
@@ -472,7 +508,7 @@ class TestAsyncResponses:
             max_output_tokens=0,
             max_tool_calls=0,
             metadata={"foo": "string"},
-            model="gpt-4o",
+            model="gpt-5.1",
             parallel_tool_calls=True,
             previous_response_id="previous_response_id",
             prompt={
@@ -712,3 +748,38 @@ class TestAsyncResponses:
             await async_client.responses.with_raw_response.cancel(
                 "",
             )
+
+    @parametrize
+    async def test_method_compact(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.responses.compact()
+        assert_matches_type(CompactedResponse, response, path=["response"])
+
+    @parametrize
+    async def test_method_compact_with_all_params(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.responses.compact(
+            input="string",
+            instructions="instructions",
+            model="gpt-5.1",
+            previous_response_id="resp_123",
+        )
+        assert_matches_type(CompactedResponse, response, path=["response"])
+
+    @parametrize
+    async def test_raw_response_compact(self, async_client: AsyncOpenAI) -> None:
+        http_response = await async_client.responses.with_raw_response.compact()
+
+        assert http_response.is_closed is True
+        assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+        response = http_response.parse()
+        assert_matches_type(CompactedResponse, response, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_compact(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.responses.with_streaming_response.compact() as http_response:
+            assert not http_response.is_closed
+            assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            response = await http_response.parse()
+            assert_matches_type(CompactedResponse, response, path=["response"])
+
+        assert cast(Any, http_response.is_closed) is True
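Outside of the generated tests, the new endpoint is reachable as `client.responses.compact(...)`. A hedged sketch of compacting the history behind an earlier response so a follow-up turn starts from a smaller context; per the tests above all parameters are optional, and the response ID here is a placeholder:

```python
from openai import OpenAI

client = OpenAI()

compacted = client.responses.compact(
    previous_response_id="resp_123",  # placeholder ID
    model="gpt-5.1",
)
# CompactedResponse is defined in src/openai/types/responses/compacted_response.py;
# print the whole object rather than assuming specific fields.
print(compacted)
```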
.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 136
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a7e92d12ebe89ca019a7ac5b29759064eefa2c38fe08d03516f2620e66abb32b.yml
-openapi_spec_hash: acbc703b2739447abc6312b2d753631c
-config_hash: b876221dfb213df9f0a999e75d38a65e
+configured_endpoints: 137
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-fe8a79e6fd407e6c9afec60971f03076b65f711ccd6ea16457933b0e24fb1f6d.yml
+openapi_spec_hash: 38c0a73f4e08843732c5f8002a809104
+config_hash: 2c350086d87a4b4532077363087840e7
api.md
@@ -733,6 +733,7 @@ Types:
 ```python
 from openai.types.responses import (
     ApplyPatchTool,
+    CompactedResponse,
     ComputerTool,
     CustomTool,
     EasyInputMessage,
@@ -752,6 +753,8 @@ from openai.types.responses import (
     ResponseCodeInterpreterCallInProgressEvent,
     ResponseCodeInterpreterCallInterpretingEvent,
     ResponseCodeInterpreterToolCall,
+    ResponseCompactionItem,
+    ResponseCompactionItemParam,
     ResponseCompletedEvent,
     ResponseComputerToolCall,
     ResponseComputerToolCallOutputItem,
@@ -861,6 +864,7 @@ Methods:
 - <code title="get /responses/{response_id}">client.responses.<a href="./src/openai/resources/responses/responses.py">retrieve</a>(response_id, \*\*<a href="src/openai/types/responses/response_retrieve_params.py">params</a>) -> <a href="./src/openai/types/responses/response.py">Response</a></code>
 - <code title="delete /responses/{response_id}">client.responses.<a href="./src/openai/resources/responses/responses.py">delete</a>(response_id) -> None</code>
 - <code title="post /responses/{response_id}/cancel">client.responses.<a href="./src/openai/resources/responses/responses.py">cancel</a>(response_id) -> <a href="./src/openai/types/responses/response.py">Response</a></code>
+- <code title="post /responses/compact">client.responses.<a href="./src/openai/resources/responses/responses.py">compact</a>(\*\*<a href="src/openai/types/responses/response_compact_params.py">params</a>) -> <a href="./src/openai/types/responses/compacted_response.py">CompactedResponse</a></code>
 
 ## InputItems
 
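Compaction also surfaces as an output item type (see `ResponseCompactionItem` above and the `compaction` branch added to `parse_response`). A hedged sketch of skipping such items when walking `response.output`; only the `type` discriminator is relied on, since the item's other fields are not shown in this diff:

```python
from openai import OpenAI

client = OpenAI()

response = client.responses.retrieve("resp_123")  # placeholder ID
for item in response.output:
    if item.type == "compaction":
        # Summarized history produced by compaction; skip it when
        # collecting ordinary message and tool-call output.
        continue
    print(item.type)
```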
@@ -914,6 +918,7 @@ from openai.types.realtime import (
     InputAudioBufferClearedEvent,
     InputAudioBufferCommitEvent,
     InputAudioBufferCommittedEvent,
+    InputAudioBufferDtmfEventReceivedEvent,
     InputAudioBufferSpeechStartedEvent,
     InputAudioBufferSpeechStoppedEvent,
     InputAudioBufferTimeoutTriggered,