Commit 18e0b36a

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-06-27 00:56:28
release: 1.92.0 (#2424)
tag: v1.92.0
* chore(tests): skip some failing tests on the latest python versions
* chore(internal): add tests for breaking change detection
* move parse and stream methods out of beta
* update docs
* update tests
* remove old beta files
* fix relative import
* fix(ci): release-doctor — report correct token name
* feat(api): webhook and deep research support
* release: 1.92.0

---------

Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
Co-authored-by: David Meadows <dmeadows@stainless.com>
1 parent 0673da6
Changed files (66)
bin/check-release-environment
@@ -7,7 +7,7 @@ if [ -z "${STAINLESS_API_KEY}" ]; then
 fi
 
 if [ -z "${PYPI_TOKEN}" ]; then
-  errors+=("The OPENAI_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
+  errors+=("The PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
 fi
 
 lenErrors=${#errors[@]}
examples/parsing.py
@@ -18,7 +18,7 @@ class MathResponse(BaseModel):
 
 client = OpenAI()
 
-completion = client.beta.chat.completions.parse(
+completion = client.chat.completions.parse(
     model="gpt-4o-2024-08-06",
     messages=[
         {"role": "system", "content": "You are a helpful math tutor."},
examples/parsing_stream.py
@@ -18,7 +18,7 @@ class MathResponse(BaseModel):
 
 client = OpenAI()
 
-with client.beta.chat.completions.stream(
+with client.chat.completions.stream(
     model="gpt-4o-2024-08-06",
     messages=[
         {"role": "system", "content": "You are a helpful math tutor."},
examples/parsing_tools.py
@@ -57,7 +57,7 @@ class Query(BaseModel):
 
 client = OpenAI()
 
-completion = client.beta.chat.completions.parse(
+completion = client.chat.completions.parse(
     model="gpt-4o-2024-08-06",
     messages=[
         {
examples/parsing_tools_stream.py
@@ -15,7 +15,7 @@ class GetWeather(BaseModel):
 client = OpenAI()
 
 
-with client.beta.chat.completions.stream(
+with client.chat.completions.stream(
     model="gpt-4o-2024-08-06",
     messages=[
         {
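The four example updates above all make the same change: the structured-output helpers are now called on `client.chat.completions` rather than `client.beta.chat.completions`. A minimal sketch of the new call shape, reusing the `MathResponse` model already defined in `examples/parsing.py` (nothing here is new API surface beyond dropping the `.beta` prefix):

```py
from typing import List

from pydantic import BaseModel

from openai import OpenAI


class Step(BaseModel):
    explanation: str
    output: str


class MathResponse(BaseModel):
    steps: List[Step]
    final_answer: str


client = OpenAI()

# parse() now lives on the stable namespace; the beta prefix is gone.
completion = client.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "You are a helpful math tutor."},
        {"role": "user", "content": "solve 8x + 31 = 2"},
    ],
    response_format=MathResponse,
)

message = completion.choices[0].message
if message.parsed:
    print(message.parsed.final_answer)
```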
src/openai/lib/streaming/chat/_completions.py
@@ -128,7 +128,7 @@ class ChatCompletionStreamManager(Generic[ResponseFormatT]):
 
     Usage:
     ```py
-    with client.beta.chat.completions.stream(...) as stream:
+    with client.chat.completions.stream(...) as stream:
         for event in stream:
             ...
     ```
@@ -251,7 +251,7 @@ class AsyncChatCompletionStreamManager(Generic[ResponseFormatT]):
 
     Usage:
     ```py
-    async with client.beta.chat.completions.stream(...) as stream:
+    async with client.chat.completions.stream(...) as stream:
         for event in stream:
             ...
     ```
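The streaming helper's docstrings track the same move. A short sketch of the context-manager usage these docstrings describe, assuming a plain text prompt:

```py
from openai import OpenAI

client = OpenAI()

# stream() must be used as a context manager so the response is closed cleanly.
with client.chat.completions.stream(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "Write a haiku about diffs."}],
) as stream:
    for event in stream:
        # Each delta arrives as a typed event; print text as it is generated.
        if event.type == "content.delta":
            print(event.delta, flush=True, end="")
```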
src/openai/lib/azure.py
@@ -98,6 +98,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AzureADTokenProvider | None = None,
         organization: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
@@ -117,6 +118,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AzureADTokenProvider | None = None,
         organization: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
@@ -136,6 +138,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AzureADTokenProvider | None = None,
         organization: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
@@ -156,6 +159,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
         azure_ad_token_provider: AzureADTokenProvider | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         base_url: str | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -234,6 +238,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
             api_key=api_key,
             organization=organization,
             project=project,
+            webhook_secret=webhook_secret,
             base_url=base_url,
             timeout=timeout,
             max_retries=max_retries,
@@ -256,6 +261,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
         api_key: str | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         api_version: str | None = None,
         azure_ad_token: str | None = None,
@@ -277,6 +283,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
             api_key=api_key,
             organization=organization,
             project=project,
+            webhook_secret=webhook_secret,
             websocket_base_url=websocket_base_url,
             base_url=base_url,
             timeout=timeout,
@@ -370,6 +377,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
         azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
@@ -390,6 +398,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
         azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
@@ -410,6 +419,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
         azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
@@ -430,6 +440,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
         azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         base_url: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -508,6 +519,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
             api_key=api_key,
             organization=organization,
             project=project,
+            webhook_secret=webhook_secret,
             base_url=base_url,
             timeout=timeout,
             max_retries=max_retries,
@@ -530,6 +542,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
         api_key: str | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         api_version: str | None = None,
         azure_ad_token: str | None = None,
@@ -551,6 +564,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
             api_key=api_key,
             organization=organization,
             project=project,
+            webhook_secret=webhook_secret,
             websocket_base_url=websocket_base_url,
             base_url=base_url,
             timeout=timeout,
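Both Azure clients gain a pass-through `webhook_secret` constructor argument, forwarded to the base client alongside `api_key`, `organization`, and `project`. A minimal sketch of supplying it, assuming the usual `azure_endpoint`/`api_version` configuration; the environment variable name and API version string are illustrative, not part of the diff:

```py
import os

from openai import AzureOpenAI

# webhook_secret is simply forwarded to the underlying client, where it is
# presumably consumed by the webhook helpers mentioned in the commit message.
client = AzureOpenAI(
    azure_endpoint="https://my-resource.openai.azure.com",
    api_version="2024-10-21",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    webhook_secret=os.environ.get("OPENAI_WEBHOOK_SECRET"),
)
```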
src/openai/resources/beta/chat/__init__.py
@@ -1,11 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .chat import Chat, AsyncChat
-from .completions import Completions, AsyncCompletions
-
-__all__ = [
-    "Completions",
-    "AsyncCompletions",
-    "Chat",
-    "AsyncChat",
-]
src/openai/resources/beta/chat/chat.py
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from ...._compat import cached_property
-from .completions import Completions, AsyncCompletions
-from ...._resource import SyncAPIResource, AsyncAPIResource
-
-__all__ = ["Chat", "AsyncChat"]
-
-
-class Chat(SyncAPIResource):
-    @cached_property
-    def completions(self) -> Completions:
-        return Completions(self._client)
-
-
-class AsyncChat(AsyncAPIResource):
-    @cached_property
-    def completions(self) -> AsyncCompletions:
-        return AsyncCompletions(self._client)
src/openai/resources/beta/chat/completions.py
@@ -1,634 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, List, Type, Union, Iterable, Optional, cast
-from functools import partial
-from typing_extensions import Literal
-
-import httpx
-
-from .... import _legacy_response
-from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import maybe_transform, async_maybe_transform
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...._streaming import Stream
-from ....types.chat import completion_create_params
-from ...._base_client import make_request_options
-from ....lib._parsing import (
-    ResponseFormatT,
-    validate_input_tools as _validate_input_tools,
-    parse_chat_completion as _parse_chat_completion,
-    type_to_response_format_param as _type_to_response_format,
-)
-from ....types.chat_model import ChatModel
-from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
-from ....types.shared_params import Metadata, ReasoningEffort
-from ....types.chat.chat_completion import ChatCompletion
-from ....types.chat.chat_completion_chunk import ChatCompletionChunk
-from ....types.chat.parsed_chat_completion import ParsedChatCompletion
-from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
-from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
-from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
-from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
-from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
-from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
-
-__all__ = ["Completions", "AsyncCompletions"]
-
-
-class Completions(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> CompletionsWithRawResponse:
-        """
-        This property can be used as a prefix for any HTTP method call to return the
-        the raw response object instead of the parsed content.
-
-        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
-        """
-        return CompletionsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
-        """
-        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
-        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
-        """
-        return CompletionsWithStreamingResponse(self)
-
-    def parse(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[str, ChatModel],
-        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
-        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
-        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
-        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        store: Optional[bool] | NotGiven = NOT_GIVEN,
-        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ParsedChatCompletion[ResponseFormatT]:
-        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
-        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
-
-        You can pass a pydantic model to this method and it will automatically convert the model
-        into a JSON schema, send it to the API and parse the response content back into the given model.
-
-        This method will also automatically parse `function` tool calls if:
-        - You use the `openai.pydantic_function_tool()` helper method
-        - You mark your tool schema with `"strict": True`
-
-        Example usage:
-        ```py
-        from pydantic import BaseModel
-        from openai import OpenAI
-
-
-        class Step(BaseModel):
-            explanation: str
-            output: str
-
-
-        class MathResponse(BaseModel):
-            steps: List[Step]
-            final_answer: str
-
-
-        client = OpenAI()
-        completion = client.beta.chat.completions.parse(
-            model="gpt-4o-2024-08-06",
-            messages=[
-                {"role": "system", "content": "You are a helpful math tutor."},
-                {"role": "user", "content": "solve 8x + 31 = 2"},
-            ],
-            response_format=MathResponse,
-        )
-
-        message = completion.choices[0].message
-        if message.parsed:
-            print(message.parsed.steps)
-            print("answer: ", message.parsed.final_answer)
-        ```
-        """
-        _validate_input_tools(tools)
-
-        extra_headers = {
-            "X-Stainless-Helper-Method": "beta.chat.completions.parse",
-            **(extra_headers or {}),
-        }
-
-        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
-            return _parse_chat_completion(
-                response_format=response_format,
-                chat_completion=raw_completion,
-                input_tools=tools,
-            )
-
-        return self._post(
-            "/chat/completions",
-            body=maybe_transform(
-                {
-                    "messages": messages,
-                    "model": model,
-                    "audio": audio,
-                    "frequency_penalty": frequency_penalty,
-                    "function_call": function_call,
-                    "functions": functions,
-                    "logit_bias": logit_bias,
-                    "logprobs": logprobs,
-                    "max_completion_tokens": max_completion_tokens,
-                    "max_tokens": max_tokens,
-                    "metadata": metadata,
-                    "modalities": modalities,
-                    "n": n,
-                    "parallel_tool_calls": parallel_tool_calls,
-                    "prediction": prediction,
-                    "presence_penalty": presence_penalty,
-                    "reasoning_effort": reasoning_effort,
-                    "response_format": _type_to_response_format(response_format),
-                    "seed": seed,
-                    "service_tier": service_tier,
-                    "stop": stop,
-                    "store": store,
-                    "stream": False,
-                    "stream_options": stream_options,
-                    "temperature": temperature,
-                    "tool_choice": tool_choice,
-                    "tools": tools,
-                    "top_logprobs": top_logprobs,
-                    "top_p": top_p,
-                    "user": user,
-                    "web_search_options": web_search_options,
-                },
-                completion_create_params.CompletionCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                post_parser=parser,
-            ),
-            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
-            # in the `parser` function above
-            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
-            stream=False,
-        )
-
-    def stream(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[str, ChatModel],
-        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
-        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
-        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        store: Optional[bool] | NotGiven = NOT_GIVEN,
-        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletionStreamManager[ResponseFormatT]:
-        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
-        and automatic accumulation of each delta.
-
-        This also supports all of the parsing utilities that `.parse()` does.
-
-        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
-
-        ```py
-        with client.beta.chat.completions.stream(
-            model="gpt-4o-2024-08-06",
-            messages=[...],
-        ) as stream:
-            for event in stream:
-                if event.type == "content.delta":
-                    print(event.delta, flush=True, end="")
-        ```
-
-        When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)` is an iterator. The full list of events that are yielded by the iterator are outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
-
-        When the context manager exits, the response will be closed, however the `stream` instance is still available outside
-        the context manager.
-        """
-        extra_headers = {
-            "X-Stainless-Helper-Method": "beta.chat.completions.stream",
-            **(extra_headers or {}),
-        }
-
-        api_request: partial[Stream[ChatCompletionChunk]] = partial(
-            self._client.chat.completions.create,
-            messages=messages,
-            model=model,
-            audio=audio,
-            stream=True,
-            response_format=_type_to_response_format(response_format),
-            frequency_penalty=frequency_penalty,
-            function_call=function_call,
-            functions=functions,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_completion_tokens=max_completion_tokens,
-            max_tokens=max_tokens,
-            metadata=metadata,
-            modalities=modalities,
-            n=n,
-            parallel_tool_calls=parallel_tool_calls,
-            prediction=prediction,
-            presence_penalty=presence_penalty,
-            reasoning_effort=reasoning_effort,
-            seed=seed,
-            service_tier=service_tier,
-            store=store,
-            stop=stop,
-            stream_options=stream_options,
-            temperature=temperature,
-            tool_choice=tool_choice,
-            tools=tools,
-            top_logprobs=top_logprobs,
-            top_p=top_p,
-            user=user,
-            web_search_options=web_search_options,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-        )
-        return ChatCompletionStreamManager(
-            api_request,
-            response_format=response_format,
-            input_tools=tools,
-        )
-
-
-class AsyncCompletions(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
-        """
-        This property can be used as a prefix for any HTTP method call to return the
-        the raw response object instead of the parsed content.
-
-        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
-        """
-        return AsyncCompletionsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
-        """
-        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
-        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
-        """
-        return AsyncCompletionsWithStreamingResponse(self)
-
-    async def parse(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[str, ChatModel],
-        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
-        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
-        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
-        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        store: Optional[bool] | NotGiven = NOT_GIVEN,
-        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ParsedChatCompletion[ResponseFormatT]:
-        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
-        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
-
-        You can pass a pydantic model to this method and it will automatically convert the model
-        into a JSON schema, send it to the API and parse the response content back into the given model.
-
-        This method will also automatically parse `function` tool calls if:
-        - You use the `openai.pydantic_function_tool()` helper method
-        - You mark your tool schema with `"strict": True`
-
-        Example usage:
-        ```py
-        from pydantic import BaseModel
-        from openai import AsyncOpenAI
-
-
-        class Step(BaseModel):
-            explanation: str
-            output: str
-
-
-        class MathResponse(BaseModel):
-            steps: List[Step]
-            final_answer: str
-
-
-        client = AsyncOpenAI()
-        completion = await client.beta.chat.completions.parse(
-            model="gpt-4o-2024-08-06",
-            messages=[
-                {"role": "system", "content": "You are a helpful math tutor."},
-                {"role": "user", "content": "solve 8x + 31 = 2"},
-            ],
-            response_format=MathResponse,
-        )
-
-        message = completion.choices[0].message
-        if message.parsed:
-            print(message.parsed.steps)
-            print("answer: ", message.parsed.final_answer)
-        ```
-        """
-        _validate_input_tools(tools)
-
-        extra_headers = {
-            "X-Stainless-Helper-Method": "beta.chat.completions.parse",
-            **(extra_headers or {}),
-        }
-
-        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
-            return _parse_chat_completion(
-                response_format=response_format,
-                chat_completion=raw_completion,
-                input_tools=tools,
-            )
-
-        return await self._post(
-            "/chat/completions",
-            body=await async_maybe_transform(
-                {
-                    "messages": messages,
-                    "model": model,
-                    "audio": audio,
-                    "frequency_penalty": frequency_penalty,
-                    "function_call": function_call,
-                    "functions": functions,
-                    "logit_bias": logit_bias,
-                    "logprobs": logprobs,
-                    "max_completion_tokens": max_completion_tokens,
-                    "max_tokens": max_tokens,
-                    "metadata": metadata,
-                    "modalities": modalities,
-                    "n": n,
-                    "parallel_tool_calls": parallel_tool_calls,
-                    "prediction": prediction,
-                    "presence_penalty": presence_penalty,
-                    "reasoning_effort": reasoning_effort,
-                    "response_format": _type_to_response_format(response_format),
-                    "seed": seed,
-                    "service_tier": service_tier,
-                    "store": store,
-                    "stop": stop,
-                    "stream": False,
-                    "stream_options": stream_options,
-                    "temperature": temperature,
-                    "tool_choice": tool_choice,
-                    "tools": tools,
-                    "top_logprobs": top_logprobs,
-                    "top_p": top_p,
-                    "user": user,
-                    "web_search_options": web_search_options,
-                },
-                completion_create_params.CompletionCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                post_parser=parser,
-            ),
-            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
-            # in the `parser` function above
-            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
-            stream=False,
-        )
-
-    def stream(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[str, ChatModel],
-        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
-        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
-        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        store: Optional[bool] | NotGiven = NOT_GIVEN,
-        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
-        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
-        and automatic accumulation of each delta.
-
-        This also supports all of the parsing utilities that `.parse()` does.
-
-        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
-
-        ```py
-        async with client.beta.chat.completions.stream(
-            model="gpt-4o-2024-08-06",
-            messages=[...],
-        ) as stream:
-            async for event in stream:
-                if event.type == "content.delta":
-                    print(event.delta, flush=True, end="")
-        ```
-
-        When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)` is an async iterator. The full list of events that are yielded by the iterator are outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
-
-        When the context manager exits, the response will be closed, however the `stream` instance is still available outside
-        the context manager.
-        """
-        _validate_input_tools(tools)
-
-        extra_headers = {
-            "X-Stainless-Helper-Method": "beta.chat.completions.stream",
-            **(extra_headers or {}),
-        }
-
-        api_request = self._client.chat.completions.create(
-            messages=messages,
-            model=model,
-            audio=audio,
-            stream=True,
-            response_format=_type_to_response_format(response_format),
-            frequency_penalty=frequency_penalty,
-            function_call=function_call,
-            functions=functions,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_completion_tokens=max_completion_tokens,
-            max_tokens=max_tokens,
-            metadata=metadata,
-            modalities=modalities,
-            n=n,
-            parallel_tool_calls=parallel_tool_calls,
-            prediction=prediction,
-            presence_penalty=presence_penalty,
-            reasoning_effort=reasoning_effort,
-            seed=seed,
-            service_tier=service_tier,
-            stop=stop,
-            store=store,
-            stream_options=stream_options,
-            temperature=temperature,
-            tool_choice=tool_choice,
-            tools=tools,
-            top_logprobs=top_logprobs,
-            top_p=top_p,
-            user=user,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-            web_search_options=web_search_options,
-        )
-        return AsyncChatCompletionStreamManager(
-            api_request,
-            response_format=response_format,
-            input_tools=tools,
-        )
-
-
-class CompletionsWithRawResponse:
-    def __init__(self, completions: Completions) -> None:
-        self._completions = completions
-
-        self.parse = _legacy_response.to_raw_response_wrapper(
-            completions.parse,
-        )
-
-
-class AsyncCompletionsWithRawResponse:
-    def __init__(self, completions: AsyncCompletions) -> None:
-        self._completions = completions
-
-        self.parse = _legacy_response.async_to_raw_response_wrapper(
-            completions.parse,
-        )
-
-
-class CompletionsWithStreamingResponse:
-    def __init__(self, completions: Completions) -> None:
-        self._completions = completions
-
-        self.parse = to_streamed_response_wrapper(
-            completions.parse,
-        )
-
-
-class AsyncCompletionsWithStreamingResponse:
-    def __init__(self, completions: AsyncCompletions) -> None:
-        self._completions = completions
-
-        self.parse = async_to_streamed_response_wrapper(
-            completions.parse,
-        )
src/openai/resources/beta/beta.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 from ..._compat import cached_property
-from .chat.chat import Chat, AsyncChat
 from .assistants import (
     Assistants,
     AsyncAssistants,
@@ -21,6 +20,7 @@ from .threads.threads import (
     ThreadsWithStreamingResponse,
     AsyncThreadsWithStreamingResponse,
 )
+from ...resources.chat import Chat, AsyncChat
 from .realtime.realtime import (
     Realtime,
     AsyncRealtime,
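With the dedicated beta resource deleted, `beta.py` now imports `Chat`/`AsyncChat` from the stable `resources.chat` package. Assuming the `chat` property on the `Beta` class is otherwise unchanged (this hunk does not show it), older `client.beta.chat...` call sites resolve to the same resource as `client.chat...`; a quick sketch:

```py
from openai import OpenAI

# Placeholder key; no request is made, we only inspect resource types.
client = OpenAI(api_key="sk-placeholder")

# After this change both attributes resolve to the same Completions resource,
# so beta-style call sites keep working while new code drops the prefix.
print(type(client.chat.completions))
print(type(client.beta.chat.completions))
```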
src/openai/resources/chat/completions/completions.py
@@ -3,7 +3,8 @@
 from __future__ import annotations
 
 import inspect
-from typing import Dict, List, Union, Iterable, Optional
+from typing import Dict, List, Type, Union, Iterable, Optional, cast
+from functools import partial
 from typing_extensions import Literal, overload
 
 import httpx
@@ -32,11 +33,19 @@ from ....types.chat import (
     completion_update_params,
 )
 from ...._base_client import AsyncPaginator, make_request_options
+from ....lib._parsing import (
+    ResponseFormatT,
+    validate_input_tools as _validate_input_tools,
+    parse_chat_completion as _parse_chat_completion,
+    type_to_response_format_param as _type_to_response_format,
+)
+from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
 from ....types.shared.chat_model import ChatModel
 from ....types.chat.chat_completion import ChatCompletion
 from ....types.shared_params.metadata import Metadata
 from ....types.shared.reasoning_effort import ReasoningEffort
 from ....types.chat.chat_completion_chunk import ChatCompletionChunk
+from ....types.chat.parsed_chat_completion import ParsedChatCompletion
 from ....types.chat.chat_completion_deleted import ChatCompletionDeleted
 from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
 from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
@@ -72,6 +81,153 @@ class Completions(SyncAPIResource):
         """
         return CompletionsWithStreamingResponse(self)
 
+    def parse(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedChatCompletion[ResponseFormatT]:
+        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
+        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
+
+        You can pass a pydantic model to this method and it will automatically convert the model
+        into a JSON schema, send it to the API and parse the response content back into the given model.
+
+        This method will also automatically parse `function` tool calls if:
+        - You use the `openai.pydantic_function_tool()` helper method
+        - You mark your tool schema with `"strict": True`
+
+        Example usage:
+        ```py
+        from pydantic import BaseModel
+        from openai import OpenAI
+
+
+        class Step(BaseModel):
+            explanation: str
+            output: str
+
+
+        class MathResponse(BaseModel):
+            steps: List[Step]
+            final_answer: str
+
+
+        client = OpenAI()
+        completion = client.chat.completions.parse(
+            model="gpt-4o-2024-08-06",
+            messages=[
+                {"role": "system", "content": "You are a helpful math tutor."},
+                {"role": "user", "content": "solve 8x + 31 = 2"},
+            ],
+            response_format=MathResponse,
+        )
+
+        message = completion.choices[0].message
+        if message.parsed:
+            print(message.parsed.steps)
+            print("answer: ", message.parsed.final_answer)
+        ```
+        """
+        _validate_input_tools(tools)
+
+        extra_headers = {
+            "X-Stainless-Helper-Method": "chat.completions.parse",
+            **(extra_headers or {}),
+        }
+
+        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
+            return _parse_chat_completion(
+                response_format=response_format,
+                chat_completion=raw_completion,
+                input_tools=tools,
+            )
+
+        return self._post(
+            "/chat/completions",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "audio": audio,
+                    "frequency_penalty": frequency_penalty,
+                    "function_call": function_call,
+                    "functions": functions,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_tokens": max_tokens,
+                    "metadata": metadata,
+                    "modalities": modalities,
+                    "n": n,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "prediction": prediction,
+                    "presence_penalty": presence_penalty,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": _type_to_response_format(response_format),
+                    "seed": seed,
+                    "service_tier": service_tier,
+                    "stop": stop,
+                    "store": store,
+                    "stream": False,
+                    "stream_options": stream_options,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_logprobs": top_logprobs,
+                    "top_p": top_p,
+                    "user": user,
+                    "web_search_options": web_search_options,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
+            stream=False,
+        )
+
     @overload
     def create(
         self,
@@ -95,7 +251,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -256,23 +412,23 @@ class Completions(SyncAPIResource):
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           stop: Not supported with latest reasoning models `o3` and `o4-mini`.
 
@@ -283,6 +439,8 @@ class Completions(SyncAPIResource):
               our [model distillation](https://platform.openai.com/docs/guides/distillation)
               or [evals](https://platform.openai.com/docs/guides/evals) products.
 
+              Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
           stream: If set to true, the model response data will be streamed to the client as it is
               generated using
               [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
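The `service_tier` literal on each `create` overload also gains a `"priority"` member, documented in the rewritten description above. A minimal sketch of opting in, assuming the account has access to Priority processing (availability is gated, per the docstring's contact-sales note):

```py
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "Summarize deep research in one line."}],
    # New literal member; the response's service_tier field reports the tier
    # that actually served the request, which may differ from the one requested.
    service_tier="priority",
)
print(completion.service_tier)
```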
@@ -365,7 +523,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -534,23 +692,23 @@ class Completions(SyncAPIResource):
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           stop: Not supported with latest reasoning models `o3` and `o4-mini`.
 
@@ -561,6 +719,8 @@ class Completions(SyncAPIResource):
               our [model distillation](https://platform.openai.com/docs/guides/distillation)
               or [evals](https://platform.openai.com/docs/guides/evals) products.
 
+              Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -634,7 +794,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -803,23 +963,23 @@ class Completions(SyncAPIResource):
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           stop: Not supported with latest reasoning models `o3` and `o4-mini`.
 
@@ -830,6 +990,8 @@ class Completions(SyncAPIResource):
               our [model distillation](https://platform.openai.com/docs/guides/distillation)
               or [evals](https://platform.openai.com/docs/guides/evals) products.
 
+              Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -902,7 +1064,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
@@ -1150,6 +1312,117 @@ class Completions(SyncAPIResource):
             cast_to=ChatCompletionDeleted,
         )
 
+    def stream(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletionStreamManager[ResponseFormatT]:
+        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
+        and automatic accumulation of each delta.
+
+        This also supports all of the parsing utilities that `.parse()` does.
+
+        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
+
+        ```py
+        with client.chat.completions.stream(
+            model="gpt-4o-2024-08-06",
+            messages=[...],
+        ) as stream:
+            for event in stream:
+                if event.type == "content.delta":
+                    print(event.delta, flush=True, end="")
+        ```
+
+        When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an iterator. The full list of events yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
+
+        When the context manager exits, the response will be closed; however, the `stream` instance is still available
+        outside the context manager.
+        """
+        extra_headers = {
+            "X-Stainless-Helper-Method": "chat.completions.stream",
+            **(extra_headers or {}),
+        }
+
+        api_request: partial[Stream[ChatCompletionChunk]] = partial(
+            self.create,
+            messages=messages,
+            model=model,
+            audio=audio,
+            stream=True,
+            response_format=_type_to_response_format(response_format),
+            frequency_penalty=frequency_penalty,
+            function_call=function_call,
+            functions=functions,
+            logit_bias=logit_bias,
+            logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
+            max_tokens=max_tokens,
+            metadata=metadata,
+            modalities=modalities,
+            n=n,
+            parallel_tool_calls=parallel_tool_calls,
+            prediction=prediction,
+            presence_penalty=presence_penalty,
+            reasoning_effort=reasoning_effort,
+            seed=seed,
+            service_tier=service_tier,
+            store=store,
+            stop=stop,
+            stream_options=stream_options,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            tools=tools,
+            top_logprobs=top_logprobs,
+            top_p=top_p,
+            user=user,
+            web_search_options=web_search_options,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return ChatCompletionStreamManager(
+            api_request,
+            response_format=response_format,
+            input_tools=tools,
+        )
+
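
For reference, a hedged sketch of how the sync `.stream()` helper above combines with a Pydantic `response_format`; the `Answer` model and prompt are illustrative, and `get_final_completion()` is used on the assumption that it accumulates the full completion as described in helpers.md:

```py
from pydantic import BaseModel

from openai import OpenAI


class Answer(BaseModel):
    summary: str


client = OpenAI()

with client.chat.completions.stream(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "Summarize the benefits of streaming."}],
    response_format=Answer,
) as stream:
    for event in stream:
        if event.type == "content.delta":
            print(event.delta, end="", flush=True)

# The accumulated completion is still available after the stream is consumed.
completion = stream.get_final_completion()
print(completion.choices[0].message.parsed)
```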
 
 class AsyncCompletions(AsyncAPIResource):
     @cached_property
@@ -1175,6 +1448,153 @@ class AsyncCompletions(AsyncAPIResource):
         """
         return AsyncCompletionsWithStreamingResponse(self)
 
+    async def parse(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedChatCompletion[ResponseFormatT]:
+        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
+        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
+
+        You can pass a Pydantic model to this method and it will automatically convert the model
+        into a JSON schema, send it to the API, and parse the response content back into the given model.
+
+        This method will also automatically parse `function` tool calls if:
+        - You use the `openai.pydantic_function_tool()` helper method
+        - You mark your tool schema with `"strict": True`
+
+        Example usage:
+        ```py
+        from typing import List
+
+        from pydantic import BaseModel
+        from openai import AsyncOpenAI
+
+
+        class Step(BaseModel):
+            explanation: str
+            output: str
+
+
+        class MathResponse(BaseModel):
+            steps: List[Step]
+            final_answer: str
+
+
+        client = AsyncOpenAI()
+        completion = await client.chat.completions.parse(
+            model="gpt-4o-2024-08-06",
+            messages=[
+                {"role": "system", "content": "You are a helpful math tutor."},
+                {"role": "user", "content": "solve 8x + 31 = 2"},
+            ],
+            response_format=MathResponse,
+        )
+
+        message = completion.choices[0].message
+        if message.parsed:
+            print(message.parsed.steps)
+            print("answer: ", message.parsed.final_answer)
+        ```
+        """
+        _validate_input_tools(tools)
+
+        extra_headers = {
+            "X-Stainless-Helper-Method": "chat.completions.parse",
+            **(extra_headers or {}),
+        }
+
+        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
+            return _parse_chat_completion(
+                response_format=response_format,
+                chat_completion=raw_completion,
+                input_tools=tools,
+            )
+
+        return await self._post(
+            "/chat/completions",
+            body=await async_maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "audio": audio,
+                    "frequency_penalty": frequency_penalty,
+                    "function_call": function_call,
+                    "functions": functions,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_tokens": max_tokens,
+                    "metadata": metadata,
+                    "modalities": modalities,
+                    "n": n,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "prediction": prediction,
+                    "presence_penalty": presence_penalty,
+                    "reasoning_effort": reasoning_effort,
+                    "response_format": _type_to_response_format(response_format),
+                    "seed": seed,
+                    "service_tier": service_tier,
+                    "store": store,
+                    "stop": stop,
+                    "stream": False,
+                    "stream_options": stream_options,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_logprobs": top_logprobs,
+                    "top_p": top_p,
+                    "user": user,
+                    "web_search_options": web_search_options,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
+            stream=False,
+        )
+
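
To illustrate the auto-parsed tool calls mentioned in the docstring above, a minimal sketch using the `openai.pydantic_function_tool()` helper; the `GetWeather` model and prompt are illustrative:

```py
import asyncio

import openai
from pydantic import BaseModel

from openai import AsyncOpenAI


class GetWeather(BaseModel):
    city: str


async def main() -> None:
    client = AsyncOpenAI()
    completion = await client.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=[openai.pydantic_function_tool(GetWeather)],
    )

    # With a strict, pydantic-derived tool schema the arguments come back
    # parsed into the model class rather than as a raw JSON string.
    for call in completion.choices[0].message.tool_calls or []:
        print(call.function.parsed_arguments)


asyncio.run(main())
```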
     @overload
     async def create(
         self,
@@ -1198,7 +1618,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -1359,23 +1779,23 @@ class AsyncCompletions(AsyncAPIResource):
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           stop: Not supported with latest reasoning models `o3` and `o4-mini`.
 
@@ -1386,6 +1806,8 @@ class AsyncCompletions(AsyncAPIResource):
               our [model distillation](https://platform.openai.com/docs/guides/distillation)
               or [evals](https://platform.openai.com/docs/guides/evals) products.
 
+              Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
           stream: If set to true, the model response data will be streamed to the client as it is
               generated using
               [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
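
For comparison with the `.stream()` helper added in this change, the lower-level `create(stream=True)` call yields raw `ChatCompletionChunk` objects over server-sent events; a minimal async sketch:

```py
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    stream = await client.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=[{"role": "user", "content": "Count to five."}],
        stream=True,
    )
    async for chunk in stream:
        # Each chunk carries at most a small delta of the eventual message.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)


asyncio.run(main())
```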
@@ -1468,7 +1890,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1637,23 +2059,23 @@ class AsyncCompletions(AsyncAPIResource):
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           stop: Not supported with latest reasoning models `o3` and `o4-mini`.
 
@@ -1664,6 +2086,8 @@ class AsyncCompletions(AsyncAPIResource):
               our [model distillation](https://platform.openai.com/docs/guides/distillation)
               or [evals](https://platform.openai.com/docs/guides/evals) products.
 
+              Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -1737,7 +2161,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1906,23 +2330,23 @@ class AsyncCompletions(AsyncAPIResource):
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           stop: Not supported with latest reasoning models `o3` and `o4-mini`.
 
@@ -1933,6 +2357,8 @@ class AsyncCompletions(AsyncAPIResource):
               our [model distillation](https://platform.openai.com/docs/guides/distillation)
               or [evals](https://platform.openai.com/docs/guides/evals) products.
 
+              Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -2005,7 +2431,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
@@ -2253,11 +2679,126 @@ class AsyncCompletions(AsyncAPIResource):
             cast_to=ChatCompletionDeleted,
         )
 
+    def stream(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
+        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
+        and automatic accumulation of each delta.
+
+        This also supports all of the parsing utilities that `.parse()` does.
+
+        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
+
+        ```py
+        async with client.chat.completions.stream(
+            model="gpt-4o-2024-08-06",
+            messages=[...],
+        ) as stream:
+            async for event in stream:
+                if event.type == "content.delta":
+                    print(event.delta, flush=True, end="")
+        ```
+
+        When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an async iterator. The full list of events yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
+
+        When the context manager exits, the response will be closed; however, the `stream` instance is still available
+        outside the context manager.
+        """
+        _validate_input_tools(tools)
+
+        extra_headers = {
+            "X-Stainless-Helper-Method": "chat.completions.stream",
+            **(extra_headers or {}),
+        }
+
+        api_request = self.create(
+            messages=messages,
+            model=model,
+            audio=audio,
+            stream=True,
+            response_format=_type_to_response_format(response_format),
+            frequency_penalty=frequency_penalty,
+            function_call=function_call,
+            functions=functions,
+            logit_bias=logit_bias,
+            logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
+            max_tokens=max_tokens,
+            metadata=metadata,
+            modalities=modalities,
+            n=n,
+            parallel_tool_calls=parallel_tool_calls,
+            prediction=prediction,
+            presence_penalty=presence_penalty,
+            reasoning_effort=reasoning_effort,
+            seed=seed,
+            service_tier=service_tier,
+            stop=stop,
+            store=store,
+            stream_options=stream_options,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            tools=tools,
+            top_logprobs=top_logprobs,
+            top_p=top_p,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            web_search_options=web_search_options,
+        )
+        return AsyncChatCompletionStreamManager(
+            api_request,
+            response_format=response_format,
+            input_tools=tools,
+        )
+
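
A corresponding sketch for the async stream manager returned here; the prompt is illustrative, and `get_final_completion()` is awaited on the assumption that the async helper mirrors the sync one:

```py
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    async with client.chat.completions.stream(
        model="gpt-4o-2024-08-06",
        messages=[{"role": "user", "content": "Write a haiku about diffs."}],
    ) as stream:
        async for event in stream:
            if event.type == "content.delta":
                print(event.delta, end="", flush=True)

    # As noted in the docstring, the stream instance remains usable after the
    # context manager exits, even though the underlying response is closed.
    completion = await stream.get_final_completion()
    print(completion.choices[0].message.content)


asyncio.run(main())
```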
 
 class CompletionsWithRawResponse:
     def __init__(self, completions: Completions) -> None:
         self._completions = completions
 
+        self.parse = _legacy_response.to_raw_response_wrapper(
+            completions.parse,
+        )
         self.create = _legacy_response.to_raw_response_wrapper(
             completions.create,
         )
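
Since `parse` is now registered on the raw-response wrappers as well, it can be reached through `with_raw_response`, which exposes HTTP metadata alongside the parsed object. A sketch under that assumption; `MathResponse` is an illustrative Pydantic model:

```py
from pydantic import BaseModel

from openai import OpenAI


class MathResponse(BaseModel):
    final_answer: str


client = OpenAI()

raw = client.chat.completions.with_raw_response.parse(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "solve 8x + 31 = 2"}],
    response_format=MathResponse,
)

print(raw.headers.get("x-request-id"))  # HTTP metadata from the raw response
completion = raw.parse()  # the ParsedChatCompletion itself
print(completion.choices[0].message.parsed)
```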
@@ -2283,6 +2824,9 @@ class AsyncCompletionsWithRawResponse:
     def __init__(self, completions: AsyncCompletions) -> None:
         self._completions = completions
 
+        self.parse = _legacy_response.async_to_raw_response_wrapper(
+            completions.parse,
+        )
         self.create = _legacy_response.async_to_raw_response_wrapper(
             completions.create,
         )
@@ -2308,6 +2852,9 @@ class CompletionsWithStreamingResponse:
     def __init__(self, completions: Completions) -> None:
         self._completions = completions
 
+        self.parse = to_streamed_response_wrapper(
+            completions.parse,
+        )
         self.create = to_streamed_response_wrapper(
             completions.create,
         )
@@ -2333,6 +2880,9 @@ class AsyncCompletionsWithStreamingResponse:
     def __init__(self, completions: AsyncCompletions) -> None:
         self._completions = completions
 
+        self.parse = async_to_streamed_response_wrapper(
+            completions.parse,
+        )
         self.create = async_to_streamed_response_wrapper(
             completions.create,
         )
@@ -2357,5 +2907,5 @@ class AsyncCompletionsWithStreamingResponse:
 def validate_response_format(response_format: object) -> None:
     if inspect.isclass(response_format) and issubclass(response_format, pydantic.BaseModel):
         raise TypeError(
-            "You tried to pass a `BaseModel` class to `chat.completions.create()`; You must use `beta.chat.completions.parse()` instead"
+            "You tried to pass a `BaseModel` class to `chat.completions.create()`; You must use `chat.completions.parse()` instead"
         )
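
A quick illustration of the guard above: passing a `BaseModel` class directly to `create()` raises a `TypeError`, while `parse()` accepts it. The model class and prompt are illustrative:

```py
from pydantic import BaseModel

from openai import OpenAI


class Answer(BaseModel):
    text: str


client = OpenAI()

try:
    client.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=[{"role": "user", "content": "hi"}],
        response_format=Answer,  # rejected: BaseModel classes belong to parse()
    )
except TypeError as exc:
    print(exc)

completion = client.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "hi"}],
    response_format=Answer,
)
print(completion.choices[0].message.parsed)
```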
src/openai/resources/responses/responses.py
@@ -81,19 +81,21 @@ class Responses(SyncAPIResource):
         input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         model: ResponsesModel | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
         tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
@@ -125,18 +127,19 @@ class Responses(SyncAPIResource):
           include: Specify additional output data to include in the model response. Currently
               supported values are:
 
+              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+                in code interpreter tool call items.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
               - `file_search_call.results`: Include the search results of the file search tool
                 call.
               - `message.input_image.image_url`: Include image urls from the input message.
-              - `computer_call_output.output.image_url`: Include image urls from the computer
-                call output.
+              - `message.output_text.logprobs`: Include logprobs with assistant messages.
               - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
                 tokens in reasoning item outputs. This enables reasoning items to be used in
                 multi-turn conversations when using the Responses API statelessly (like when
                 the `store` parameter is set to `false`, or when an organization is enrolled
                 in the zero data retention program).
-              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
-                in code interpreter tool call items.
 
           input: Text, image, or file inputs to the model, used to generate a response.
 
@@ -158,6 +161,11 @@ class Responses(SyncAPIResource):
               including visible output tokens and
               [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
 
+          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+              response. This maximum number applies across all built-in tool calls, not per
+              individual tool. Any further attempts to call a tool by the model will be
+              ignored.
+
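
A hedged sketch of the new `max_tool_calls` parameter; the model name and the web search tool are illustrative and subject to availability:

```py
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4o",  # illustrative; any model with built-in tool support
    input="Find two recent articles about the Responses API.",
    tools=[{"type": "web_search_preview"}],
    # Caps built-in tool calls across *all* tools combined; further attempts
    # by the model are ignored, as described above.
    max_tool_calls=3,
)

print(response.output_text)
```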
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard.
@@ -185,23 +193,23 @@ class Responses(SyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           store: Whether to store the generated model response for later retrieval via API.
 
@@ -242,6 +250,9 @@ class Responses(SyncAPIResource):
                 the model to call your own code. Learn more about
                 [function calling](https://platform.openai.com/docs/guides/function-calling).
 
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+
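
And a sketch combining the new `top_logprobs` parameter with the `message.output_text.logprobs` include value listed earlier; the exact shape of the logprobs output is not shown in this diff, so the example just dumps the raw payload:

```py
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4o-2024-08-06",
    input="Pick a random word.",
    top_logprobs=5,  # up to 20 candidates per output token position
    include=["message.output_text.logprobs"],
)

# Dump the payload rather than guessing at the logprobs field layout.
print(response.model_dump_json(indent=2))
```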
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -280,18 +291,20 @@ class Responses(SyncAPIResource):
         input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         model: ResponsesModel | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
         tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
@@ -330,18 +343,19 @@ class Responses(SyncAPIResource):
           include: Specify additional output data to include in the model response. Currently
               supported values are:
 
+              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+                in code interpreter tool call items.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
               - `file_search_call.results`: Include the search results of the file search tool
                 call.
               - `message.input_image.image_url`: Include image urls from the input message.
-              - `computer_call_output.output.image_url`: Include image urls from the computer
-                call output.
+              - `message.output_text.logprobs`: Include logprobs with assistant messages.
               - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
                 tokens in reasoning item outputs. This enables reasoning items to be used in
                 multi-turn conversations when using the Responses API statelessly (like when
                 the `store` parameter is set to `false`, or when an organization is enrolled
                 in the zero data retention program).
-              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
-                in code interpreter tool call items.
 
           input: Text, image, or file inputs to the model, used to generate a response.
 
@@ -363,6 +377,11 @@ class Responses(SyncAPIResource):
               including visible output tokens and
               [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
 
+          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+              response. This maximum number applies across all built-in tool calls, not per
+              individual tool. Any further attempts to call a tool by the model will be
+              ignored.
+
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard.
@@ -390,23 +409,23 @@ class Responses(SyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           store: Whether to store the generated model response for later retrieval via API.
 
@@ -440,6 +459,9 @@ class Responses(SyncAPIResource):
                 the model to call your own code. Learn more about
                 [function calling](https://platform.openai.com/docs/guides/function-calling).
 
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -478,18 +500,20 @@ class Responses(SyncAPIResource):
         input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         model: ResponsesModel | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
         tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
@@ -528,18 +552,19 @@ class Responses(SyncAPIResource):
           include: Specify additional output data to include in the model response. Currently
               supported values are:
 
+              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+                in code interpreter tool call items.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
               - `file_search_call.results`: Include the search results of the file search tool
                 call.
               - `message.input_image.image_url`: Include image urls from the input message.
-              - `computer_call_output.output.image_url`: Include image urls from the computer
-                call output.
+              - `message.output_text.logprobs`: Include logprobs with assistant messages.
               - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
                 tokens in reasoning item outputs. This enables reasoning items to be used in
                 multi-turn conversations when using the Responses API statelessly (like when
                 the `store` parameter is set to `false`, or when an organization is enrolled
                 in the zero data retention program).
-              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
-                in code interpreter tool call items.
 
           input: Text, image, or file inputs to the model, used to generate a response.
 
@@ -561,6 +586,11 @@ class Responses(SyncAPIResource):
               including visible output tokens and
               [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
 
+          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+              response. This maximum number applies across all built-in tool calls, not per
+              individual tool. Any further attempts to call a tool by the model will be
+              ignored.
+
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard.
@@ -588,23 +618,23 @@ class Responses(SyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           store: Whether to store the generated model response for later retrieval via API.
 
@@ -638,6 +668,9 @@ class Responses(SyncAPIResource):
                 the model to call your own code. Learn more about
                 [function calling](https://platform.openai.com/docs/guides/function-calling).
 
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -674,19 +707,21 @@ class Responses(SyncAPIResource):
         input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         model: ResponsesModel | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
         tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
@@ -706,6 +741,7 @@ class Responses(SyncAPIResource):
                     "input": input,
                     "instructions": instructions,
                     "max_output_tokens": max_output_tokens,
+                    "max_tool_calls": max_tool_calls,
                     "metadata": metadata,
                     "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
@@ -719,6 +755,7 @@ class Responses(SyncAPIResource):
                     "text": text,
                     "tool_choice": tool_choice,
                     "tools": tools,
+                    "top_logprobs": top_logprobs,
                     "top_p": top_p,
                     "truncation": truncation,
                     "user": user,
@@ -1299,19 +1336,21 @@ class AsyncResponses(AsyncAPIResource):
         input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         model: ResponsesModel | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
         tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
@@ -1343,18 +1382,19 @@ class AsyncResponses(AsyncAPIResource):
           include: Specify additional output data to include in the model response. Currently
               supported values are:
 
+              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+                in code interpreter tool call items.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
               - `file_search_call.results`: Include the search results of the file search tool
                 call.
               - `message.input_image.image_url`: Include image urls from the input message.
-              - `computer_call_output.output.image_url`: Include image urls from the computer
-                call output.
+              - `message.output_text.logprobs`: Include logprobs with assistant messages.
               - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
                 tokens in reasoning item outputs. This enables reasoning items to be used in
                 multi-turn conversations when using the Responses API statelessly (like when
                 the `store` parameter is set to `false`, or when an organization is enrolled
                 in the zero data retention program).
-              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
-                in code interpreter tool call items.
 
           input: Text, image, or file inputs to the model, used to generate a response.
 
@@ -1376,6 +1416,11 @@ class AsyncResponses(AsyncAPIResource):
               including visible output tokens and
               [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
 
+          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+              response. This maximum number applies across all built-in tool calls, not per
+              individual tool. Any further attempts to call a tool by the model will be
+              ignored.
+
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard.
@@ -1403,23 +1448,23 @@ class AsyncResponses(AsyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           store: Whether to store the generated model response for later retrieval via API.
 
@@ -1460,6 +1505,9 @@ class AsyncResponses(AsyncAPIResource):
                 the model to call your own code. Learn more about
                 [function calling](https://platform.openai.com/docs/guides/function-calling).
 
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -1498,18 +1546,20 @@ class AsyncResponses(AsyncAPIResource):
         input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         model: ResponsesModel | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
         tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
@@ -1548,18 +1598,19 @@ class AsyncResponses(AsyncAPIResource):
           include: Specify additional output data to include in the model response. Currently
               supported values are:
 
+              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+                in code interpreter tool call items.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
               - `file_search_call.results`: Include the search results of the file search tool
                 call.
               - `message.input_image.image_url`: Include image urls from the input message.
-              - `computer_call_output.output.image_url`: Include image urls from the computer
-                call output.
+              - `message.output_text.logprobs`: Include logprobs with assistant messages.
               - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
                 tokens in reasoning item outputs. This enables reasoning items to be used in
                 multi-turn conversations when using the Responses API statelessly (like when
                 the `store` parameter is set to `false`, or when an organization is enrolled
                 in the zero data retention program).
-              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
-                in code interpreter tool call items.
 
           input: Text, image, or file inputs to the model, used to generate a response.
 
@@ -1581,6 +1632,11 @@ class AsyncResponses(AsyncAPIResource):
               including visible output tokens and
               [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
 
+          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+              response. This maximum number applies across all built-in tool calls, not per
+              individual tool. Any further attempts to call a tool by the model will be
+              ignored.
+
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard.
@@ -1608,23 +1664,23 @@ class AsyncResponses(AsyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           store: Whether to store the generated model response for later retrieval via API.
 
@@ -1658,6 +1714,9 @@ class AsyncResponses(AsyncAPIResource):
                 the model to call your own code. Learn more about
                 [function calling](https://platform.openai.com/docs/guides/function-calling).
 
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -1696,18 +1755,20 @@ class AsyncResponses(AsyncAPIResource):
         input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         model: ResponsesModel | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
         tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
@@ -1746,18 +1807,19 @@ class AsyncResponses(AsyncAPIResource):
           include: Specify additional output data to include in the model response. Currently
               supported values are:
 
+              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+                in code interpreter tool call items.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
               - `file_search_call.results`: Include the search results of the file search tool
                 call.
               - `message.input_image.image_url`: Include image urls from the input message.
-              - `computer_call_output.output.image_url`: Include image urls from the computer
-                call output.
+              - `message.output_text.logprobs`: Include logprobs with assistant messages.
               - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
                 tokens in reasoning item outputs. This enables reasoning items to be used in
                 multi-turn conversations when using the Responses API statelessly (like when
                 the `store` parameter is set to `false`, or when an organization is enrolled
                 in the zero data retention program).
-              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
-                in code interpreter tool call items.
 
           input: Text, image, or file inputs to the model, used to generate a response.
 
@@ -1779,6 +1841,11 @@ class AsyncResponses(AsyncAPIResource):
               including visible output tokens and
               [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
 
+          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+              response. This maximum number applies across all built-in tool calls, not per
+              individual tool. Any further attempts to call a tool by the model will be
+              ignored.
+
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard.
@@ -1806,23 +1873,23 @@ class AsyncResponses(AsyncAPIResource):
               Configuration options for
               [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 
-          service_tier: Specifies the latency tier to use for processing the request. This parameter is
-              relevant for customers subscribed to the scale tier service:
-
-              - If set to 'auto', and the Project is Scale tier enabled, the system will
-                utilize scale tier credits until they are exhausted.
-              - If set to 'auto', and the Project is not Scale tier enabled, the request will
-                be processed using the default service tier with a lower uptime SLA and no
-                latency guarantee.
-              - If set to 'default', the request will be processed using the default service
-                tier with a lower uptime SLA and no latency guarantee.
-              - If set to 'flex', the request will be processed with the Flex Processing
-                service tier.
-                [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+          service_tier: Specifies the processing type used for serving the request.
+
+              - If set to 'auto', then the request will be processed with the service tier
+                configured in the Project settings. Unless otherwise configured, the Project
+                will use 'default'.
+              - If set to 'default', then the request will be processed with the standard
+                pricing and performance for the selected model.
+              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+                'priority', then the request will be processed with the corresponding service
+                tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+                Priority processing.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
+              When the `service_tier` parameter is set, the response body will include the
+              `service_tier` value based on the processing mode actually used to serve the
+              request. This response value may be different from the value set in the
+              parameter.
 
           store: Whether to store the generated model response for later retrieval via API.
 
@@ -1856,6 +1923,9 @@ class AsyncResponses(AsyncAPIResource):
                 the model to call your own code. Learn more about
                 [function calling](https://platform.openai.com/docs/guides/function-calling).
 
+          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+              return at each token position, each with an associated log probability.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -1892,19 +1962,21 @@ class AsyncResponses(AsyncAPIResource):
         input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         model: ResponsesModel | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
         prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
         tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
@@ -1924,6 +1996,7 @@ class AsyncResponses(AsyncAPIResource):
                     "input": input,
                     "instructions": instructions,
                     "max_output_tokens": max_output_tokens,
+                    "max_tool_calls": max_tool_calls,
                     "metadata": metadata,
                     "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
@@ -1937,6 +2010,7 @@ class AsyncResponses(AsyncAPIResource):
                     "text": text,
                     "tool_choice": tool_choice,
                     "tools": tools,
+                    "top_logprobs": top_logprobs,
                     "top_p": top_p,
                     "truncation": truncation,
                     "user": user,
src/openai/resources/webhooks.py
@@ -0,0 +1,210 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import hmac
+import json
+import time
+import base64
+import hashlib
+from typing import cast
+
+from .._types import HeadersLike
+from .._utils import get_required_header
+from .._models import construct_type
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._exceptions import InvalidWebhookSignatureError
+from ..types.webhooks.unwrap_webhook_event import UnwrapWebhookEvent
+
+__all__ = ["Webhooks", "AsyncWebhooks"]
+
+
+class Webhooks(SyncAPIResource):
+    def unwrap(
+        self,
+        payload: str | bytes,
+        headers: HeadersLike,
+        *,
+        secret: str | None = None,
+    ) -> UnwrapWebhookEvent:
+        """Validates that the given payload was sent by OpenAI and parses the payload."""
+        if secret is None:
+            secret = self._client.webhook_secret
+
+        self.verify_signature(payload=payload, headers=headers, secret=secret)
+
+        return cast(
+            UnwrapWebhookEvent,
+            construct_type(
+                type_=UnwrapWebhookEvent,
+                value=json.loads(payload),
+            ),
+        )
+
+    def verify_signature(
+        self,
+        payload: str | bytes,
+        headers: HeadersLike,
+        *,
+        secret: str | None = None,
+        tolerance: int = 300,
+    ) -> None:
+        """Validates whether or not the webhook payload was sent by OpenAI.
+
+        Args:
+            payload: The webhook payload
+            headers: The webhook headers
+            secret: The webhook secret (optional, will use client secret if not provided)
+            tolerance: Maximum age of the webhook in seconds (default: 300 = 5 minutes)
+        """
+        if secret is None:
+            secret = self._client.webhook_secret
+
+        if secret is None:
+            raise ValueError(
+                "The webhook secret must either be set using the env var, OPENAI_WEBHOOK_SECRET, "
+                "on the client class, OpenAI(webhook_secret='123'), or passed to this function"
+            )
+
+        signature_header = get_required_header(headers, "webhook-signature")
+        timestamp = get_required_header(headers, "webhook-timestamp")
+        webhook_id = get_required_header(headers, "webhook-id")
+
+        # Validate timestamp to prevent replay attacks
+        try:
+            timestamp_seconds = int(timestamp)
+        except ValueError:
+            raise InvalidWebhookSignatureError("Invalid webhook timestamp format") from None
+
+        now = int(time.time())
+
+        if now - timestamp_seconds > tolerance:
+            raise InvalidWebhookSignatureError("Webhook timestamp is too old") from None
+
+        if timestamp_seconds > now + tolerance:
+            raise InvalidWebhookSignatureError("Webhook timestamp is too new") from None
+
+        # Extract signatures from v1,<base64> format
+        # The signature header can have multiple values, separated by spaces.
+        # Each value is in the format v1,<base64>. We should accept if any match.
+        signatures: list[str] = []
+        for part in signature_header.split():
+            if part.startswith("v1,"):
+                signatures.append(part[3:])
+            else:
+                signatures.append(part)
+
+        # Decode the secret if it starts with whsec_
+        if secret.startswith("whsec_"):
+            decoded_secret = base64.b64decode(secret[6:])
+        else:
+            decoded_secret = secret.encode()
+
+        body = payload.decode("utf-8") if isinstance(payload, bytes) else payload
+
+        # Prepare the signed payload (OpenAI uses webhookId.timestamp.payload format)
+        signed_payload = f"{webhook_id}.{timestamp}.{body}"
+        expected_signature = base64.b64encode(
+            hmac.new(decoded_secret, signed_payload.encode(), hashlib.sha256).digest()
+        ).decode()
+
+        # Accept if any signature matches
+        if not any(hmac.compare_digest(expected_signature, sig) for sig in signatures):
+            raise InvalidWebhookSignatureError(
+                "The given webhook signature does not match the expected signature"
+            ) from None
+
+
+class AsyncWebhooks(AsyncAPIResource):
+    def unwrap(
+        self,
+        payload: str | bytes,
+        headers: HeadersLike,
+        *,
+        secret: str | None = None,
+    ) -> UnwrapWebhookEvent:
+        """Validates that the given payload was sent by OpenAI and parses the payload."""
+        if secret is None:
+            secret = self._client.webhook_secret
+
+        self.verify_signature(payload=payload, headers=headers, secret=secret)
+
+        body = payload.decode("utf-8") if isinstance(payload, bytes) else payload
+        return cast(
+            UnwrapWebhookEvent,
+            construct_type(
+                type_=UnwrapWebhookEvent,
+                value=json.loads(body),
+            ),
+        )
+
+    def verify_signature(
+        self,
+        payload: str | bytes,
+        headers: HeadersLike,
+        *,
+        secret: str | None = None,
+        tolerance: int = 300,
+    ) -> None:
+        """Validates whether or not the webhook payload was sent by OpenAI.
+
+        Args:
+            payload: The webhook payload
+            headers: The webhook headers
+            secret: The webhook secret (optional, will use client secret if not provided)
+            tolerance: Maximum age of the webhook in seconds (default: 300 = 5 minutes)
+        """
+        if secret is None:
+            secret = self._client.webhook_secret
+
+        if secret is None:
+            raise ValueError(
+                "The webhook secret must either be set using the env var, OPENAI_WEBHOOK_SECRET, "
+                "on the client class, OpenAI(webhook_secret='123'), or passed to this function"
+            ) from None
+
+        signature_header = get_required_header(headers, "webhook-signature")
+        timestamp = get_required_header(headers, "webhook-timestamp")
+        webhook_id = get_required_header(headers, "webhook-id")
+
+        # Validate timestamp to prevent replay attacks
+        try:
+            timestamp_seconds = int(timestamp)
+        except ValueError:
+            raise InvalidWebhookSignatureError("Invalid webhook timestamp format") from None
+
+        now = int(time.time())
+
+        if now - timestamp_seconds > tolerance:
+            raise InvalidWebhookSignatureError("Webhook timestamp is too old") from None
+
+        if timestamp_seconds > now + tolerance:
+            raise InvalidWebhookSignatureError("Webhook timestamp is too new") from None
+
+        # Extract signatures from v1,<base64> format
+        # The signature header can have multiple values, separated by spaces.
+        # Each value is in the format v1,<base64>. We should accept if any match.
+        signatures: list[str] = []
+        for part in signature_header.split():
+            if part.startswith("v1,"):
+                signatures.append(part[3:])
+            else:
+                signatures.append(part)
+
+        # Decode the secret if it starts with whsec_
+        if secret.startswith("whsec_"):
+            decoded_secret = base64.b64decode(secret[6:])
+        else:
+            decoded_secret = secret.encode()
+
+        body = payload.decode("utf-8") if isinstance(payload, bytes) else payload
+
+        # Prepare the signed payload (OpenAI uses webhookId.timestamp.payload format)
+        signed_payload = f"{webhook_id}.{timestamp}.{body}"
+        expected_signature = base64.b64encode(
+            hmac.new(decoded_secret, signed_payload.encode(), hashlib.sha256).digest()
+        ).decode()
+
+        # Accept if any signature matches
+        if not any(hmac.compare_digest(expected_signature, sig) for sig in signatures):
+            raise InvalidWebhookSignatureError("The given webhook signature does not match the expected signature")
src/openai/types/chat/chat_completion.py
@@ -59,25 +59,24 @@ class ChatCompletion(BaseModel):
     object: Literal["chat.completion"]
     """The object type, which is always `chat.completion`."""
 
-    service_tier: Optional[Literal["auto", "default", "flex", "scale"]] = None
-    """Specifies the latency tier to use for processing the request.
-
-    This parameter is relevant for customers subscribed to the scale tier service:
-
-    - If set to 'auto', and the Project is Scale tier enabled, the system will
-      utilize scale tier credits until they are exhausted.
-    - If set to 'auto', and the Project is not Scale tier enabled, the request will
-      be processed using the default service tier with a lower uptime SLA and no
-      latency guarantee.
-    - If set to 'default', the request will be processed using the default service
-      tier with a lower uptime SLA and no latency guarantee.
-    - If set to 'flex', the request will be processed with the Flex Processing
-      service tier.
-      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = None
+    """Specifies the processing type used for serving the request.
+
+    - If set to 'auto', then the request will be processed with the service tier
+      configured in the Project settings. Unless otherwise configured, the Project
+      will use 'default'.
+    - If set to 'default', then the request will be processed with the standard
+      pricing and performance for the selected model.
+    - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+      'priority', then the request will be processed with the corresponding service
+      tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+      Priority processing.
     - When not set, the default behavior is 'auto'.
 
-    When this parameter is set, the response body will include the `service_tier`
-    utilized.
+    When the `service_tier` parameter is set, the response body will include the
+    `service_tier` value based on the processing mode actually used to serve the
+    request. This response value may be different from the value set in the
+    parameter.
     """
 
     system_fingerprint: Optional[str] = None
src/openai/types/chat/chat_completion_chunk.py
@@ -128,25 +128,24 @@ class ChatCompletionChunk(BaseModel):
     object: Literal["chat.completion.chunk"]
     """The object type, which is always `chat.completion.chunk`."""
 
-    service_tier: Optional[Literal["auto", "default", "flex", "scale"]] = None
-    """Specifies the latency tier to use for processing the request.
-
-    This parameter is relevant for customers subscribed to the scale tier service:
-
-    - If set to 'auto', and the Project is Scale tier enabled, the system will
-      utilize scale tier credits until they are exhausted.
-    - If set to 'auto', and the Project is not Scale tier enabled, the request will
-      be processed using the default service tier with a lower uptime SLA and no
-      latency guarantee.
-    - If set to 'default', the request will be processed using the default service
-      tier with a lower uptime SLA and no latency guarantee.
-    - If set to 'flex', the request will be processed with the Flex Processing
-      service tier.
-      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = None
+    """Specifies the processing type used for serving the request.
+
+    - If set to 'auto', then the request will be processed with the service tier
+      configured in the Project settings. Unless otherwise configured, the Project
+      will use 'default'.
+    - If set to 'default', then the request will be processed with the standard
+      pricing and performance for the selected model.
+    - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+      'priority', then the request will be processed with the corresponding service
+      tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+      Priority processing.
     - When not set, the default behavior is 'auto'.
 
-    When this parameter is set, the response body will include the `service_tier`
-    utilized.
+    When the `service_tier` parameter is set, the response body will include the
+    `service_tier` value based on the processing mode actually used to serve the
+    request. This response value may be different from the value set in the
+    parameter.
     """
 
     system_fingerprint: Optional[str] = None
src/openai/types/chat/completion_create_params.py
@@ -208,25 +208,24 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     in the backend.
     """
 
-    service_tier: Optional[Literal["auto", "default", "flex", "scale"]]
-    """Specifies the latency tier to use for processing the request.
-
-    This parameter is relevant for customers subscribed to the scale tier service:
-
-    - If set to 'auto', and the Project is Scale tier enabled, the system will
-      utilize scale tier credits until they are exhausted.
-    - If set to 'auto', and the Project is not Scale tier enabled, the request will
-      be processed using the default service tier with a lower uptime SLA and no
-      latency guarantee.
-    - If set to 'default', the request will be processed using the default service
-      tier with a lower uptime SLA and no latency guarantee.
-    - If set to 'flex', the request will be processed with the Flex Processing
-      service tier.
-      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]]
+    """Specifies the processing type used for serving the request.
+
+    - If set to 'auto', then the request will be processed with the service tier
+      configured in the Project settings. Unless otherwise configured, the Project
+      will use 'default'.
+    - If set to 'default', then the request will be processed with the standard
+      pricing and performance for the selected model.
+    - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+      'priority', then the request will be processed with the corresponding service
+      tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+      Priority processing.
     - When not set, the default behavior is 'auto'.
 
-    When this parameter is set, the response body will include the `service_tier`
-    utilized.
+    When the `service_tier` parameter is set, the response body will include the
+    `service_tier` value based on the processing mode actually used to serve the
+    request. This response value may be different from the value set in the
+    parameter.
     """
 
     stop: Union[Optional[str], List[str], None]
@@ -241,6 +240,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     Whether or not to store the output of this chat completion request for use in
     our [model distillation](https://platform.openai.com/docs/guides/distillation)
     or [evals](https://platform.openai.com/docs/guides/evals) products.
+
+    Supports text and image inputs. Note: image inputs over 10MB will be dropped.
     """
 
     stream_options: Optional[ChatCompletionStreamOptionsParam]
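The same `service_tier` semantics now apply to Chat Completions. A small sketch with placeholder model and message content; which tiers are actually available depends on the account and model:

```py
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o",  # placeholder model name
    messages=[{"role": "user", "content": "Say hello."}],
    service_tier="priority",  # 'auto', 'default', 'flex', or the new 'priority'
)

# Reports the tier actually used to serve the request.
print(completion.service_tier)
```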
src/openai/types/responses/__init__.py
@@ -20,6 +20,7 @@ from .parsed_response import (
 )
 from .response_prompt import ResponsePrompt as ResponsePrompt
 from .response_status import ResponseStatus as ResponseStatus
+from .tool_choice_mcp import ToolChoiceMcp as ToolChoiceMcp
 from .web_search_tool import WebSearchTool as WebSearchTool
 from .file_search_tool import FileSearchTool as FileSearchTool
 from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes
@@ -43,6 +44,7 @@ from .response_failed_event import ResponseFailedEvent as ResponseFailedEvent
 from .response_prompt_param import ResponsePromptParam as ResponsePromptParam
 from .response_queued_event import ResponseQueuedEvent as ResponseQueuedEvent
 from .response_stream_event import ResponseStreamEvent as ResponseStreamEvent
+from .tool_choice_mcp_param import ToolChoiceMcpParam as ToolChoiceMcpParam
 from .web_search_tool_param import WebSearchToolParam as WebSearchToolParam
 from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam
 from .input_item_list_params import InputItemListParams as InputItemListParams
src/openai/types/responses/response.py
@@ -9,6 +9,7 @@ from .response_error import ResponseError
 from .response_usage import ResponseUsage
 from .response_prompt import ResponsePrompt
 from .response_status import ResponseStatus
+from .tool_choice_mcp import ToolChoiceMcp
 from ..shared.metadata import Metadata
 from ..shared.reasoning import Reasoning
 from .tool_choice_types import ToolChoiceTypes
@@ -27,7 +28,7 @@ class IncompleteDetails(BaseModel):
     """The reason why the response is incomplete."""
 
 
-ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypes, ToolChoiceFunction]
+ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypes, ToolChoiceFunction, ToolChoiceMcp]
 
 
 class Response(BaseModel):
@@ -141,6 +142,14 @@ class Response(BaseModel):
     [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
     """
 
+    max_tool_calls: Optional[int] = None
+    """
+    The maximum number of total calls to built-in tools that can be processed in a
+    response. This maximum number applies across all built-in tool calls, not per
+    individual tool. Any further attempts to call a tool by the model will be
+    ignored.
+    """
+
     previous_response_id: Optional[str] = None
     """The unique ID of the previous response to the model.
 
@@ -161,25 +170,24 @@ class Response(BaseModel):
     [reasoning models](https://platform.openai.com/docs/guides/reasoning).
     """
 
-    service_tier: Optional[Literal["auto", "default", "flex", "scale"]] = None
-    """Specifies the latency tier to use for processing the request.
-
-    This parameter is relevant for customers subscribed to the scale tier service:
+    service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = None
+    """Specifies the processing type used for serving the request.
 
-    - If set to 'auto', and the Project is Scale tier enabled, the system will
-      utilize scale tier credits until they are exhausted.
-    - If set to 'auto', and the Project is not Scale tier enabled, the request will
-      be processed using the default service tier with a lower uptime SLA and no
-      latency guarantee.
-    - If set to 'default', the request will be processed using the default service
-      tier with a lower uptime SLA and no latency guarantee.
-    - If set to 'flex', the request will be processed with the Flex Processing
-      service tier.
-      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    - If set to 'auto', then the request will be processed with the service tier
+      configured in the Project settings. Unless otherwise configured, the Project
+      will use 'default'.
+    - If set to 'default', then the request will be processed with the standard
+      pricing and performance for the selected model.
+    - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+      'priority', then the request will be processed with the corresponding service
+      tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+      Priority processing.
     - When not set, the default behavior is 'auto'.
 
-    When this parameter is set, the response body will include the `service_tier`
-    utilized.
+    When the `service_tier` parameter is set, the response body will include the
+    `service_tier` value based on the processing mode actually used to serve the
+    request. This response value may be different from the value set in the
+    parameter.
     """
 
     status: Optional[ResponseStatus] = None
@@ -198,6 +206,12 @@ class Response(BaseModel):
     - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
     """
 
+    top_logprobs: Optional[int] = None
+    """
+    An integer between 0 and 20 specifying the number of most likely tokens to
+    return at each token position, each with an associated log probability.
+    """
+
     truncation: Optional[Literal["auto", "disabled"]] = None
     """The truncation strategy to use for the model response.
 
src/openai/types/responses/response_create_params.py
@@ -10,6 +10,7 @@ from .response_includable import ResponseIncludable
 from .tool_choice_options import ToolChoiceOptions
 from .response_input_param import ResponseInputParam
 from .response_prompt_param import ResponsePromptParam
+from .tool_choice_mcp_param import ToolChoiceMcpParam
 from ..shared_params.metadata import Metadata
 from .tool_choice_types_param import ToolChoiceTypesParam
 from ..shared_params.reasoning import Reasoning
@@ -37,18 +38,19 @@ class ResponseCreateParamsBase(TypedDict, total=False):
 
     Currently supported values are:
 
+    - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+      in code interpreter tool call items.
+    - `computer_call_output.output.image_url`: Include image urls from the computer
+      call output.
     - `file_search_call.results`: Include the search results of the file search tool
       call.
     - `message.input_image.image_url`: Include image urls from the input message.
-    - `computer_call_output.output.image_url`: Include image urls from the computer
-      call output.
+    - `message.output_text.logprobs`: Include logprobs with assistant messages.
     - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
       tokens in reasoning item outputs. This enables reasoning items to be used in
       multi-turn conversations when using the Responses API statelessly (like when
       the `store` parameter is set to `false`, or when an organization is enrolled
       in the zero data retention program).
-    - `code_interpreter_call.outputs`: Includes the outputs of python code execution
-      in code interpreter tool call items.
     """
 
     input: Union[str, ResponseInputParam]
@@ -78,6 +80,14 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
     """
 
+    max_tool_calls: Optional[int]
+    """
+    The maximum number of total calls to built-in tools that can be processed in a
+    response. This maximum number applies across all built-in tool calls, not per
+    individual tool. Any further attempts to call a tool by the model will be
+    ignored.
+    """
+
     metadata: Optional[Metadata]
     """Set of 16 key-value pairs that can be attached to an object.
 
@@ -120,25 +130,24 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     [reasoning models](https://platform.openai.com/docs/guides/reasoning).
     """
 
-    service_tier: Optional[Literal["auto", "default", "flex", "scale"]]
-    """Specifies the latency tier to use for processing the request.
-
-    This parameter is relevant for customers subscribed to the scale tier service:
-
-    - If set to 'auto', and the Project is Scale tier enabled, the system will
-      utilize scale tier credits until they are exhausted.
-    - If set to 'auto', and the Project is not Scale tier enabled, the request will
-      be processed using the default service tier with a lower uptime SLA and no
-      latency guarantee.
-    - If set to 'default', the request will be processed using the default service
-      tier with a lower uptime SLA and no latency guarantee.
-    - If set to 'flex', the request will be processed with the Flex Processing
-      service tier.
-      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+    service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]]
+    """Specifies the processing type used for serving the request.
+
+    - If set to 'auto', then the request will be processed with the service tier
+      configured in the Project settings. Unless otherwise configured, the Project
+      will use 'default'.
+    - If set to 'default', then the request will be processed with the standard
+      pricing and performance for the selected model.
+    - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+      'priority', then the request will be processed with the corresponding service
+      tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+      Priority processing.
     - When not set, the default behavior is 'auto'.
 
-    When this parameter is set, the response body will include the `service_tier`
-    utilized.
+    When the `service_tier` parameter is set, the response body will include the
+    `service_tier` value based on the processing mode actually used to serve the
+    request. This response value may be different from the value set in the
+    parameter.
     """
 
     store: Optional[bool]
@@ -186,6 +195,12 @@ class ResponseCreateParamsBase(TypedDict, total=False):
       [function calling](https://platform.openai.com/docs/guides/function-calling).
     """
 
+    top_logprobs: Optional[int]
+    """
+    An integer between 0 and 20 specifying the number of most likely tokens to
+    return at each token position, each with an associated log probability.
+    """
+
     top_p: Optional[float]
     """
     An alternative to sampling with temperature, called nucleus sampling, where the
@@ -214,7 +229,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     """
 
 
-ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypesParam, ToolChoiceFunctionParam]
+ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypesParam, ToolChoiceFunctionParam, ToolChoiceMcpParam]
 
 
 class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False):
src/openai/types/responses/response_function_web_search.py
@@ -1,16 +1,57 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing_extensions import Literal
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
 
+from ..._utils import PropertyInfo
 from ..._models import BaseModel
 
-__all__ = ["ResponseFunctionWebSearch"]
+__all__ = ["ResponseFunctionWebSearch", "Action", "ActionSearch", "ActionOpenPage", "ActionFind"]
+
+
+class ActionSearch(BaseModel):
+    query: str
+    """The search query."""
+
+    type: Literal["search"]
+    """The action type."""
+
+    domains: Optional[List[str]] = None
+    """Domains to restrict the search or domains where results were found."""
+
+
+class ActionOpenPage(BaseModel):
+    type: Literal["open_page"]
+    """The action type."""
+
+    url: str
+    """The URL opened by the model."""
+
+
+class ActionFind(BaseModel):
+    pattern: str
+    """The pattern or text to search for within the page."""
+
+    type: Literal["find"]
+    """The action type."""
+
+    url: str
+    """The URL of the page searched for the pattern."""
+
+
+Action: TypeAlias = Annotated[Union[ActionSearch, ActionOpenPage, ActionFind], PropertyInfo(discriminator="type")]
 
 
 class ResponseFunctionWebSearch(BaseModel):
     id: str
     """The unique ID of the web search tool call."""
 
+    action: Action
+    """
+    An object describing the specific action taken in this web search call. Includes
+    details on how the model used the web (search, open_page, find).
+    """
+
     status: Literal["in_progress", "searching", "completed", "failed"]
     """The status of the web search tool call."""
 
src/openai/types/responses/response_function_web_search_param.py
@@ -2,15 +2,55 @@
 
 from __future__ import annotations
 
-from typing_extensions import Literal, Required, TypedDict
+from typing import List, Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
-__all__ = ["ResponseFunctionWebSearchParam"]
+__all__ = ["ResponseFunctionWebSearchParam", "Action", "ActionSearch", "ActionOpenPage", "ActionFind"]
+
+
+class ActionSearch(TypedDict, total=False):
+    query: Required[str]
+    """The search query."""
+
+    type: Required[Literal["search"]]
+    """The action type."""
+
+    domains: List[str]
+    """Domains to restrict the search or domains where results were found."""
+
+
+class ActionOpenPage(TypedDict, total=False):
+    type: Required[Literal["open_page"]]
+    """The action type."""
+
+    url: Required[str]
+    """The URL opened by the model."""
+
+
+class ActionFind(TypedDict, total=False):
+    pattern: Required[str]
+    """The pattern or text to search for within the page."""
+
+    type: Required[Literal["find"]]
+    """The action type."""
+
+    url: Required[str]
+    """The URL of the page searched for the pattern."""
+
+
+Action: TypeAlias = Union[ActionSearch, ActionOpenPage, ActionFind]
 
 
 class ResponseFunctionWebSearchParam(TypedDict, total=False):
     id: Required[str]
     """The unique ID of the web search tool call."""
 
+    action: Required[Action]
+    """
+    An object describing the specific action taken in this web search call. Includes
+    details on how the model used the web (search, open_page, find).
+    """
+
     status: Required[Literal["in_progress", "searching", "completed", "failed"]]
     """The status of the web search tool call."""
 
src/openai/types/responses/response_includable.py
@@ -5,9 +5,10 @@ from typing_extensions import Literal, TypeAlias
 __all__ = ["ResponseIncludable"]
 
 ResponseIncludable: TypeAlias = Literal[
+    "code_interpreter_call.outputs",
+    "computer_call_output.output.image_url",
     "file_search_call.results",
     "message.input_image.image_url",
-    "computer_call_output.output.image_url",
+    "message.output_text.logprobs",
     "reasoning.encrypted_content",
-    "code_interpreter_call.outputs",
 ]
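The reordered `ResponseIncludable` literal also gains `message.output_text.logprobs`, which pairs with the new `top_logprobs` parameter. A hedged request sketch with placeholder model and input:

```py
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4.1",  # placeholder model name
    input="Briefly explain HMAC.",
    include=["message.output_text.logprobs"],  # one of the values listed above
    top_logprobs=3,
)
```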
src/openai/types/responses/tool_choice_mcp.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ToolChoiceMcp"]
+
+
+class ToolChoiceMcp(BaseModel):
+    server_label: str
+    """The label of the MCP server to use."""
+
+    type: Literal["mcp"]
+    """For MCP tools, the type is always `mcp`."""
+
+    name: Optional[str] = None
+    """The name of the tool to call on the server."""
src/openai/types/responses/tool_choice_mcp_param.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ToolChoiceMcpParam"]
+
+
+class ToolChoiceMcpParam(TypedDict, total=False):
+    server_label: Required[str]
+    """The label of the MCP server to use."""
+
+    type: Required[Literal["mcp"]]
+    """For MCP tools, the type is always `mcp`."""
+
+    name: Optional[str]
+    """The name of the tool to call on the server."""
src/openai/types/responses/tool_choice_types.py
@@ -15,7 +15,6 @@ class ToolChoiceTypes(BaseModel):
         "web_search_preview_2025_03_11",
         "image_generation",
         "code_interpreter",
-        "mcp",
     ]
     """The type of hosted tool the model should to use.
 
@@ -28,6 +27,5 @@ class ToolChoiceTypes(BaseModel):
     - `web_search_preview`
     - `computer_use_preview`
     - `code_interpreter`
-    - `mcp`
     - `image_generation`
     """
src/openai/types/responses/tool_choice_types_param.py
@@ -16,7 +16,6 @@ class ToolChoiceTypesParam(TypedDict, total=False):
             "web_search_preview_2025_03_11",
             "image_generation",
             "code_interpreter",
-            "mcp",
         ]
     ]
     """The type of hosted tool the model should to use.
@@ -30,6 +29,5 @@ class ToolChoiceTypesParam(TypedDict, total=False):
     - `web_search_preview`
     - `computer_use_preview`
     - `code_interpreter`
-    - `mcp`
     - `image_generation`
     """
src/openai/types/shared/all_models.py
@@ -15,6 +15,10 @@ AllModels: TypeAlias = Union[
         "o1-pro-2025-03-19",
         "o3-pro",
         "o3-pro-2025-06-10",
+        "o3-deep-research",
+        "o3-deep-research-2025-06-26",
+        "o4-mini-deep-research",
+        "o4-mini-deep-research-2025-06-26",
         "computer-use-preview",
         "computer-use-preview-2025-03-11",
     ],
src/openai/types/shared/responses_model.py
@@ -15,6 +15,10 @@ ResponsesModel: TypeAlias = Union[
         "o1-pro-2025-03-19",
         "o3-pro",
         "o3-pro-2025-06-10",
+        "o3-deep-research",
+        "o3-deep-research-2025-06-26",
+        "o4-mini-deep-research",
+        "o4-mini-deep-research-2025-06-26",
         "computer-use-preview",
         "computer-use-preview-2025-03-11",
     ],
src/openai/types/shared_params/responses_model.py
@@ -17,6 +17,10 @@ ResponsesModel: TypeAlias = Union[
         "o1-pro-2025-03-19",
         "o3-pro",
         "o3-pro-2025-06-10",
+        "o3-deep-research",
+        "o3-deep-research-2025-06-26",
+        "o4-mini-deep-research",
+        "o4-mini-deep-research-2025-06-26",
         "computer-use-preview",
         "computer-use-preview-2025-03-11",
     ],
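A minimal sketch of requesting one of the new deep research models; running in the background with a web search tool attached is an assumption about typical usage, not something this diff prescribes:

```py
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="o3-deep-research-2025-06-26",
    input="Summarize recent research on solid-state battery recycling.",
    background=True,                         # assumption: avoid blocking on a long run
    tools=[{"type": "web_search_preview"}],  # assumption: give the model a data source
)
print(response.id, response.status)
```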
src/openai/types/webhooks/__init__.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
+from .batch_failed_webhook_event import BatchFailedWebhookEvent as BatchFailedWebhookEvent
+from .batch_expired_webhook_event import BatchExpiredWebhookEvent as BatchExpiredWebhookEvent
+from .batch_cancelled_webhook_event import BatchCancelledWebhookEvent as BatchCancelledWebhookEvent
+from .batch_completed_webhook_event import BatchCompletedWebhookEvent as BatchCompletedWebhookEvent
+from .eval_run_failed_webhook_event import EvalRunFailedWebhookEvent as EvalRunFailedWebhookEvent
+from .response_failed_webhook_event import ResponseFailedWebhookEvent as ResponseFailedWebhookEvent
+from .eval_run_canceled_webhook_event import EvalRunCanceledWebhookEvent as EvalRunCanceledWebhookEvent
+from .eval_run_succeeded_webhook_event import EvalRunSucceededWebhookEvent as EvalRunSucceededWebhookEvent
+from .response_cancelled_webhook_event import ResponseCancelledWebhookEvent as ResponseCancelledWebhookEvent
+from .response_completed_webhook_event import ResponseCompletedWebhookEvent as ResponseCompletedWebhookEvent
+from .response_incomplete_webhook_event import ResponseIncompleteWebhookEvent as ResponseIncompleteWebhookEvent
+from .fine_tuning_job_failed_webhook_event import FineTuningJobFailedWebhookEvent as FineTuningJobFailedWebhookEvent
+from .fine_tuning_job_cancelled_webhook_event import (
+    FineTuningJobCancelledWebhookEvent as FineTuningJobCancelledWebhookEvent,
+)
+from .fine_tuning_job_succeeded_webhook_event import (
+    FineTuningJobSucceededWebhookEvent as FineTuningJobSucceededWebhookEvent,
+)
src/openai/types/webhooks/batch_cancelled_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BatchCancelledWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the batch API request."""
+
+
+class BatchCancelledWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the batch API request was cancelled."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["batch.cancelled"]
+    """The type of the event. Always `batch.cancelled`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/batch_completed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BatchCompletedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the batch API request."""
+
+
+class BatchCompletedWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the batch API request was completed."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["batch.completed"]
+    """The type of the event. Always `batch.completed`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/batch_expired_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BatchExpiredWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the batch API request."""
+
+
+class BatchExpiredWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the batch API request expired."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["batch.expired"]
+    """The type of the event. Always `batch.expired`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/batch_failed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BatchFailedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the batch API request."""
+
+
+class BatchFailedWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the batch API request failed."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["batch.failed"]
+    """The type of the event. Always `batch.failed`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/eval_run_canceled_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["EvalRunCanceledWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the eval run."""
+
+
+class EvalRunCanceledWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the eval run was canceled."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["eval.run.canceled"]
+    """The type of the event. Always `eval.run.canceled`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/eval_run_failed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["EvalRunFailedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the eval run."""
+
+
+class EvalRunFailedWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the eval run failed."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["eval.run.failed"]
+    """The type of the event. Always `eval.run.failed`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/eval_run_succeeded_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["EvalRunSucceededWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the eval run."""
+
+
+class EvalRunSucceededWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the eval run succeeded."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["eval.run.succeeded"]
+    """The type of the event. Always `eval.run.succeeded`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/fine_tuning_job_cancelled_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobCancelledWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the fine-tuning job."""
+
+
+class FineTuningJobCancelledWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the fine-tuning job was cancelled."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["fine_tuning.job.cancelled"]
+    """The type of the event. Always `fine_tuning.job.cancelled`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/fine_tuning_job_failed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobFailedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the fine-tuning job."""
+
+
+class FineTuningJobFailedWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the fine-tuning job failed."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["fine_tuning.job.failed"]
+    """The type of the event. Always `fine_tuning.job.failed`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/fine_tuning_job_succeeded_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobSucceededWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the fine-tuning job."""
+
+
+class FineTuningJobSucceededWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the fine-tuning job succeeded."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["fine_tuning.job.succeeded"]
+    """The type of the event. Always `fine_tuning.job.succeeded`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/response_cancelled_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCancelledWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the model response."""
+
+
+class ResponseCancelledWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the model response was cancelled."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["response.cancelled"]
+    """The type of the event. Always `response.cancelled`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/response_completed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompletedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the model response."""
+
+
+class ResponseCompletedWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the model response was completed."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["response.completed"]
+    """The type of the event. Always `response.completed`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/response_failed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseFailedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the model response."""
+
+
+class ResponseFailedWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the model response failed."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["response.failed"]
+    """The type of the event. Always `response.failed`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/response_incomplete_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseIncompleteWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """The unique ID of the model response."""
+
+
+class ResponseIncompleteWebhookEvent(BaseModel):
+    id: str
+    """The unique ID of the event."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) of when the model response was interrupted."""
+
+    data: Data
+    """Event data payload."""
+
+    type: Literal["response.incomplete"]
+    """The type of the event. Always `response.incomplete`."""
+
+    object: Optional[Literal["event"]] = None
+    """The object of the event. Always `event`."""
src/openai/types/webhooks/unwrap_webhook_event.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .batch_failed_webhook_event import BatchFailedWebhookEvent
+from .batch_expired_webhook_event import BatchExpiredWebhookEvent
+from .batch_cancelled_webhook_event import BatchCancelledWebhookEvent
+from .batch_completed_webhook_event import BatchCompletedWebhookEvent
+from .eval_run_failed_webhook_event import EvalRunFailedWebhookEvent
+from .response_failed_webhook_event import ResponseFailedWebhookEvent
+from .eval_run_canceled_webhook_event import EvalRunCanceledWebhookEvent
+from .eval_run_succeeded_webhook_event import EvalRunSucceededWebhookEvent
+from .response_cancelled_webhook_event import ResponseCancelledWebhookEvent
+from .response_completed_webhook_event import ResponseCompletedWebhookEvent
+from .response_incomplete_webhook_event import ResponseIncompleteWebhookEvent
+from .fine_tuning_job_failed_webhook_event import FineTuningJobFailedWebhookEvent
+from .fine_tuning_job_cancelled_webhook_event import FineTuningJobCancelledWebhookEvent
+from .fine_tuning_job_succeeded_webhook_event import FineTuningJobSucceededWebhookEvent
+
+__all__ = ["UnwrapWebhookEvent"]
+
+UnwrapWebhookEvent: TypeAlias = Annotated[
+    Union[
+        BatchCancelledWebhookEvent,
+        BatchCompletedWebhookEvent,
+        BatchExpiredWebhookEvent,
+        BatchFailedWebhookEvent,
+        EvalRunCanceledWebhookEvent,
+        EvalRunFailedWebhookEvent,
+        EvalRunSucceededWebhookEvent,
+        FineTuningJobCancelledWebhookEvent,
+        FineTuningJobFailedWebhookEvent,
+        FineTuningJobSucceededWebhookEvent,
+        ResponseCancelledWebhookEvent,
+        ResponseCompletedWebhookEvent,
+        ResponseFailedWebhookEvent,
+        ResponseIncompleteWebhookEvent,
+    ],
+    PropertyInfo(discriminator="type"),
+]
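`UnwrapWebhookEvent` is what `client.webhooks.unwrap(...)` returns (exercised in `tests/api_resources/test_webhooks.py` below); a sketch of narrowing it by its `type` discriminator, assuming `payload` and `headers` come from an incoming HTTP request:

```py
from openai import OpenAI

client = OpenAI()  # webhook_secret falls back to OPENAI_WEBHOOK_SECRET


def handle_event(payload: bytes, headers: dict[str, str]) -> None:
    event = client.webhooks.unwrap(payload, headers)

    if event.type == "response.completed":
        print("response finished:", event.data.id)
    elif event.type == "batch.failed":
        print("batch failed:", event.data.id)
    elif event.type.startswith("fine_tuning.job."):
        print("fine-tuning update:", event.type, event.data.id)
    else:
        print("unhandled event:", event.type)
```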
src/openai/types/images_response.py
@@ -1,6 +1,7 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 from typing import List, Optional
+from typing_extensions import Literal
 
 from .image import Image
 from .._models import BaseModel
@@ -34,8 +35,26 @@ class ImagesResponse(BaseModel):
     created: int
     """The Unix timestamp (in seconds) of when the image was created."""
 
+    background: Optional[Literal["transparent", "opaque"]] = None
+    """The background parameter used for the image generation.
+
+    Either `transparent` or `opaque`.
+    """
+
     data: Optional[List[Image]] = None
     """The list of generated images."""
 
+    output_format: Optional[Literal["png", "webp", "jpeg"]] = None
+    """The output format of the image generation. Either `png`, `webp`, or `jpeg`."""
+
+    quality: Optional[Literal["low", "medium", "high"]] = None
+    """The quality of the image generated. Either `low`, `medium`, or `high`."""
+
+    size: Optional[Literal["1024x1024", "1024x1536", "1536x1024"]] = None
+    """The size of the image generated.
+
+    Either `1024x1024`, `1024x1536`, or `1536x1024`.
+    """
+
     usage: Optional[Usage] = None
     """For `gpt-image-1` only, the token usage information for the image generation."""
src/openai/__init__.py
@@ -30,6 +30,7 @@ from ._exceptions import (
     LengthFinishReasonError,
     UnprocessableEntityError,
     APIResponseValidationError,
+    InvalidWebhookSignatureError,
     ContentFilterFinishReasonError,
 )
 from ._base_client import DefaultHttpxClient, DefaultAioHttpClient, DefaultAsyncHttpxClient
@@ -62,6 +63,7 @@ __all__ = [
     "InternalServerError",
     "LengthFinishReasonError",
     "ContentFilterFinishReasonError",
+    "InvalidWebhookSignatureError",
     "Timeout",
     "RequestOptions",
     "Client",
@@ -121,6 +123,8 @@ organization: str | None = None
 
 project: str | None = None
 
+webhook_secret: str | None = None
+
 base_url: str | _httpx.URL | None = None
 
 timeout: float | Timeout | None = DEFAULT_TIMEOUT
@@ -183,6 +187,17 @@ class _ModuleClient(OpenAI):
 
         project = value
 
+    @property  # type: ignore
+    @override
+    def webhook_secret(self) -> str | None:
+        return webhook_secret
+
+    @webhook_secret.setter  # type: ignore
+    def webhook_secret(self, value: str | None) -> None:  # type: ignore
+        global webhook_secret
+
+        webhook_secret = value
+
     @property
     @override
     def base_url(self) -> _httpx.URL:
@@ -335,6 +350,7 @@ def _load_client() -> OpenAI:  # type: ignore[reportUnusedFunction]
             api_key=api_key,
             organization=organization,
             project=project,
+            webhook_secret=webhook_secret,
             base_url=base_url,
             timeout=timeout,
             max_retries=max_retries,
@@ -363,6 +379,7 @@ from ._module_client import (
     models as models,
     batches as batches,
     uploads as uploads,
+    webhooks as webhooks,
     responses as responses,
     containers as containers,
     embeddings as embeddings,
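The module-level client mirrors the new setting, so the global interface behaves like a constructed client; a minimal sketch (the secret value is a placeholder):

```py
import openai

openai.webhook_secret = "whsec_..."  # or export OPENAI_WEBHOOK_SECRET


def handle_delivery(payload: bytes, headers: dict[str, str]) -> None:
    # The lazy `webhooks` proxy resolves against the module-level client on first use.
    event = openai.webhooks.unwrap(payload, headers)
    print(event.type, event.data.id)
```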
src/openai/_client.py
@@ -57,6 +57,7 @@ if TYPE_CHECKING:
     from .resources.images import Images, AsyncImages
     from .resources.models import Models, AsyncModels
     from .resources.batches import Batches, AsyncBatches
+    from .resources.webhooks import Webhooks, AsyncWebhooks
     from .resources.beta.beta import Beta, AsyncBeta
     from .resources.chat.chat import Chat, AsyncChat
     from .resources.embeddings import Embeddings, AsyncEmbeddings
@@ -78,6 +79,7 @@ class OpenAI(SyncAPIClient):
     api_key: str
     organization: str | None
     project: str | None
+    webhook_secret: str | None
 
     websocket_base_url: str | httpx.URL | None
     """Base URL for WebSocket connections.
@@ -93,6 +95,7 @@ class OpenAI(SyncAPIClient):
         api_key: str | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         base_url: str | httpx.URL | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
@@ -119,6 +122,7 @@ class OpenAI(SyncAPIClient):
         - `api_key` from `OPENAI_API_KEY`
         - `organization` from `OPENAI_ORG_ID`
         - `project` from `OPENAI_PROJECT_ID`
+        - `webhook_secret` from `OPENAI_WEBHOOK_SECRET`
         """
         if api_key is None:
             api_key = os.environ.get("OPENAI_API_KEY")
@@ -136,6 +140,10 @@ class OpenAI(SyncAPIClient):
             project = os.environ.get("OPENAI_PROJECT_ID")
         self.project = project
 
+        if webhook_secret is None:
+            webhook_secret = os.environ.get("OPENAI_WEBHOOK_SECRET")
+        self.webhook_secret = webhook_secret
+
         self.websocket_base_url = websocket_base_url
 
         if base_url is None:
@@ -216,6 +224,12 @@ class OpenAI(SyncAPIClient):
 
         return VectorStores(self)
 
+    @cached_property
+    def webhooks(self) -> Webhooks:
+        from .resources.webhooks import Webhooks
+
+        return Webhooks(self)
+
     @cached_property
     def beta(self) -> Beta:
         from .resources.beta import Beta
@@ -288,6 +302,7 @@ class OpenAI(SyncAPIClient):
         api_key: str | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -325,6 +340,7 @@ class OpenAI(SyncAPIClient):
             api_key=api_key or self.api_key,
             organization=organization or self.organization,
             project=project or self.project,
+            webhook_secret=webhook_secret or self.webhook_secret,
             websocket_base_url=websocket_base_url or self.websocket_base_url,
             base_url=base_url or self.base_url,
             timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
@@ -379,6 +395,7 @@ class AsyncOpenAI(AsyncAPIClient):
     api_key: str
     organization: str | None
     project: str | None
+    webhook_secret: str | None
 
     websocket_base_url: str | httpx.URL | None
     """Base URL for WebSocket connections.
@@ -394,6 +411,7 @@ class AsyncOpenAI(AsyncAPIClient):
         api_key: str | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         base_url: str | httpx.URL | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
@@ -420,6 +438,7 @@ class AsyncOpenAI(AsyncAPIClient):
         - `api_key` from `OPENAI_API_KEY`
         - `organization` from `OPENAI_ORG_ID`
         - `project` from `OPENAI_PROJECT_ID`
+        - `webhook_secret` from `OPENAI_WEBHOOK_SECRET`
         """
         if api_key is None:
             api_key = os.environ.get("OPENAI_API_KEY")
@@ -437,6 +456,10 @@ class AsyncOpenAI(AsyncAPIClient):
             project = os.environ.get("OPENAI_PROJECT_ID")
         self.project = project
 
+        if webhook_secret is None:
+            webhook_secret = os.environ.get("OPENAI_WEBHOOK_SECRET")
+        self.webhook_secret = webhook_secret
+
         self.websocket_base_url = websocket_base_url
 
         if base_url is None:
@@ -517,6 +540,12 @@ class AsyncOpenAI(AsyncAPIClient):
 
         return AsyncVectorStores(self)
 
+    @cached_property
+    def webhooks(self) -> AsyncWebhooks:
+        from .resources.webhooks import AsyncWebhooks
+
+        return AsyncWebhooks(self)
+
     @cached_property
     def beta(self) -> AsyncBeta:
         from .resources.beta import AsyncBeta
@@ -589,6 +618,7 @@ class AsyncOpenAI(AsyncAPIClient):
         api_key: str | None = None,
         organization: str | None = None,
         project: str | None = None,
+        webhook_secret: str | None = None,
         websocket_base_url: str | httpx.URL | None = None,
         base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -626,6 +656,7 @@ class AsyncOpenAI(AsyncAPIClient):
             api_key=api_key or self.api_key,
             organization=organization or self.organization,
             project=project or self.project,
+            webhook_secret=webhook_secret or self.webhook_secret,
             websocket_base_url=websocket_base_url or self.websocket_base_url,
             base_url=base_url or self.base_url,
             timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
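A sketch of the corresponding client-level configuration, assuming the hunks above are the constructor and the `copy()` helper (the key and secret values are placeholders):

```py
from openai import AsyncOpenAI

# webhook_secret falls back to the OPENAI_WEBHOOK_SECRET environment variable,
# just as api_key falls back to OPENAI_API_KEY.
client = AsyncOpenAI(api_key="sk-...", webhook_secret="whsec_...")

# copy() carries the secret over unless it is explicitly overridden.
other = client.copy(webhook_secret="whsec_other")
```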
src/openai/_exceptions.py
@@ -24,6 +24,7 @@ __all__ = [
     "InternalServerError",
     "LengthFinishReasonError",
     "ContentFilterFinishReasonError",
+    "InvalidWebhookSignatureError",
 ]
 
 
@@ -154,3 +155,7 @@ class ContentFilterFinishReasonError(OpenAIError):
         super().__init__(
             f"Could not parse response content as the request was rejected by the content filter",
         )
+
+
+class InvalidWebhookSignatureError(ValueError):
+    """Raised when a webhook signature is invalid, meaning the computed signature does not match the expected signature."""
src/openai/_module_client.py
@@ -10,6 +10,7 @@ if TYPE_CHECKING:
     from .resources.images import Images
     from .resources.models import Models
     from .resources.batches import Batches
+    from .resources.webhooks import Webhooks
     from .resources.beta.beta import Beta
     from .resources.chat.chat import Chat
     from .resources.embeddings import Embeddings
@@ -81,6 +82,12 @@ class UploadsProxy(LazyProxy["Uploads"]):
         return _load_client().uploads
 
 
+class WebhooksProxy(LazyProxy["Webhooks"]):
+    @override
+    def __load__(self) -> Webhooks:
+        return _load_client().webhooks
+
+
 class ResponsesProxy(LazyProxy["Responses"]):
     @override
     def __load__(self) -> Responses:
@@ -132,6 +139,7 @@ images: Images = ImagesProxy().__as_proxied__()
 models: Models = ModelsProxy().__as_proxied__()
 batches: Batches = BatchesProxy().__as_proxied__()
 uploads: Uploads = UploadsProxy().__as_proxied__()
+webhooks: Webhooks = WebhooksProxy().__as_proxied__()
 responses: Responses = ResponsesProxy().__as_proxied__()
 embeddings: Embeddings = EmbeddingsProxy().__as_proxied__()
 containers: Containers = ContainersProxy().__as_proxied__()
src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "openai"
-__version__ = "1.91.0"  # x-release-please-version
+__version__ = "1.92.0"  # x-release-please-version
tests/api_resources/responses/test_input_items.py
@@ -31,7 +31,7 @@ class TestInputItems:
             response_id="response_id",
             after="after",
             before="before",
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             limit=0,
             order="asc",
         )
@@ -87,7 +87,7 @@ class TestAsyncInputItems:
             response_id="response_id",
             after="after",
             before="before",
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             limit=0,
             order="asc",
         )
tests/api_resources/test_responses.py
@@ -28,10 +28,11 @@ class TestResponses:
     def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
         response = client.responses.create(
             background=True,
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             input="string",
             instructions="instructions",
             max_output_tokens=0,
+            max_tool_calls=0,
             metadata={"foo": "string"},
             model="gpt-4o",
             parallel_tool_calls=True,
@@ -61,6 +62,7 @@ class TestResponses:
                     "description": "description",
                 }
             ],
+            top_logprobs=0,
             top_p=1,
             truncation="auto",
             user="user-1234",
@@ -99,10 +101,11 @@ class TestResponses:
         response_stream = client.responses.create(
             stream=True,
             background=True,
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             input="string",
             instructions="instructions",
             max_output_tokens=0,
+            max_tool_calls=0,
             metadata={"foo": "string"},
             model="gpt-4o",
             parallel_tool_calls=True,
@@ -131,6 +134,7 @@ class TestResponses:
                     "description": "description",
                 }
             ],
+            top_logprobs=0,
             top_p=1,
             truncation="auto",
             user="user-1234",
@@ -171,7 +175,7 @@ class TestResponses:
     def test_method_retrieve_with_all_params_overload_1(self, client: OpenAI) -> None:
         response = client.responses.retrieve(
             response_id="resp_677efb5139a88190b512bc3fef8e535d",
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             starting_after=0,
             stream=False,
         )
@@ -221,7 +225,7 @@ class TestResponses:
         response_stream = client.responses.retrieve(
             response_id="resp_677efb5139a88190b512bc3fef8e535d",
             stream=True,
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             starting_after=0,
         )
         response_stream.response.close()
@@ -350,10 +354,11 @@ class TestAsyncResponses:
     async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None:
         response = await async_client.responses.create(
             background=True,
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             input="string",
             instructions="instructions",
             max_output_tokens=0,
+            max_tool_calls=0,
             metadata={"foo": "string"},
             model="gpt-4o",
             parallel_tool_calls=True,
@@ -383,6 +388,7 @@ class TestAsyncResponses:
                     "description": "description",
                 }
             ],
+            top_logprobs=0,
             top_p=1,
             truncation="auto",
             user="user-1234",
@@ -421,10 +427,11 @@ class TestAsyncResponses:
         response_stream = await async_client.responses.create(
             stream=True,
             background=True,
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             input="string",
             instructions="instructions",
             max_output_tokens=0,
+            max_tool_calls=0,
             metadata={"foo": "string"},
             model="gpt-4o",
             parallel_tool_calls=True,
@@ -453,6 +460,7 @@ class TestAsyncResponses:
                     "description": "description",
                 }
             ],
+            top_logprobs=0,
             top_p=1,
             truncation="auto",
             user="user-1234",
@@ -493,7 +501,7 @@ class TestAsyncResponses:
     async def test_method_retrieve_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None:
         response = await async_client.responses.retrieve(
             response_id="resp_677efb5139a88190b512bc3fef8e535d",
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             starting_after=0,
             stream=False,
         )
@@ -543,7 +551,7 @@ class TestAsyncResponses:
         response_stream = await async_client.responses.retrieve(
             response_id="resp_677efb5139a88190b512bc3fef8e535d",
             stream=True,
-            include=["file_search_call.results"],
+            include=["code_interpreter_call.outputs"],
             starting_after=0,
         )
         await response_stream.response.aclose()
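The updated matrix above exercises two new request parameters, `max_tool_calls` and `top_logprobs`; a hedged sketch of using them outside the tests (model, prompt, and limits are placeholders):

```py
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4o",
    input="Compare the populations of Norway and Sweden.",
    tools=[{"type": "web_search_preview"}],
    max_tool_calls=3,  # cap built-in tool invocations for this response
    top_logprobs=5,    # return log probabilities for the top output tokens
    include=["message.output_text.logprobs"],
)
```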
tests/api_resources/test_webhooks.py
@@ -0,0 +1,284 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from unittest import mock
+
+import pytest
+
+import openai
+from openai._exceptions import InvalidWebhookSignatureError
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+# Standardized test constants (match the TypeScript implementation)
+TEST_SECRET = "whsec_RdvaYFYUXuIFuEbvZHwMfYFhUf7aMYjYcmM24+Aj40c="
+TEST_PAYLOAD = '{"id": "evt_685c059ae3a481909bdc86819b066fb6", "object": "event", "created_at": 1750861210, "type": "response.completed", "data": {"id": "resp_123"}}'
+TEST_TIMESTAMP = 1750861210  # Fixed timestamp that matches our test signature
+TEST_WEBHOOK_ID = "wh_685c059ae39c8190af8c71ed1022a24d"
+TEST_SIGNATURE = "v1,gUAg4R2hWouRZqRQG4uJypNS8YK885G838+EHb4nKBY="
+
+
+def create_test_headers(
+    timestamp: int | None = None, signature: str | None = None, webhook_id: str | None = None
+) -> dict[str, str]:
+    """Helper function to create test headers"""
+    return {
+        "webhook-signature": signature or TEST_SIGNATURE,
+        "webhook-timestamp": str(timestamp or TEST_TIMESTAMP),
+        "webhook-id": webhook_id or TEST_WEBHOOK_ID,
+    }
+
+
+class TestWebhooks:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    def test_unwrap_with_secret(self, client: openai.OpenAI) -> None:
+        headers = create_test_headers()
+        unwrapped = client.webhooks.unwrap(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+        assert unwrapped.id == "evt_685c059ae3a481909bdc86819b066fb6"
+        assert unwrapped.created_at == 1750861210
+
+    @parametrize
+    def test_unwrap_without_secret(self, client: openai.OpenAI) -> None:
+        headers = create_test_headers()
+        with pytest.raises(ValueError, match="The webhook secret must either be set"):
+            client.webhooks.unwrap(TEST_PAYLOAD, headers)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    def test_verify_signature_valid(self, client: openai.OpenAI) -> None:
+        headers = create_test_headers()
+        # Should not raise - this is a truly valid signature for this timestamp
+        client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @parametrize
+    def test_verify_signature_invalid_secret_format(self, client: openai.OpenAI) -> None:
+        headers = create_test_headers()
+        with pytest.raises(ValueError, match="The webhook secret must either be set"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=None)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    def test_verify_signature_invalid(self, client: openai.OpenAI) -> None:
+        headers = create_test_headers()
+        with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret="invalid_secret")
+
+    @parametrize
+    def test_verify_signature_missing_webhook_signature_header(self, client: openai.OpenAI) -> None:
+        headers = create_test_headers(signature=None)
+        del headers["webhook-signature"]
+        with pytest.raises(ValueError, match="Could not find webhook-signature header"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @parametrize
+    def test_verify_signature_missing_webhook_timestamp_header(self, client: openai.OpenAI) -> None:
+        headers = create_test_headers()
+        del headers["webhook-timestamp"]
+        with pytest.raises(ValueError, match="Could not find webhook-timestamp header"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @parametrize
+    def test_verify_signature_missing_webhook_id_header(self, client: openai.OpenAI) -> None:
+        headers = create_test_headers()
+        del headers["webhook-id"]
+        with pytest.raises(ValueError, match="Could not find webhook-id header"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    def test_verify_signature_payload_bytes(self, client: openai.OpenAI) -> None:
+        headers = create_test_headers()
+        client.webhooks.verify_signature(TEST_PAYLOAD.encode("utf-8"), headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    def test_unwrap_with_client_secret(self) -> None:
+        test_client = openai.OpenAI(base_url=base_url, api_key="test-api-key", webhook_secret=TEST_SECRET)
+        headers = create_test_headers()
+
+        unwrapped = test_client.webhooks.unwrap(TEST_PAYLOAD, headers)
+        assert unwrapped.id == "evt_685c059ae3a481909bdc86819b066fb6"
+        assert unwrapped.created_at == 1750861210
+
+    @parametrize
+    def test_verify_signature_timestamp_too_old(self, client: openai.OpenAI) -> None:
+        # Use a timestamp that's older than 5 minutes from our test timestamp
+        old_timestamp = TEST_TIMESTAMP - 400  # 6 minutes 40 seconds ago
+        headers = create_test_headers(timestamp=old_timestamp, signature="v1,dummy_signature")
+
+        with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too old"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    def test_verify_signature_timestamp_too_new(self, client: openai.OpenAI) -> None:
+        # Use a timestamp that's in the future beyond tolerance from our test timestamp
+        future_timestamp = TEST_TIMESTAMP + 400  # 6 minutes 40 seconds in the future
+        headers = create_test_headers(timestamp=future_timestamp, signature="v1,dummy_signature")
+
+        with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too new"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    def test_verify_signature_custom_tolerance(self, client: openai.OpenAI) -> None:
+        # Use a timestamp that's older than default tolerance but within custom tolerance
+        old_timestamp = TEST_TIMESTAMP - 400  # 6 minutes 40 seconds ago from test timestamp
+        headers = create_test_headers(timestamp=old_timestamp, signature="v1,dummy_signature")
+
+        # Should fail with default tolerance
+        with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too old"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+        # Should also fail with custom tolerance of 10 minutes (signature won't match)
+        with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET, tolerance=600)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    def test_verify_signature_recent_timestamp_succeeds(self, client: openai.OpenAI) -> None:
+        # Use a recent timestamp with dummy signature
+        headers = create_test_headers(signature="v1,dummy_signature")
+
+        # Should fail on signature verification (not timestamp validation)
+        with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    def test_verify_signature_multiple_signatures_one_valid(self, client: openai.OpenAI) -> None:
+        # Test multiple signatures: one invalid, one valid
+        multiple_signatures = f"v1,invalid_signature {TEST_SIGNATURE}"
+        headers = create_test_headers(signature=multiple_signatures)
+
+        # Should not raise when at least one signature is valid
+        client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    def test_verify_signature_multiple_signatures_all_invalid(self, client: openai.OpenAI) -> None:
+        # Test multiple invalid signatures
+        multiple_invalid_signatures = "v1,invalid_signature1 v1,invalid_signature2"
+        headers = create_test_headers(signature=multiple_invalid_signatures)
+
+        with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+            client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+
+class TestAsyncWebhooks:
+    parametrize = pytest.mark.parametrize(
+        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+    )
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    async def test_unwrap_with_secret(self, async_client: openai.AsyncOpenAI) -> None:
+        headers = create_test_headers()
+        unwrapped = async_client.webhooks.unwrap(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+        assert unwrapped.id == "evt_685c059ae3a481909bdc86819b066fb6"
+        assert unwrapped.created_at == 1750861210
+
+    @parametrize
+    async def test_unwrap_without_secret(self, async_client: openai.AsyncOpenAI) -> None:
+        headers = create_test_headers()
+        with pytest.raises(ValueError, match="The webhook secret must either be set"):
+            async_client.webhooks.unwrap(TEST_PAYLOAD, headers)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    async def test_verify_signature_valid(self, async_client: openai.AsyncOpenAI) -> None:
+        headers = create_test_headers()
+        # Should not raise - this is a truly valid signature for this timestamp
+        async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @parametrize
+    async def test_verify_signature_invalid_secret_format(self, async_client: openai.AsyncOpenAI) -> None:
+        headers = create_test_headers()
+        with pytest.raises(ValueError, match="The webhook secret must either be set"):
+            async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=None)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    async def test_verify_signature_invalid(self, async_client: openai.AsyncOpenAI) -> None:
+        headers = create_test_headers()
+        with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+            async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret="invalid_secret")
+
+    @parametrize
+    async def test_verify_signature_missing_webhook_signature_header(self, async_client: openai.AsyncOpenAI) -> None:
+        headers = create_test_headers()
+        del headers["webhook-signature"]
+        with pytest.raises(ValueError, match="Could not find webhook-signature header"):
+            async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @parametrize
+    async def test_verify_signature_missing_webhook_timestamp_header(self, async_client: openai.AsyncOpenAI) -> None:
+        headers = create_test_headers()
+        del headers["webhook-timestamp"]
+        with pytest.raises(ValueError, match="Could not find webhook-timestamp header"):
+            async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @parametrize
+    async def test_verify_signature_missing_webhook_id_header(self, async_client: openai.AsyncOpenAI) -> None:
+        headers = create_test_headers()
+        del headers["webhook-id"]
+        with pytest.raises(ValueError, match="Could not find webhook-id header"):
+            async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    async def test_verify_signature_payload_bytes(self, async_client: openai.AsyncOpenAI) -> None:
+        headers = create_test_headers()
+        async_client.webhooks.verify_signature(TEST_PAYLOAD.encode("utf-8"), headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    async def test_unwrap_with_client_secret(self) -> None:
+        test_async_client = openai.AsyncOpenAI(base_url=base_url, api_key="test-api-key", webhook_secret=TEST_SECRET)
+        headers = create_test_headers()
+
+        unwrapped = test_async_client.webhooks.unwrap(TEST_PAYLOAD, headers)
+        assert unwrapped.id == "evt_685c059ae3a481909bdc86819b066fb6"
+        assert unwrapped.created_at == 1750861210
+
+    @parametrize
+    async def test_verify_signature_timestamp_too_old(self, async_client: openai.AsyncOpenAI) -> None:
+        # Use a timestamp that's older than 5 minutes from our test timestamp
+        old_timestamp = TEST_TIMESTAMP - 400  # 6 minutes 40 seconds ago
+        headers = create_test_headers(timestamp=old_timestamp, signature="v1,dummy_signature")
+
+        with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too old"):
+            async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    async def test_verify_signature_timestamp_too_new(self, async_client: openai.AsyncOpenAI) -> None:
+        # Use a timestamp that's in the future beyond tolerance from our test timestamp
+        future_timestamp = TEST_TIMESTAMP + 400  # 6 minutes 40 seconds in the future
+        headers = create_test_headers(timestamp=future_timestamp, signature="v1,dummy_signature")
+
+        with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too new"):
+            async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    async def test_verify_signature_multiple_signatures_one_valid(self, async_client: openai.AsyncOpenAI) -> None:
+        # Test multiple signatures: one invalid, one valid
+        multiple_signatures = f"v1,invalid_signature {TEST_SIGNATURE}"
+        headers = create_test_headers(signature=multiple_signatures)
+
+        # Should not raise when at least one signature is valid
+        async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+    @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+    @parametrize
+    async def test_verify_signature_multiple_signatures_all_invalid(self, async_client: openai.AsyncOpenAI) -> None:
+        # Test multiple invalid signatures
+        multiple_invalid_signatures = "v1,invalid_signature1 v1,invalid_signature2"
+        headers = create_test_headers(signature=multiple_invalid_signatures)
+
+        with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+            async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
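For completeness, a framework-level sketch of wiring the verification helpers into an HTTP endpoint; Flask is an arbitrary choice and not part of this change, and the route path is invented:

```py
from flask import Flask, request

from openai import OpenAI, InvalidWebhookSignatureError

app = Flask(__name__)
client = OpenAI()  # expects OPENAI_WEBHOOK_SECRET (or webhook_secret=...) to be set


@app.post("/openai/webhooks")
def openai_webhook():
    # Pass the raw body through; re-serializing the JSON would break the signature.
    headers = {k.lower(): v for k, v in request.headers.items()}
    try:
        event = client.webhooks.unwrap(request.data, headers)
    except InvalidWebhookSignatureError:
        return "invalid signature", 400
    except ValueError:
        return "malformed webhook delivery", 400

    print("received", event.type, event.data.id)
    return "", 200
```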
tests/lib/chat/test_completions.py
@@ -33,7 +33,7 @@ _T = TypeVar("_T")
 @pytest.mark.respx(base_url=base_url)
 def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -101,7 +101,7 @@ def test_parse_pydantic_model(client: OpenAI, respx_mock: MockRouter, monkeypatc
         units: Literal["c", "f"]
 
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -171,7 +171,7 @@ def test_parse_pydantic_model_optional_default(
         units: Optional[Literal["c", "f"]] = None
 
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -248,7 +248,7 @@ def test_parse_pydantic_model_enum(client: OpenAI, respx_mock: MockRouter, monke
         ColorDetection.update_forward_refs(**locals())  # type: ignore
 
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {"role": "user", "content": "What color is a Coke can?"},
@@ -293,7 +293,7 @@ def test_parse_pydantic_model_multiple_choices(
         units: Literal["c", "f"]
 
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -376,7 +376,7 @@ def test_parse_pydantic_dataclass(client: OpenAI, respx_mock: MockRouter, monkey
         participants: List[str]
 
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {"role": "system", "content": "Extract the event information."},
@@ -437,7 +437,7 @@ ParsedChatCompletion[CalendarEvent](
 @pytest.mark.respx(base_url=base_url)
 def test_pydantic_tool_model_all_types(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -522,7 +522,7 @@ def test_parse_max_tokens_reached(client: OpenAI, respx_mock: MockRouter) -> Non
 
     with pytest.raises(openai.LengthFinishReasonError):
         _make_snapshot_request(
-            lambda c: c.beta.chat.completions.parse(
+            lambda c: c.chat.completions.parse(
                 model="gpt-4o-2024-08-06",
                 messages=[
                     {
@@ -549,7 +549,7 @@ def test_parse_pydantic_model_refusal(client: OpenAI, respx_mock: MockRouter, mo
         units: Literal["c", "f"]
 
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -597,7 +597,7 @@ def test_parse_pydantic_tool(client: OpenAI, respx_mock: MockRouter, monkeypatch
         units: Literal["c", "f"] = "c"
 
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -663,7 +663,7 @@ def test_parse_multiple_pydantic_tools(client: OpenAI, respx_mock: MockRouter, m
         exchange: str
 
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -734,7 +734,7 @@ def test_parse_multiple_pydantic_tools(client: OpenAI, respx_mock: MockRouter, m
 @pytest.mark.respx(base_url=base_url)
 def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
     completion = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.parse(
+        lambda c: c.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -808,7 +808,7 @@ def test_parse_non_strict_tools(client: OpenAI) -> None:
     with pytest.raises(
         ValueError, match="`get_weather` is not strict. Only `strict` function tools can be auto-parsed"
     ):
-        client.beta.chat.completions.parse(
+        client.chat.completions.parse(
             model="gpt-4o-2024-08-06",
             messages=[],
             tools=[
@@ -831,7 +831,7 @@ def test_parse_pydantic_raw_response(client: OpenAI, respx_mock: MockRouter, mon
         units: Literal["c", "f"]
 
     response = _make_snapshot_request(
-        lambda c: c.beta.chat.completions.with_raw_response.parse(
+        lambda c: c.chat.completions.with_raw_response.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -847,7 +847,7 @@ def test_parse_pydantic_raw_response(client: OpenAI, respx_mock: MockRouter, mon
         mock_client=client,
         respx_mock=respx_mock,
     )
-    assert response.http_request.headers.get("x-stainless-helper-method") == "beta.chat.completions.parse"
+    assert response.http_request.headers.get("x-stainless-helper-method") == "chat.completions.parse"
 
     completion = response.parse()
     message = completion.choices[0].message
@@ -907,7 +907,7 @@ async def test_async_parse_pydantic_raw_response(
         units: Literal["c", "f"]
 
     response = await _make_async_snapshot_request(
-        lambda c: c.beta.chat.completions.with_raw_response.parse(
+        lambda c: c.chat.completions.with_raw_response.parse(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -923,7 +923,7 @@ async def test_async_parse_pydantic_raw_response(
         mock_client=async_client,
         respx_mock=respx_mock,
     )
-    assert response.http_request.headers.get("x-stainless-helper-method") == "beta.chat.completions.parse"
+    assert response.http_request.headers.get("x-stainless-helper-method") == "chat.completions.parse"
 
     completion = response.parse()
     message = completion.choices[0].message
@@ -978,7 +978,7 @@ def test_parse_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpe
 
     assert_signatures_in_sync(
         checking_client.chat.completions.create,
-        checking_client.beta.chat.completions.parse,
+        checking_client.chat.completions.parse,
         exclude_params={"response_format", "stream"},
     )
 
tests/lib/chat/test_completions_streaming.py
@@ -41,7 +41,7 @@ _T = TypeVar("_T")
 @pytest.mark.respx(base_url=base_url)
 def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -103,7 +103,7 @@ def test_parse_pydantic_model(client: OpenAI, respx_mock: MockRouter, monkeypatc
             done_snapshots.append(model_copy(stream.current_completion_snapshot, deep=True))
 
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -195,7 +195,7 @@ def test_parse_pydantic_model_multiple_choices(
         units: Literal["c", "f"]
 
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -374,7 +374,7 @@ def test_parse_max_tokens_reached(client: OpenAI, respx_mock: MockRouter) -> Non
 
     with pytest.raises(openai.LengthFinishReasonError):
         _make_stream_snapshot_request(
-            lambda c: c.beta.chat.completions.stream(
+            lambda c: c.chat.completions.stream(
                 model="gpt-4o-2024-08-06",
                 messages=[
                     {
@@ -399,7 +399,7 @@ def test_parse_pydantic_model_refusal(client: OpenAI, respx_mock: MockRouter, mo
         units: Literal["c", "f"]
 
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -444,7 +444,7 @@ RefusalDoneEvent(refusal="I'm sorry, I can't assist with that request.", type='r
 @pytest.mark.respx(base_url=base_url)
 def test_content_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -523,7 +523,7 @@ def test_refusal_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeyp
         units: Literal["c", "f"]
 
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -635,7 +635,7 @@ def test_parse_pydantic_tool(client: OpenAI, respx_mock: MockRouter, monkeypatch
         units: Literal["c", "f"] = "c"
 
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -733,7 +733,7 @@ def test_parse_multiple_pydantic_tools(client: OpenAI, respx_mock: MockRouter, m
         exchange: str
 
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -831,7 +831,7 @@ def test_parse_multiple_pydantic_tools(client: OpenAI, respx_mock: MockRouter, m
 @pytest.mark.respx(base_url=base_url)
 def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -903,7 +903,7 @@ def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch:
 @pytest.mark.respx(base_url=base_url)
 def test_non_pydantic_response_format(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[
                 {
@@ -951,7 +951,7 @@ def test_allows_non_strict_tools_but_no_parsing(
     client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch
 ) -> None:
     listener = _make_stream_snapshot_request(
-        lambda c: c.beta.chat.completions.stream(
+        lambda c: c.chat.completions.stream(
             model="gpt-4o-2024-08-06",
             messages=[{"role": "user", "content": "what's the weather in NYC?"}],
             tools=[
@@ -1069,7 +1069,7 @@ def test_stream_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOp
 
     assert_signatures_in_sync(
         checking_client.chat.completions.create,
-        checking_client.beta.chat.completions.stream,
+        checking_client.chat.completions.stream,
         exclude_params={"response_format", "stream"},
     )
 
tests/test_client.py
@@ -192,6 +192,7 @@ class TestOpenAI:
             copy_param = copy_signature.parameters.get(name)
             assert copy_param is not None, f"copy() signature is missing the {name} param"
 
+    @pytest.mark.skipif(sys.version_info >= (3, 12), reason="fails because of a memory leak that started from 3.12")
     def test_copy_build_request(self) -> None:
         options = FinalRequestOptions(method="get", url="/foo")
 
@@ -1074,6 +1075,7 @@ class TestAsyncOpenAI:
             copy_param = copy_signature.parameters.get(name)
             assert copy_param is not None, f"copy() signature is missing the {name} param"
 
+    @pytest.mark.skipif(sys.version_info >= (3, 12), reason="fails because of a memory leak that started from 3.12")
     def test_copy_build_request(self) -> None:
         options = FinalRequestOptions(method="get", url="/foo")
 
tests/test_module_client.py
@@ -17,6 +17,7 @@ def reset_state() -> None:
     openai.api_key = None or "My API Key"
     openai.organization = None
     openai.project = None
+    openai.webhook_secret = None
     openai.base_url = None
     openai.timeout = DEFAULT_TIMEOUT
     openai.max_retries = DEFAULT_MAX_RETRIES
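
The `openai.webhook_secret` line added above mirrors the existing module-level `api_key` / `organization` / `project` settings. A minimal sketch of setting it directly (the `whsec_...` value is a placeholder; by default the client reads the `OPENAI_WEBHOOK_SECRET` environment variable, as the README changes below note):

```python
import openai

# Module-level configuration, alongside openai.api_key / openai.organization.
# "whsec_..." is a placeholder; use the secret issued for your webhook endpoint.
openai.webhook_secret = "whsec_..."
```
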
.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.91.0"
+  ".": "1.92.0"
 }
\ No newline at end of file
.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 111
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-ef4ecb19eb61e24c49d77fef769ee243e5279bc0bdbaee8d0f8dba4da8722559.yml
-openapi_spec_hash: 1b8a9767c9f04e6865b06c41948cdc24
-config_hash: fd2af1d5eff0995bb7dc02ac9a34851d
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-cca460eaf5cc13e9d6e5293eb97aac53d66dc1385c691f74b768c97d165b6e8b.yml
+openapi_spec_hash: 9ec43d443b3dd58ca5aa87eb0a7eb49f
+config_hash: e74d6791681e3af1b548748ff47a22c2
api.md
@@ -395,6 +395,35 @@ Methods:
 - <code>client.vector_stores.file_batches.<a href="./src/openai/resources/vector_stores/file_batches.py">poll</a>(\*args) -> VectorStoreFileBatch</code>
 - <code>client.vector_stores.file_batches.<a href="./src/openai/resources/vector_stores/file_batches.py">upload_and_poll</a>(\*args) -> VectorStoreFileBatch</code>
 
+# Webhooks
+
+Types:
+
+```python
+from openai.types.webhooks import (
+    BatchCancelledWebhookEvent,
+    BatchCompletedWebhookEvent,
+    BatchExpiredWebhookEvent,
+    BatchFailedWebhookEvent,
+    EvalRunCanceledWebhookEvent,
+    EvalRunFailedWebhookEvent,
+    EvalRunSucceededWebhookEvent,
+    FineTuningJobCancelledWebhookEvent,
+    FineTuningJobFailedWebhookEvent,
+    FineTuningJobSucceededWebhookEvent,
+    ResponseCancelledWebhookEvent,
+    ResponseCompletedWebhookEvent,
+    ResponseFailedWebhookEvent,
+    ResponseIncompleteWebhookEvent,
+    UnwrapWebhookEvent,
+)
+```
+
+Methods:
+
+- <code>client.webhooks.<a href="./src/openai/resources/webhooks.py">unwrap</a>(payload, headers, \*, secret) -> UnwrapWebhookEvent</code>
+- <code>client.webhooks.<a href="./src/openai/resources/webhooks.py">verify_signature</a>(payload, headers, \*, secret, tolerance) -> None</code>
+
 # Beta
 
 ## Realtime
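
To illustrate the webhook methods listed just above: a minimal sketch of passing the secret explicitly rather than relying on the `OPENAI_WEBHOOK_SECRET` environment variable used in the README examples further down. The secret string is a placeholder, and `tolerance` is assumed here to be the allowed signature-timestamp skew in seconds.

```python
from openai import OpenAI

client = OpenAI()


def handle_webhook(raw_body: str, headers: dict) -> None:
    # raw_body must be the unparsed JSON request body,
    # e.g. request.get_data(as_text=True) in Flask.

    # Verify the signature and parse the payload in one step.
    event = client.webhooks.unwrap(raw_body, headers, secret="whsec_...")
    print("received event:", event.type)

    # Or verify only, then parse the body yourself; `tolerance` is assumed to be
    # the permitted clock skew for the signature timestamp, in seconds.
    client.webhooks.verify_signature(raw_body, headers, secret="whsec_...", tolerance=300)
```
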
@@ -774,6 +803,7 @@ from openai.types.responses import (
     ResponseWebSearchCallSearchingEvent,
     Tool,
     ToolChoiceFunction,
+    ToolChoiceMcp,
     ToolChoiceOptions,
     ToolChoiceTypes,
     WebSearchTool,
CHANGELOG.md
@@ -1,5 +1,25 @@
 # Changelog
 
+## 1.92.0 (2025-06-26)
+
+Full Changelog: [v1.91.0...v1.92.0](https://github.com/openai/openai-python/compare/v1.91.0...v1.92.0)
+
+### Features
+
+* **api:** webhook and deep research support ([d3bb116](https://github.com/openai/openai-python/commit/d3bb116f34f470502f902b88131deec43a953b12))
+* **client:** move stream and parse out of beta ([0e358ed](https://github.com/openai/openai-python/commit/0e358ed66b317038705fb38958a449d284f3cb88))
+
+
+### Bug Fixes
+
+* **ci:** release-doctor — report correct token name ([ff8c556](https://github.com/openai/openai-python/commit/ff8c5561e44e8a0902732b5934c97299d2c98d4e))
+
+
+### Chores
+
+* **internal:** add tests for breaking change detection ([710fe8f](https://github.com/openai/openai-python/commit/710fe8fd5f9e33730338341680152d3f2556dfa0))
+* **tests:** skip some failing tests on the latest python versions ([93ccc38](https://github.com/openai/openai-python/commit/93ccc38a8ef1575d77d33d031666d07d10e4af72))
+
 ## 1.91.0 (2025-06-23)
 
 Full Changelog: [v1.90.0...v1.91.0](https://github.com/openai/openai-python/compare/v1.90.0...v1.91.0)
helpers.md
@@ -2,7 +2,7 @@
 
 The OpenAI API supports extracting JSON from the model with the `response_format` request param, for more details on the API, see [this guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-The SDK provides a `client.beta.chat.completions.parse()` method which is a wrapper over the `client.chat.completions.create()` that
+The SDK provides a `client.chat.completions.parse()` method which is a wrapper over the `client.chat.completions.create()` that
 provides richer integrations with Python specific types & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
 
 ## Auto-parsing response content with Pydantic models
@@ -24,7 +24,7 @@ class MathResponse(BaseModel):
     final_answer: str
 
 client = OpenAI()
-completion = client.beta.chat.completions.parse(
+completion = client.chat.completions.parse(
     model="gpt-4o-2024-08-06",
     messages=[
         {"role": "system", "content": "You are a helpful math tutor."},
@@ -44,6 +44,7 @@ else:
 ## Auto-parsing function tool calls
 
 The `.parse()` method will also automatically parse `function` tool calls if:
+
 - You use the `openai.pydantic_function_tool()` helper method
 - You mark your tool schema with `"strict": True`
 
@@ -96,7 +97,7 @@ class Query(BaseModel):
     order_by: OrderBy
 
 client = openai.OpenAI()
-completion = client.beta.chat.completions.parse(
+completion = client.chat.completions.parse(
     model="gpt-4o-2024-08-06",
     messages=[
         {
@@ -121,7 +122,7 @@ print(tool_call.function.parsed_arguments.table_name)
 
 ### Differences from `.create()`
 
-The `beta.chat.completions.parse()` method imposes some additional restrictions on it's usage that `chat.completions.create()` does not. 
+The `chat.completions.parse()` method imposes some additional restrictions on its usage that `chat.completions.create()` does not.
 
 - If the completion completes with `finish_reason` set to `length` or `content_filter`, the `LengthFinishReasonError` / `ContentFilterFinishReasonError` errors will be raised.
 - Only strict function tools can be passed, e.g. `{'type': 'function', 'function': {..., 'strict': True}}`
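
A minimal sketch of guarding against the first restriction; the tiny `max_tokens` value and the `MathResponse` model are illustrative only (`LengthFinishReasonError` is exported from the top-level `openai` package, as the test changes above show):

```py
import openai
from openai import OpenAI
from pydantic import BaseModel


class MathResponse(BaseModel):
    final_answer: str


client = OpenAI()

try:
    completion = client.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[{"role": "user", "content": "What is 9 * 7?"}],
        response_format=MathResponse,
        max_tokens=1,  # deliberately tiny so the completion is cut off
    )
except openai.LengthFinishReasonError:
    # finish_reason was "length": no complete JSON was produced, so nothing could be parsed
    print("Response was truncated; retry with a larger max_tokens")
```
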
@@ -132,7 +133,7 @@ OpenAI supports streaming responses when interacting with the [Chat Completion](
 
 ## Chat Completions API
 
-The SDK provides a `.beta.chat.completions.stream()` method that wraps the `.chat.completions.create(stream=True)` stream providing a more granular event API & automatic accumulation of each delta.
+The SDK provides a `.chat.completions.stream()` method that wraps the `.chat.completions.create(stream=True)` stream, providing a more granular event API & automatic accumulation of each delta.
 
 It also supports all aforementioned [parsing helpers](#structured-outputs-parsing-helpers).
 
@@ -143,7 +144,7 @@ from openai import AsyncOpenAI
 
 client = AsyncOpenAI()
 
-async with client.beta.chat.completions.stream(
+async with client.chat.completions.stream(
     model='gpt-4o-2024-08-06',
     messages=[...],
 ) as stream:
@@ -263,7 +264,7 @@ A handful of helper methods are provided on the stream class for additional conv
 Returns the accumulated `ParsedChatCompletion` object
 
 ```py
-async with client.beta.chat.completions.stream(...) as stream:
+async with client.chat.completions.stream(...) as stream:
     ...
 
 completion = await stream.get_final_completion()
@@ -275,7 +276,7 @@ print(completion.choices[0].message)
 If you want to wait for the stream to complete, you can use the `.until_done()` method.
 
 ```py
-async with client.beta.chat.completions.stream(...) as stream:
+async with client.chat.completions.stream(...) as stream:
     await stream.until_done()
     # stream is now finished
 ```
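
Beyond `.get_final_completion()` and `.until_done()`, the stream can also be iterated for typed events (the snapshots above reference events such as `RefusalDoneEvent`). A minimal sketch, assuming the content events carry the `content.delta` / `content.done` type names used by the streaming helpers:

```py
from openai import OpenAI

client = OpenAI()

with client.chat.completions.stream(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "Write a haiku about the sea."}],
) as stream:
    for event in stream:
        if event.type == "content.delta":
            # print text as it arrives; event.snapshot holds the accumulated text so far
            print(event.delta, end="", flush=True)
        elif event.type == "content.done":
            print()  # newline once the assistant message is complete

    completion = stream.get_final_completion()
    print(completion.usage)
```
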
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.91.0"
+version = "1.92.0"
 description = "The official Python library for the openai API"
 dynamic = ["readme"]
 license = "Apache-2.0"
README.md
@@ -406,6 +406,84 @@ client.files.create(
 
 The async client uses the exact same interface. If you pass a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance, the file contents will be read asynchronously automatically.
 
+## Webhook Verification
+
+Verifying webhook signatures is _optional but encouraged_.
+
+### Parsing webhook payloads
+
+For most use cases, you will likely want to verify the webhook and parse the payload at the same time. To achieve this, we provide the method `client.webhooks.unwrap()`, which parses a webhook request and verifies that it was sent by OpenAI. This method will raise an error if the signature is invalid.
+
+Note that the `body` parameter must be the raw JSON string sent from the server (do not parse it first). The `.unwrap()` method will parse this JSON for you into an event object after verifying the webhook was sent from OpenAI.
+
+```python
+from openai import OpenAI
+from flask import Flask, request
+
+app = Flask(__name__)
+client = OpenAI()  # OPENAI_WEBHOOK_SECRET environment variable is used by default
+
+
+@app.route("/webhook", methods=["POST"])
+def webhook():
+    request_body = request.get_data(as_text=True)
+
+    try:
+        event = client.webhooks.unwrap(request_body, request.headers)
+
+        if event.type == "response.completed":
+            print("Response completed:", event.data)
+        elif event.type == "response.failed":
+            print("Response failed:", event.data)
+        else:
+            print("Unhandled event type:", event.type)
+
+        return "ok"
+    except Exception as e:
+        print("Invalid signature:", e)
+        return "Invalid signature", 400
+
+
+if __name__ == "__main__":
+    app.run(port=8000)
+```
+
+### Verifying webhook payloads directly
+
+In some cases, you may want to verify the webhook separately from parsing the payload. If you prefer to handle these steps separately, we provide the method `client.webhooks.verify_signature()` to _only verify_ the signature of a webhook request. Like `.unwrap()`, this method will raise an error if the signature is invalid.
+
+Note that the `body` parameter must be the raw JSON string sent from the server (do not parse it first). You will then need to parse the body after verifying the signature.
+
+```python
+import json
+from openai import OpenAI
+from flask import Flask, request
+
+app = Flask(__name__)
+client = OpenAI()  # OPENAI_WEBHOOK_SECRET environment variable is used by default
+
+
+@app.route("/webhook", methods=["POST"])
+def webhook():
+    request_body = request.get_data(as_text=True)
+
+    try:
+        client.webhooks.verify_signature(request_body, request.headers)
+
+        # Parse the body after verification
+        event = json.loads(request_body)
+        print("Verified event:", event)
+
+        return "ok"
+    except Exception as e:
+        print("Invalid signature:", e)
+        return "Invalid signature", 400
+
+
+if __name__ == "__main__":
+    app.run(port=8000)
+```
+
 ## Handling errors
 
 When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `openai.APIConnectionError` is raised.