Commit 550b855f
Author: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
Date:   2024-10-18 00:49:46
Parent: af8e0ad

feat(api): add gpt-4o-audio-preview model for chat completions (#1796)

This enables audio inputs and outputs in the Chat Completions API. See https://platform.openai.com/docs/guides/audio.
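For orientation before the diffs: a minimal end-to-end sketch of what this commit wires up, assuming an `OPENAI_API_KEY` in the environment and the post-commit SDK. The prompt, output filename, and voice choice are illustrative; `model`, `modalities`, and `audio` come straight from this diff.

```python
import base64

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Ask the audio-capable model for both text and audio output.
completion = client.chat.completions.create(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio={"voice": "alloy", "format": "wav"},
    messages=[{"role": "user", "content": "Is a golden retriever a good family dog?"}],
)

# The audio payload arrives base64-encoded on `message.audio.data`.
wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
with open("answer.wav", "wb") as f:
    f.write(wav_bytes)
```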
src/openai/resources/beta/chat/completions.py
@@ -28,7 +28,9 @@ from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatComplet
 from ....types.chat.chat_completion import ChatCompletion
 from ....types.chat.chat_completion_chunk import ChatCompletionChunk
 from ....types.chat.parsed_chat_completion import ParsedChatCompletion
+from ....types.chat.chat_completion_modality import ChatCompletionModality
 from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
+from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
 from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
 from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
 from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
@@ -61,6 +63,7 @@ class Completions(SyncAPIResource):
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
@@ -70,6 +73,7 @@ class Completions(SyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -153,6 +157,7 @@ class Completions(SyncAPIResource):
                 {
                     "messages": messages,
                     "model": model,
+                    "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
                     "functions": functions,
@@ -161,6 +166,7 @@ class Completions(SyncAPIResource):
                     "max_completion_tokens": max_completion_tokens,
                     "max_tokens": max_tokens,
                     "metadata": metadata,
+                    "modalities": modalities,
                     "n": n,
                     "parallel_tool_calls": parallel_tool_calls,
                     "presence_penalty": presence_penalty,
@@ -198,6 +204,7 @@ class Completions(SyncAPIResource):
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
@@ -207,6 +214,7 @@ class Completions(SyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -259,6 +267,7 @@ class Completions(SyncAPIResource):
             self._client.chat.completions.create,
             messages=messages,
             model=model,
+            audio=audio,
             stream=True,
             response_format=_type_to_response_format(response_format),
             frequency_penalty=frequency_penalty,
@@ -269,6 +278,7 @@ class Completions(SyncAPIResource):
             max_completion_tokens=max_completion_tokens,
             max_tokens=max_tokens,
             metadata=metadata,
+            modalities=modalities,
             n=n,
             parallel_tool_calls=parallel_tool_calls,
             presence_penalty=presence_penalty,
@@ -320,6 +330,7 @@ class AsyncCompletions(AsyncAPIResource):
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
@@ -329,6 +340,7 @@ class AsyncCompletions(AsyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -412,6 +424,7 @@ class AsyncCompletions(AsyncAPIResource):
                 {
                     "messages": messages,
                     "model": model,
+                    "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
                     "functions": functions,
@@ -420,6 +433,7 @@ class AsyncCompletions(AsyncAPIResource):
                     "max_completion_tokens": max_completion_tokens,
                     "max_tokens": max_tokens,
                     "metadata": metadata,
+                    "modalities": modalities,
                     "n": n,
                     "parallel_tool_calls": parallel_tool_calls,
                     "presence_penalty": presence_penalty,
@@ -457,6 +471,7 @@ class AsyncCompletions(AsyncAPIResource):
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
@@ -466,6 +481,7 @@ class AsyncCompletions(AsyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -519,6 +535,7 @@ class AsyncCompletions(AsyncAPIResource):
         api_request = self._client.chat.completions.create(
             messages=messages,
             model=model,
+            audio=audio,
             stream=True,
             response_format=_type_to_response_format(response_format),
             frequency_penalty=frequency_penalty,
@@ -529,6 +546,7 @@ class AsyncCompletions(AsyncAPIResource):
             max_completion_tokens=max_completion_tokens,
             max_tokens=max_tokens,
             metadata=metadata,
+            modalities=modalities,
             n=n,
             parallel_tool_calls=parallel_tool_calls,
             presence_penalty=presence_penalty,
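The beta helpers above do no work of their own: they forward the two new keyword arguments into `chat.completions.create(...)`. A hedged sketch of the passthrough via the streaming helper, deliberately staying with text-only output, since whether a given model streams audio is a property of the API rather than of this SDK change:

```python
from openai import OpenAI

client = OpenAI()

# `audio` and `modalities` now thread through the beta streaming helper
# unchanged; here we only exercise the default text modality.
with client.beta.chat.completions.stream(
    model="gpt-4o",
    modalities=["text"],
    messages=[{"role": "user", "content": "Say hello."}],
) as stream:
    for event in stream:
        if event.type == "content.delta":
            print(event.delta, end="")
```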
src/openai/resources/chat/completions.py
@@ -20,12 +20,17 @@ from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
 from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ..._streaming import Stream, AsyncStream
-from ...types.chat import completion_create_params
+from ...types.chat import (
+    ChatCompletionAudioParam,
+    completion_create_params,
+)
 from ..._base_client import make_request_options
 from ...types.chat_model import ChatModel
 from ...types.chat.chat_completion import ChatCompletion
 from ...types.chat.chat_completion_chunk import ChatCompletionChunk
+from ...types.chat.chat_completion_modality import ChatCompletionModality
 from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam
+from ...types.chat.chat_completion_audio_param import ChatCompletionAudioParam
 from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam
 from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
 from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
@@ -59,6 +64,7 @@ class Completions(SyncAPIResource):
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -67,6 +73,7 @@ class Completions(SyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -90,8 +97,12 @@ class Completions(SyncAPIResource):
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -105,6 +116,10 @@ class Completions(SyncAPIResource):
               [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
               table for details on which models work with the Chat API.
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -152,7 +167,18 @@ class Completions(SyncAPIResource):
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -213,8 +239,9 @@ class Completions(SyncAPIResource):
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
               sent as data-only
@@ -276,6 +303,7 @@ class Completions(SyncAPIResource):
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
         stream: Literal[True],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -284,6 +312,7 @@ class Completions(SyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -306,8 +335,12 @@ class Completions(SyncAPIResource):
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> Stream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -328,6 +361,10 @@ class Completions(SyncAPIResource):
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -375,7 +412,18 @@ class Completions(SyncAPIResource):
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -436,8 +484,9 @@ class Completions(SyncAPIResource):
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
@@ -492,6 +541,7 @@ class Completions(SyncAPIResource):
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
         stream: bool,
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -500,6 +550,7 @@ class Completions(SyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -522,8 +573,12 @@ class Completions(SyncAPIResource):
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion | Stream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -544,6 +599,10 @@ class Completions(SyncAPIResource):
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -591,7 +650,18 @@ class Completions(SyncAPIResource):
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -652,8 +722,9 @@ class Completions(SyncAPIResource):
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
@@ -707,6 +778,7 @@ class Completions(SyncAPIResource):
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -715,6 +787,7 @@ class Completions(SyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -745,6 +818,7 @@ class Completions(SyncAPIResource):
                 {
                     "messages": messages,
                     "model": model,
+                    "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
                     "functions": functions,
@@ -753,6 +827,7 @@ class Completions(SyncAPIResource):
                     "max_completion_tokens": max_completion_tokens,
                     "max_tokens": max_tokens,
                     "metadata": metadata,
+                    "modalities": modalities,
                     "n": n,
                     "parallel_tool_calls": parallel_tool_calls,
                     "presence_penalty": presence_penalty,
@@ -807,6 +882,7 @@ class AsyncCompletions(AsyncAPIResource):
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -815,6 +891,7 @@ class AsyncCompletions(AsyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -838,8 +915,12 @@ class AsyncCompletions(AsyncAPIResource):
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -853,6 +934,10 @@ class AsyncCompletions(AsyncAPIResource):
               [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
               table for details on which models work with the Chat API.
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -900,7 +985,18 @@ class AsyncCompletions(AsyncAPIResource):
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -961,8 +1057,9 @@ class AsyncCompletions(AsyncAPIResource):
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
               sent as data-only
@@ -1024,6 +1121,7 @@ class AsyncCompletions(AsyncAPIResource):
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
         stream: Literal[True],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -1032,6 +1130,7 @@ class AsyncCompletions(AsyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1054,8 +1153,12 @@ class AsyncCompletions(AsyncAPIResource):
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> AsyncStream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -1076,6 +1179,10 @@ class AsyncCompletions(AsyncAPIResource):
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -1123,7 +1230,18 @@ class AsyncCompletions(AsyncAPIResource):
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -1184,8 +1302,9 @@ class AsyncCompletions(AsyncAPIResource):
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
@@ -1240,6 +1359,7 @@ class AsyncCompletions(AsyncAPIResource):
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
         stream: bool,
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -1248,6 +1368,7 @@ class AsyncCompletions(AsyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1270,8 +1391,12 @@ class AsyncCompletions(AsyncAPIResource):
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -1292,6 +1417,10 @@ class AsyncCompletions(AsyncAPIResource):
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -1339,7 +1468,18 @@ class AsyncCompletions(AsyncAPIResource):
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -1400,8 +1540,9 @@ class AsyncCompletions(AsyncAPIResource):
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
@@ -1455,6 +1596,7 @@ class AsyncCompletions(AsyncAPIResource):
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -1463,6 +1605,7 @@ class AsyncCompletions(AsyncAPIResource):
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1493,6 +1636,7 @@ class AsyncCompletions(AsyncAPIResource):
                 {
                     "messages": messages,
                     "model": model,
+                    "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
                     "functions": functions,
@@ -1501,6 +1645,7 @@ class AsyncCompletions(AsyncAPIResource):
                     "max_completion_tokens": max_completion_tokens,
                     "max_tokens": max_tokens,
                     "metadata": metadata,
+                    "modalities": modalities,
                     "n": n,
                     "parallel_tool_calls": parallel_tool_calls,
                     "presence_penalty": presence_penalty,
src/openai/types/beta/assistant_stream_event.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Union
+from typing import Union, Optional
 from typing_extensions import Literal, Annotated, TypeAlias
 
 from .thread import Thread
@@ -51,6 +51,9 @@ class ThreadCreated(BaseModel):
 
     event: Literal["thread.created"]
 
+    enabled: Optional[bool] = None
+    """Whether to enable input audio transcription."""
+
 
 class ThreadRunCreated(BaseModel):
     data: Run
src/openai/types/chat/__init__.py
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 from .chat_completion import ChatCompletion as ChatCompletion
 from .chat_completion_role import ChatCompletionRole as ChatCompletionRole
+from .chat_completion_audio import ChatCompletionAudio as ChatCompletionAudio
 from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
 from .parsed_chat_completion import (
     ParsedChoice as ParsedChoice,
@@ -11,12 +12,14 @@ from .parsed_chat_completion import (
     ParsedChatCompletionMessage as ParsedChatCompletionMessage,
 )
 from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage
+from .chat_completion_modality import ChatCompletionModality as ChatCompletionModality
 from .completion_create_params import CompletionCreateParams as CompletionCreateParams
 from .parsed_function_tool_call import (
     ParsedFunction as ParsedFunction,
     ParsedFunctionToolCall as ParsedFunctionToolCall,
 )
 from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam
+from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam
 from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam
 from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
 from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall
@@ -52,3 +55,6 @@ from .chat_completion_content_part_refusal_param import (
 from .chat_completion_function_call_option_param import (
     ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam,
 )
+from .chat_completion_content_part_input_audio_param import (
+    ChatCompletionContentPartInputAudioParam as ChatCompletionContentPartInputAudioParam,
+)
src/openai/types/chat/chat_completion_assistant_message_param.py
@@ -9,7 +9,13 @@ from .chat_completion_content_part_text_param import ChatCompletionContentPartTe
 from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam
 from .chat_completion_content_part_refusal_param import ChatCompletionContentPartRefusalParam
 
-__all__ = ["ChatCompletionAssistantMessageParam", "ContentArrayOfContentPart", "FunctionCall"]
+__all__ = ["ChatCompletionAssistantMessageParam", "Audio", "ContentArrayOfContentPart", "FunctionCall"]
+
+
+class Audio(TypedDict, total=False):
+    id: Required[str]
+    """Unique identifier for a previous audio response from the model."""
+
 
 ContentArrayOfContentPart: TypeAlias = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartRefusalParam]
 
@@ -31,6 +37,12 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False):
     role: Required[Literal["assistant"]]
     """The role of the messages author, in this case `assistant`."""
 
+    audio: Optional[Audio]
+    """Data about a previous audio response from the model.
+
+    [Learn more](https://platform.openai.com/docs/guides/audio).
+    """
+
     content: Union[str, Iterable[ContentArrayOfContentPart], None]
     """The contents of the assistant message.
 
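The new `Audio` field on the assistant message param is what makes multi-turn audio cheap: instead of resending decoded audio, a follow-up turn references the previous response by id. A sketch, where `"audio_abc123"` is a placeholder for the `message.audio.id` of a real prior response:

```python
from openai import OpenAI

client = OpenAI()

# "audio_abc123" is hypothetical: substitute message.audio.id from a
# previous audio response (see ChatCompletionAudio.id below in this diff).
followup = client.chat.completions.create(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio={"voice": "alloy", "format": "wav"},
    messages=[
        {"role": "user", "content": "Tell me a joke."},
        {"role": "assistant", "audio": {"id": "audio_abc123"}},
        {"role": "user", "content": "Explain why that is funny."},
    ],
)
```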
src/openai/types/chat/chat_completion_audio.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+
+from ..._models import BaseModel
+
+__all__ = ["ChatCompletionAudio"]
+
+
+class ChatCompletionAudio(BaseModel):
+    id: str
+    """Unique identifier for this audio response."""
+
+    data: str
+    """
+    Base64 encoded audio bytes generated by the model, in the format specified in
+    the request.
+    """
+
+    expires_at: int
+    """
+    The Unix timestamp (in seconds) for when this audio response will no longer be
+    accessible on the server for use in multi-turn conversations.
+    """
+
+    transcript: str
+    """Transcript of the audio generated by the model."""
src/openai/types/chat/chat_completion_audio_param.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ChatCompletionAudioParam"]
+
+
+class ChatCompletionAudioParam(TypedDict, total=False):
+    format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]]
+    """Specifies the output audio format.
+
+    Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.
+    """
+
+    voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]]
+    """Specifies the voice type.
+
+    Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`.
+    """
src/openai/types/chat/chat_completion_content_part_input_audio_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ChatCompletionContentPartInputAudioParam", "InputAudio"]
+
+
+class InputAudio(TypedDict, total=False):
+    data: Required[str]
+    """Base64 encoded audio data."""
+
+    format: Required[Literal["wav", "mp3"]]
+    """The format of the encoded audio data. Currently supports "wav" and "mp3"."""
+
+
+class ChatCompletionContentPartInputAudioParam(TypedDict, total=False):
+    input_audio: Required[InputAudio]
+
+    type: Required[Literal["input_audio"]]
+    """The type of the content part. Always `input_audio`."""
src/openai/types/chat/chat_completion_content_part_param.py
@@ -7,9 +7,10 @@ from typing_extensions import TypeAlias
 
 from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
 from .chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
+from .chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam
 
 __all__ = ["ChatCompletionContentPartParam"]
 
 ChatCompletionContentPartParam: TypeAlias = Union[
-    ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam
+    ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam, ChatCompletionContentPartInputAudioParam
 ]
src/openai/types/chat/chat_completion_message.py
@@ -4,6 +4,7 @@ from typing import List, Optional
 from typing_extensions import Literal
 
 from ..._models import BaseModel
+from .chat_completion_audio import ChatCompletionAudio
 from .chat_completion_message_tool_call import ChatCompletionMessageToolCall
 
 __all__ = ["ChatCompletionMessage", "FunctionCall"]
@@ -32,6 +33,13 @@ class ChatCompletionMessage(BaseModel):
     role: Literal["assistant"]
     """The role of the author of this message."""
 
+    audio: Optional[ChatCompletionAudio] = None
+    """
+    If the audio output modality is requested, this object contains data about the
+    audio response from the model.
+    [Learn more](https://platform.openai.com/docs/guides/audio).
+    """
+
     function_call: Optional[FunctionCall] = None
     """Deprecated and replaced by `tool_calls`.
 
src/openai/types/chat/chat_completion_modality.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["ChatCompletionModality"]
+
+ChatCompletionModality: TypeAlias = Literal["text", "audio"]
src/openai/types/chat/completion_create_params.py
@@ -6,7 +6,9 @@ from typing import Dict, List, Union, Iterable, Optional
 from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from ..chat_model import ChatModel
+from .chat_completion_modality import ChatCompletionModality
 from .chat_completion_tool_param import ChatCompletionToolParam
+from .chat_completion_audio_param import ChatCompletionAudioParam
 from .chat_completion_message_param import ChatCompletionMessageParam
 from ..shared_params.function_parameters import FunctionParameters
 from ..shared_params.response_format_text import ResponseFormatText
@@ -45,6 +47,13 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     table for details on which models work with the Chat API.
     """
 
+    audio: Optional[ChatCompletionAudioParam]
+    """Parameters for audio output.
+
+    Required when audio output is requested with `modalities: ["audio"]`.
+    [Learn more](https://platform.openai.com/docs/guides/audio).
+    """
+
     frequency_penalty: Optional[float]
     """Number between -2.0 and 2.0.
 
@@ -112,7 +121,21 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     metadata: Optional[Dict[str, str]]
     """
     Developer-defined tags and values used for filtering completions in the
-    [dashboard](https://platform.openai.com/completions).
+    [dashboard](https://platform.openai.com/chat-completions).
+    """
+
+    modalities: Optional[List[ChatCompletionModality]]
+    """
+    Output types that you would like the model to generate for this request. Most
+    models are capable of generating text, which is the default:
+
+    `["text"]`
+
+    The `gpt-4o-audio-preview` model can also be used to
+    [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+    this model generate both text and audio responses, you can use:
+
+    `["text", "audio"]`
     """
 
     n: Optional[int]
@@ -195,8 +218,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
 
     store: Optional[bool]
     """
-    Whether or not to store the output of this completion request for traffic
-    logging in the [dashboard](https://platform.openai.com/completions).
+    Whether or not to store the output of this chat completion request for use in
+    our [model distillation](https://platform.openai.com/docs/guides/distillation)
+    or [evals](https://platform.openai.com/docs/guides/evals) products.
     """
 
     stream_options: Optional[ChatCompletionStreamOptionsParam]
src/openai/types/chat_model.py
@@ -12,7 +12,10 @@ ChatModel: TypeAlias = Literal[
     "gpt-4o",
     "gpt-4o-2024-08-06",
     "gpt-4o-2024-05-13",
+    "gpt-4o-realtime-preview",
     "gpt-4o-realtime-preview-2024-10-01",
+    "gpt-4o-audio-preview",
+    "gpt-4o-audio-preview-2024-10-01",
     "chatgpt-4o-latest",
     "gpt-4o-mini",
     "gpt-4o-mini-2024-07-18",
tests/api_resources/chat/test_completions.py
@@ -44,6 +44,10 @@ class TestCompletions:
                 }
             ],
             model="gpt-4o",
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -58,6 +62,7 @@ class TestCompletions:
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
@@ -167,6 +172,10 @@ class TestCompletions:
             ],
             model="gpt-4o",
             stream=True,
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -181,6 +190,7 @@ class TestCompletions:
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
@@ -309,6 +319,10 @@ class TestAsyncCompletions:
                 }
             ],
             model="gpt-4o",
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -323,6 +337,7 @@ class TestAsyncCompletions:
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
@@ -432,6 +447,10 @@ class TestAsyncCompletions:
             ],
             model="gpt-4o",
             stream=True,
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -446,6 +465,7 @@ class TestAsyncCompletions:
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
tests/lib/chat/test_completions.py
@@ -58,6 +58,7 @@ ParsedChatCompletion[NoneType](
             index=0,
             logprobs=None,
             message=ParsedChatCompletionMessage[NoneType](
+                audio=None,
                 content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I
 recommend checking a reliable weather website or app like the Weather Channel or a local news station.",
                 function_call=None,
@@ -120,6 +121,7 @@ ParsedChatCompletion[Location](
             index=0,
             logprobs=None,
             message=ParsedChatCompletionMessage[Location](
+                audio=None,
                 content='{"city":"San Francisco","temperature":65,"units":"f"}',
                 function_call=None,
                 parsed=Location(city='San Francisco', temperature=65.0, units='f'),
@@ -183,6 +185,7 @@ ParsedChatCompletion[Location](
             index=0,
             logprobs=None,
             message=ParsedChatCompletionMessage[Location](
+                audio=None,
                 content='{"city":"San Francisco","temperature":65,"units":"f"}',
                 function_call=None,
                 parsed=Location(city='San Francisco', temperature=65.0, units='f'),
@@ -248,6 +251,7 @@ ParsedChoice[ColorDetection](
     index=0,
     logprobs=None,
     message=ParsedChatCompletionMessage[ColorDetection](
+        audio=None,
         content='{"color":"red","hex_color_code":"#FF0000"}',
         function_call=None,
         parsed=ColorDetection(color=<Color.RED: 'red'>, hex_color_code='#FF0000'),
@@ -296,6 +300,7 @@ def test_parse_pydantic_model_multiple_choices(
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[Location](
+            audio=None,
             content='{"city":"San Francisco","temperature":64,"units":"f"}',
             function_call=None,
             parsed=Location(city='San Francisco', temperature=64.0, units='f'),
@@ -309,6 +314,7 @@ def test_parse_pydantic_model_multiple_choices(
         index=1,
         logprobs=None,
         message=ParsedChatCompletionMessage[Location](
+            audio=None,
             content='{"city":"San Francisco","temperature":65,"units":"f"}',
             function_call=None,
             parsed=Location(city='San Francisco', temperature=65.0, units='f'),
@@ -322,6 +328,7 @@ def test_parse_pydantic_model_multiple_choices(
         index=2,
         logprobs=None,
         message=ParsedChatCompletionMessage[Location](
+            audio=None,
             content='{"city":"San Francisco","temperature":63.0,"units":"f"}',
             function_call=None,
             parsed=Location(city='San Francisco', temperature=63.0, units='f'),
@@ -371,6 +378,7 @@ ParsedChatCompletion[CalendarEvent](
             index=0,
             logprobs=None,
             message=ParsedChatCompletionMessage[CalendarEvent](
+                audio=None,
                 content='{"name":"Science Fair","date":"Friday","participants":["Alice","Bob"]}',
                 function_call=None,
                 parsed=CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob']),
@@ -426,6 +434,7 @@ ParsedChoice[Query](
     index=0,
     logprobs=None,
     message=ParsedChatCompletionMessage[Query](
+        audio=None,
         content=None,
         function_call=None,
         parsed=None,
@@ -536,6 +545,7 @@ def test_parse_pydantic_model_refusal(client: OpenAI, respx_mock: MockRouter, mo
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[Location](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -584,6 +594,7 @@ def test_parse_pydantic_tool(client: OpenAI, respx_mock: MockRouter, monkeypatch
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[NoneType](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -655,6 +666,7 @@ def test_parse_multiple_pydantic_tools(client: OpenAI, respx_mock: MockRouter, m
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[NoneType](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -735,6 +747,7 @@ def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch:
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[NoneType](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -816,6 +829,7 @@ ParsedChatCompletion[Location](
             index=0,
             logprobs=None,
             message=ParsedChatCompletionMessage[Location](
+                audio=None,
                 content='{"city":"San Francisco","temperature":58,"units":"f"}',
                 function_call=None,
                 parsed=Location(city='San Francisco', temperature=58.0, units='f'),
@@ -885,6 +899,7 @@ ParsedChatCompletion[Location](
             index=0,
             logprobs=None,
             message=ParsedChatCompletionMessage[Location](
+                audio=None,
                 content='{"city":"San Francisco","temperature":65,"units":"f"}',
                 function_call=None,
                 parsed=Location(city='San Francisco', temperature=65.0, units='f'),
tests/lib/chat/test_completions_streaming.py
@@ -61,6 +61,7 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[NoneType](
+            audio=None,
             content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I 
 recommend checking a reliable weather website or a weather app.",
             function_call=None,
@@ -138,6 +139,7 @@ ParsedChatCompletion[Location](
             index=0,
             logprobs=None,
             message=ParsedChatCompletionMessage[Location](
+                audio=None,
                 content='{"city":"San Francisco","temperature":61,"units":"f"}',
                 function_call=None,
                 parsed=Location(city='San Francisco', temperature=61.0, units='f'),
@@ -309,6 +311,7 @@ def test_parse_pydantic_model_multiple_choices(
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[Location](
+            audio=None,
             content='{"city":"San Francisco","temperature":65,"units":"f"}',
             function_call=None,
             parsed=Location(city='San Francisco', temperature=65.0, units='f'),
@@ -322,6 +325,7 @@ def test_parse_pydantic_model_multiple_choices(
         index=1,
         logprobs=None,
         message=ParsedChatCompletionMessage[Location](
+            audio=None,
             content='{"city":"San Francisco","temperature":61,"units":"f"}',
             function_call=None,
             parsed=Location(city='San Francisco', temperature=61.0, units='f'),
@@ -335,6 +339,7 @@ def test_parse_pydantic_model_multiple_choices(
         index=2,
         logprobs=None,
         message=ParsedChatCompletionMessage[Location](
+            audio=None,
             content='{"city":"San Francisco","temperature":59,"units":"f"}',
             function_call=None,
             parsed=Location(city='San Francisco', temperature=59.0, units='f'),
@@ -409,6 +414,7 @@ RefusalDoneEvent(refusal="I'm sorry, I can't assist with that request.", type='r
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[Location](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -482,6 +488,7 @@ def test_content_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeyp
             refusal=None
         ),
         message=ParsedChatCompletionMessage[NoneType](
+            audio=None,
             content='Foo!',
             function_call=None,
             parsed=None,
@@ -592,6 +599,7 @@ def test_refusal_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeyp
             ]
         ),
         message=ParsedChatCompletionMessage[Location](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -637,6 +645,7 @@ def test_parse_pydantic_tool(client: OpenAI, respx_mock: MockRouter, monkeypatch
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[object](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -668,6 +677,7 @@ def test_parse_pydantic_tool(client: OpenAI, respx_mock: MockRouter, monkeypatch
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[NoneType](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -738,6 +748,7 @@ def test_parse_multiple_pydantic_tools(client: OpenAI, respx_mock: MockRouter, m
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[object](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -845,6 +856,7 @@ def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch:
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[object](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
@@ -895,6 +907,7 @@ def test_non_pydantic_response_format(client: OpenAI, respx_mock: MockRouter, mo
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[NoneType](
+            audio=None,
             content='\\n  {\\n    "location": "San Francisco, CA",\\n    "weather": {\\n      "temperature": "18°C",\\n      
 "condition": "Partly Cloudy",\\n      "humidity": "72%",\\n      "windSpeed": "15 km/h",\\n      "windDirection": "NW"\\n   
 },\\n    "forecast": [\\n      {\\n        "day": "Monday",\\n        "high": "20°C",\\n        "low": "14°C",\\n        
@@ -954,6 +967,7 @@ FunctionToolCallArgumentsDoneEvent(
         index=0,
         logprobs=None,
         message=ParsedChatCompletionMessage[NoneType](
+            audio=None,
             content=None,
             function_call=None,
             parsed=None,
.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 68
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-52b934aee6468039ec7f4ce046a282b5fbce114afc708e70f17121df654f71da.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8729aaa35436531ab453224af10e67f89677db8f350f0346bb3537489edea649.yml
api.md
@@ -39,9 +39,12 @@ Types:
 from openai.types.chat import (
     ChatCompletion,
     ChatCompletionAssistantMessageParam,
+    ChatCompletionAudio,
+    ChatCompletionAudioParam,
     ChatCompletionChunk,
     ChatCompletionContentPart,
     ChatCompletionContentPartImage,
+    ChatCompletionContentPartInputAudio,
     ChatCompletionContentPartRefusal,
     ChatCompletionContentPartText,
     ChatCompletionFunctionCallOption,
@@ -49,6 +52,7 @@ from openai.types.chat import (
     ChatCompletionMessage,
     ChatCompletionMessageParam,
     ChatCompletionMessageToolCall,
+    ChatCompletionModality,
     ChatCompletionNamedToolChoice,
     ChatCompletionRole,
     ChatCompletionStreamOptions,