Commit 575ff607
Changed files (25)
src/openai/resources/beta/realtime/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .realtime import (
+ Realtime,
+ AsyncRealtime,
+ RealtimeWithRawResponse,
+ AsyncRealtimeWithRawResponse,
+ RealtimeWithStreamingResponse,
+ AsyncRealtimeWithStreamingResponse,
+)
+from .sessions import (
+ Sessions,
+ AsyncSessions,
+ SessionsWithRawResponse,
+ AsyncSessionsWithRawResponse,
+ SessionsWithStreamingResponse,
+ AsyncSessionsWithStreamingResponse,
+)
+
+__all__ = [
+ "Sessions",
+ "AsyncSessions",
+ "SessionsWithRawResponse",
+ "AsyncSessionsWithRawResponse",
+ "SessionsWithStreamingResponse",
+ "AsyncSessionsWithStreamingResponse",
+ "Realtime",
+ "AsyncRealtime",
+ "RealtimeWithRawResponse",
+ "AsyncRealtimeWithRawResponse",
+ "RealtimeWithStreamingResponse",
+ "AsyncRealtimeWithStreamingResponse",
+]
src/openai/resources/beta/realtime/realtime.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .sessions import (
+ Sessions,
+ AsyncSessions,
+ SessionsWithRawResponse,
+ AsyncSessionsWithRawResponse,
+ SessionsWithStreamingResponse,
+ AsyncSessionsWithStreamingResponse,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["Realtime", "AsyncRealtime"]
+
+
+class Realtime(SyncAPIResource):
+ @cached_property
+ def sessions(self) -> Sessions:
+ return Sessions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> RealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return RealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> RealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return RealtimeWithStreamingResponse(self)
+
+
+class AsyncRealtime(AsyncAPIResource):
+ @cached_property
+ def sessions(self) -> AsyncSessions:
+ return AsyncSessions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncRealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncRealtimeWithStreamingResponse(self)
+
+
+class RealtimeWithRawResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> SessionsWithRawResponse:
+ return SessionsWithRawResponse(self._realtime.sessions)
+
+
+class AsyncRealtimeWithRawResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsWithRawResponse:
+ return AsyncSessionsWithRawResponse(self._realtime.sessions)
+
+
+class RealtimeWithStreamingResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> SessionsWithStreamingResponse:
+ return SessionsWithStreamingResponse(self._realtime.sessions)
+
+
+class AsyncRealtimeWithStreamingResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsWithStreamingResponse:
+ return AsyncSessionsWithStreamingResponse(self._realtime.sessions)
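The file above is plain wiring: `beta.realtime` exposes a `sessions` sub-resource, and the raw/streaming wrapper classes mirror the same tree. A minimal usage sketch, assuming this SDK build is installed and `OPENAI_API_KEY` is set in the environment:

```python
from openai import OpenAI

client = OpenAI()

# The cached_property chain resolves client.beta -> realtime -> sessions.
session = client.beta.realtime.sessions.create(
    model="gpt-4o-realtime-preview-2024-12-17",
)

# The wrapper classes mirror the tree, so raw-response access reads the same way.
raw = client.beta.realtime.with_raw_response.sessions.create(
    model="gpt-4o-realtime-preview-2024-12-17",
)
print(raw.headers.get("x-request-id"))
```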
src/openai/resources/beta/realtime/sessions.py
@@ -0,0 +1,337 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._base_client import make_request_options
+from ....types.beta.realtime import session_create_params
+from ....types.beta.realtime.session_create_response import SessionCreateResponse
+
+__all__ = ["Sessions", "AsyncSessions"]
+
+
+class Sessions(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> SessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return SessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> SessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return SessionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ],
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ instructions: str | NotGiven = NOT_GIVEN,
+ max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: str | NotGiven = NOT_GIVEN,
+ tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+ turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionCreateResponse:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API. Can be configured with the same session parameters as the
+ `session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key containing an
+ ephemeral API token that browser clients can use to authenticate with the
+ Realtime API.
+
+ Args:
+ model: The Realtime model used for this session.
+
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+
+ input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+
+ instructions: The default system instructions (i.e. system message) prepended to model calls.
+ This field allows the client to guide the model on desired responses. The model
+ can be instructed on response content and format (e.g. "be extremely succinct",
+ "act friendly", "here are examples of good responses") and on audio behavior
+ (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+ instructions are not guaranteed to be followed by the model, but they provide
+ guidance to the model on the desired behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+
+ max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+
+ temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
+
+ tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+ a function.
+
+ tools: Tools (functions) available to the model.
+
+ turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+ means that the model will detect the start and end of speech based on audio
+ volume and respond at the end of user speech.
+
+ voice: The voice the model uses to respond. Voice cannot be changed during the session
+ once the model has responded with audio at least once. Current voice options are
+ `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/realtime/sessions",
+ body=maybe_transform(
+ {
+ "model": model,
+ "input_audio_format": input_audio_format,
+ "input_audio_transcription": input_audio_transcription,
+ "instructions": instructions,
+ "max_response_output_tokens": max_response_output_tokens,
+ "modalities": modalities,
+ "output_audio_format": output_audio_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "turn_detection": turn_detection,
+ "voice": voice,
+ },
+ session_create_params.SessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+
+class AsyncSessions(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncSessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncSessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncSessionsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ],
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ instructions: str | NotGiven = NOT_GIVEN,
+ max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: str | NotGiven = NOT_GIVEN,
+ tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+ turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionCreateResponse:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API. Can be configured with the same session parameters as the
+ `session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key containing an
+ ephemeral API token that browser clients can use to authenticate with the
+ Realtime API.
+
+ Args:
+ model: The Realtime model used for this session.
+
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+
+ input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+
+ instructions: The default system instructions (i.e. system message) prepended to model calls.
+ This field allows the client to guide the model on desired responses. The model
+ can be instructed on response content and format (e.g. "be extremely succinct",
+ "act friendly", "here are examples of good responses") and on audio behavior
+ (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+ instructions are not guaranteed to be followed by the model, but they provide
+ guidance to the model on the desired behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+
+ max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+
+ temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
+
+ tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+ a function.
+
+ tools: Tools (functions) available to the model.
+
+ turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+ means that the model will detect the start and end of speech based on audio
+ volume and respond at the end of user speech.
+
+ voice: The voice the model uses to respond. Voice cannot be changed during the session
+ once the model has responded with audio at least once. Current voice options are
+ `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/realtime/sessions",
+ body=await async_maybe_transform(
+ {
+ "model": model,
+ "input_audio_format": input_audio_format,
+ "input_audio_transcription": input_audio_transcription,
+ "instructions": instructions,
+ "max_response_output_tokens": max_response_output_tokens,
+ "modalities": modalities,
+ "output_audio_format": output_audio_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "turn_detection": turn_detection,
+ "voice": voice,
+ },
+ session_create_params.SessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+
+class SessionsWithRawResponse:
+ def __init__(self, sessions: Sessions) -> None:
+ self._sessions = sessions
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ sessions.create,
+ )
+
+
+class AsyncSessionsWithRawResponse:
+ def __init__(self, sessions: AsyncSessions) -> None:
+ self._sessions = sessions
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ sessions.create,
+ )
+
+
+class SessionsWithStreamingResponse:
+ def __init__(self, sessions: Sessions) -> None:
+ self._sessions = sessions
+
+ self.create = to_streamed_response_wrapper(
+ sessions.create,
+ )
+
+
+class AsyncSessionsWithStreamingResponse:
+ def __init__(self, sessions: AsyncSessions) -> None:
+ self._sessions = sessions
+
+ self.create = async_to_streamed_response_wrapper(
+ sessions.create,
+ )
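The async variant has an identical call shape apart from `await`. A hedged sketch of minting an ephemeral token with the endpoint above; per the response model later in this commit, the token expires after about one minute, so it should be handed to the browser client promptly:

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    session = await client.beta.realtime.sessions.create(
        model="gpt-4o-mini-realtime-preview",
        modalities=["text", "audio"],
        voice="verse",
    )
    # Hand this value to the browser client; never ship the standard API key.
    if session.client_secret is not None:
        print(session.client_secret.value)


asyncio.run(main())
```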
src/openai/resources/beta/beta.py
@@ -21,6 +21,14 @@ from .threads.threads import (
ThreadsWithStreamingResponse,
AsyncThreadsWithStreamingResponse,
)
+from .realtime.realtime import (
+ Realtime,
+ AsyncRealtime,
+ RealtimeWithRawResponse,
+ AsyncRealtimeWithRawResponse,
+ RealtimeWithStreamingResponse,
+ AsyncRealtimeWithStreamingResponse,
+)
from .vector_stores.vector_stores import (
VectorStores,
AsyncVectorStores,
@@ -38,6 +46,10 @@ class Beta(SyncAPIResource):
def chat(self) -> Chat:
return Chat(self._client)
+ @cached_property
+ def realtime(self) -> Realtime:
+ return Realtime(self._client)
+
@cached_property
def vector_stores(self) -> VectorStores:
return VectorStores(self._client)
@@ -75,6 +87,10 @@ class AsyncBeta(AsyncAPIResource):
def chat(self) -> AsyncChat:
return AsyncChat(self._client)
+ @cached_property
+ def realtime(self) -> AsyncRealtime:
+ return AsyncRealtime(self._client)
+
@cached_property
def vector_stores(self) -> AsyncVectorStores:
return AsyncVectorStores(self._client)
@@ -111,6 +127,10 @@ class BetaWithRawResponse:
def __init__(self, beta: Beta) -> None:
self._beta = beta
+ @cached_property
+ def realtime(self) -> RealtimeWithRawResponse:
+ return RealtimeWithRawResponse(self._beta.realtime)
+
@cached_property
def vector_stores(self) -> VectorStoresWithRawResponse:
return VectorStoresWithRawResponse(self._beta.vector_stores)
@@ -128,6 +148,10 @@ class AsyncBetaWithRawResponse:
def __init__(self, beta: AsyncBeta) -> None:
self._beta = beta
+ @cached_property
+ def realtime(self) -> AsyncRealtimeWithRawResponse:
+ return AsyncRealtimeWithRawResponse(self._beta.realtime)
+
@cached_property
def vector_stores(self) -> AsyncVectorStoresWithRawResponse:
return AsyncVectorStoresWithRawResponse(self._beta.vector_stores)
@@ -145,6 +169,10 @@ class BetaWithStreamingResponse:
def __init__(self, beta: Beta) -> None:
self._beta = beta
+ @cached_property
+ def realtime(self) -> RealtimeWithStreamingResponse:
+ return RealtimeWithStreamingResponse(self._beta.realtime)
+
@cached_property
def vector_stores(self) -> VectorStoresWithStreamingResponse:
return VectorStoresWithStreamingResponse(self._beta.vector_stores)
@@ -162,6 +190,10 @@ class AsyncBetaWithStreamingResponse:
def __init__(self, beta: AsyncBeta) -> None:
self._beta = beta
+ @cached_property
+ def realtime(self) -> AsyncRealtimeWithStreamingResponse:
+ return AsyncRealtimeWithStreamingResponse(self._beta.realtime)
+
@cached_property
def vector_stores(self) -> AsyncVectorStoresWithStreamingResponse:
return AsyncVectorStoresWithStreamingResponse(self._beta.vector_stores)
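The streaming-response wrappers wired in above defer reading the body until the caller asks for it. A sketch under the same environment assumptions as earlier:

```python
from openai import OpenAI

client = OpenAI()

# with_streaming_response returns a context manager; the body is not read
# until .read() / .iter_bytes() / .iter_lines() is called on the response.
with client.beta.realtime.with_streaming_response.sessions.create(
    model="gpt-4o-realtime-preview",
) as response:
    print(response.headers.get("content-type"))
    data = response.read()
```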
src/openai/resources/chat/completions.py
@@ -22,6 +22,7 @@ from ..._response import to_streamed_response_wrapper, async_to_streamed_respons
from ..._streaming import Stream, AsyncStream
from ...types.chat import (
ChatCompletionAudioParam,
+ ChatCompletionReasoningEffort,
completion_create_params,
)
from ..._base_client import make_request_options
@@ -32,6 +33,7 @@ from ...types.chat.chat_completion_modality import ChatCompletionModality
from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam
from ...types.chat.chat_completion_audio_param import ChatCompletionAudioParam
from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from ...types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
from ...types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
@@ -79,6 +81,7 @@ class Completions(SyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -106,6 +109,12 @@ class Completions(SyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -126,16 +135,18 @@ class Completions(SyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -197,13 +208,14 @@ class Completions(SyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -259,9 +271,8 @@ class Completions(SyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -322,6 +333,7 @@ class Completions(SyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -348,6 +360,12 @@ class Completions(SyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -375,16 +393,18 @@ class Completions(SyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -446,13 +466,14 @@ class Completions(SyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -501,9 +522,8 @@ class Completions(SyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -564,6 +584,7 @@ class Completions(SyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -590,6 +611,12 @@ class Completions(SyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -617,16 +644,18 @@ class Completions(SyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -688,13 +717,14 @@ class Completions(SyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -743,9 +773,8 @@ class Completions(SyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -805,6 +834,7 @@ class Completions(SyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -846,6 +876,7 @@ class Completions(SyncAPIResource):
"parallel_tool_calls": parallel_tool_calls,
"prediction": prediction,
"presence_penalty": presence_penalty,
+ "reasoning_effort": reasoning_effort,
"response_format": response_format,
"seed": seed,
"service_tier": service_tier,
@@ -911,6 +942,7 @@ class AsyncCompletions(AsyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -938,6 +970,12 @@ class AsyncCompletions(AsyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -958,16 +996,18 @@ class AsyncCompletions(AsyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -1029,13 +1069,14 @@ class AsyncCompletions(AsyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -1091,9 +1132,8 @@ class AsyncCompletions(AsyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -1154,6 +1194,7 @@ class AsyncCompletions(AsyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -1180,6 +1221,12 @@ class AsyncCompletions(AsyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -1207,16 +1254,18 @@ class AsyncCompletions(AsyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -1278,13 +1327,14 @@ class AsyncCompletions(AsyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -1333,9 +1383,8 @@ class AsyncCompletions(AsyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -1396,6 +1445,7 @@ class AsyncCompletions(AsyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -1422,6 +1472,12 @@ class AsyncCompletions(AsyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -1449,16 +1505,18 @@ class AsyncCompletions(AsyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -1520,13 +1578,14 @@ class AsyncCompletions(AsyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -1575,9 +1634,8 @@ class AsyncCompletions(AsyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -1637,6 +1695,7 @@ class AsyncCompletions(AsyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -1678,6 +1737,7 @@ class AsyncCompletions(AsyncAPIResource):
"parallel_tool_calls": parallel_tool_calls,
"prediction": prediction,
"presence_penalty": presence_penalty,
+ "reasoning_effort": reasoning_effort,
"response_format": response_format,
"seed": seed,
"service_tier": service_tier,
src/openai/resources/fine_tuning/jobs/jobs.py
@@ -67,6 +67,7 @@ class Jobs(SyncAPIResource):
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ method: job_create_params.Method | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
@@ -99,17 +100,22 @@ class Jobs(SyncAPIResource):
your file with the purpose `fine-tune`.
The contents of the file should differ depending on whether the model uses the
- [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+ [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input),
[completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+ format, or if the fine-tuning method uses the
+ [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
format.
See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
for more details.
- hyperparameters: The hyperparameters used for the fine-tuning job.
+ hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated
+ in favor of `method`, and should be passed in under the `method` parameter.
integrations: A list of integrations to enable for your fine-tuning job.
+ method: The method used for fine-tuning.
+
seed: The seed controls the reproducibility of the job. Passing in the same seed and
job parameters should produce the same results, but may differ in rare cases. If
a seed is not specified, one will be generated for you.
@@ -149,6 +155,7 @@ class Jobs(SyncAPIResource):
"training_file": training_file,
"hyperparameters": hyperparameters,
"integrations": integrations,
+ "method": method,
"seed": seed,
"suffix": suffix,
"validation_file": validation_file,
@@ -358,6 +365,7 @@ class AsyncJobs(AsyncAPIResource):
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ method: job_create_params.Method | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
@@ -390,17 +398,22 @@ class AsyncJobs(AsyncAPIResource):
your file with the purpose `fine-tune`.
The contents of the file should differ depending on whether the model uses the
- [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+ [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input),
[completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+ format, or if the fine-tuning method uses the
+ [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
format.
See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
for more details.
- hyperparameters: The hyperparameters used for the fine-tuning job.
+ hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated
+ in favor of `method`, and should be passed in under the `method` parameter.
integrations: A list of integrations to enable for your fine-tuning job.
+ method: The method used for fine-tuning.
+
seed: The seed controls the reproducibility of the job. Passing in the same seed and
job parameters should produce the same results, but may differ in rare cases. If
a seed is not specified, one will be generated for you.
@@ -440,6 +453,7 @@ class AsyncJobs(AsyncAPIResource):
"training_file": training_file,
"hyperparameters": hyperparameters,
"integrations": integrations,
+ "method": method,
"seed": seed,
"suffix": suffix,
"validation_file": validation_file,
src/openai/types/beta/realtime/__init__.py
@@ -0,0 +1,6 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .session_create_params import SessionCreateParams as SessionCreateParams
+from .session_create_response import SessionCreateResponse as SessionCreateResponse
src/openai/types/beta/realtime/session_create_params.py
@@ -0,0 +1,149 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["SessionCreateParams", "InputAudioTranscription", "Tool", "TurnDetection"]
+
+
+class SessionCreateParams(TypedDict, total=False):
+ model: Required[
+ Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ ]
+ """The Realtime model used for this session."""
+
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: InputAudioTranscription
+ """
+ Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+ """
+
+ instructions: str
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"]]
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: List[Literal["text", "audio"]]
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: float
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: str
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Iterable[Tool]
+ """Tools (functions) available to the model."""
+
+ turn_detection: TurnDetection
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer` and `verse`.
+ """
+
+
+class InputAudioTranscription(TypedDict, total=False):
+ model: str
+ """
+ The model to use for transcription; `whisper-1` is the only currently supported
+ model.
+ """
+
+
+class Tool(TypedDict, total=False):
+ description: str
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: str
+ """The name of the function."""
+
+ parameters: object
+ """Parameters of the function in JSON Schema."""
+
+ type: Literal["function"]
+ """The type of the tool, i.e. `function`."""
+
+
+class TurnDetection(TypedDict, total=False):
+ create_response: bool
+ """Whether or not to automatically generate a response when VAD is enabled.
+
+ `true` by default.
+ """
+
+ prefix_padding_ms: int
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: int
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: float
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: str
+ """Type of turn detection, only `server_vad` is currently supported."""
src/openai/types/beta/realtime/session_create_response.py
@@ -0,0 +1,150 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["SessionCreateResponse", "ClientSecret", "InputAudioTranscription", "Tool", "TurnDetection"]
+
+
+class ClientSecret(BaseModel):
+ expires_at: Optional[int] = None
+ """Timestamp for when the token expires.
+
+ Currently, all tokens expire after one minute.
+ """
+
+ value: Optional[str] = None
+ """
+ Ephemeral key usable in client environments to authenticate connections to the
+ Realtime API. Use this in client-side environments rather than a standard API
+ token, which should only be used server-side.
+ """
+
+
+class InputAudioTranscription(BaseModel):
+ model: Optional[str] = None
+ """
+ The model to use for transcription; `whisper-1` is the only currently
+ supported model.
+ """
+
+
+class Tool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class TurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: Optional[str] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class SessionCreateResponse(BaseModel):
+ client_secret: Optional[ClientSecret] = None
+ """Ephemeral key returned by the API."""
+
+ input_audio_format: Optional[str] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: Optional[InputAudioTranscription] = None
+ """
+ Configuration for input audio transcription; defaults to off and can be set
+ to `null` to turn it off once it is on. Input audio transcription is not
+ native to the model, since the model consumes audio directly. Transcription
+ runs asynchronously through Whisper and should be treated as rough guidance
+ rather than the representation understood by the model.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format, (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Optional[str] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[Tool]] = None
+ """Tools (functions) available to the model."""
+
+ turn_detection: Optional[TurnDetection] = None
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
src/openai/types/chat/__init__.py
@@ -22,6 +22,7 @@ from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletio
from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam
from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam
from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
+from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort as ChatCompletionReasoningEffort
from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall
from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam
from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam
@@ -37,6 +38,9 @@ from .chat_completion_assistant_message_param import (
from .chat_completion_content_part_text_param import (
ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam,
)
+from .chat_completion_developer_message_param import (
+ ChatCompletionDeveloperMessageParam as ChatCompletionDeveloperMessageParam,
+)
from .chat_completion_message_tool_call_param import (
ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam,
)
src/openai/types/chat/chat_completion_developer_message_param.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
+
+__all__ = ["ChatCompletionDeveloperMessageParam"]
+
+
+class ChatCompletionDeveloperMessageParam(TypedDict, total=False):
+ content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]]
+ """The contents of the developer message."""
+
+ role: Required[Literal["developer"]]
+ """The role of the messages author, in this case `developer`."""
+
+ name: str
+ """An optional name for the participant.
+
+ Provides the model information to differentiate between participants of the same
+ role.
+ """
src/openai/types/chat/chat_completion_message_param.py
@@ -10,10 +10,12 @@ from .chat_completion_user_message_param import ChatCompletionUserMessageParam
from .chat_completion_system_message_param import ChatCompletionSystemMessageParam
from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam
from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam
+from .chat_completion_developer_message_param import ChatCompletionDeveloperMessageParam
__all__ = ["ChatCompletionMessageParam"]
ChatCompletionMessageParam: TypeAlias = Union[
+ ChatCompletionDeveloperMessageParam,
ChatCompletionSystemMessageParam,
ChatCompletionUserMessageParam,
ChatCompletionAssistantMessageParam,
src/openai/types/chat/chat_completion_reasoning_effort.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["ChatCompletionReasoningEffort"]
+
+ChatCompletionReasoningEffort: TypeAlias = Literal["low", "medium", "high"]
src/openai/types/chat/completion_create_params.py
@@ -10,6 +10,7 @@ from .chat_completion_modality import ChatCompletionModality
from .chat_completion_tool_param import ChatCompletionToolParam
from .chat_completion_audio_param import ChatCompletionAudioParam
from .chat_completion_message_param import ChatCompletionMessageParam
+from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort
from ..shared_params.function_parameters import FunctionParameters
from ..shared_params.response_format_text import ResponseFormatText
from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
@@ -60,19 +61,21 @@ class CompletionCreateParamsBase(TypedDict, total=False):
Positive values penalize new tokens based on their existing frequency in the
text so far, decreasing the model's likelihood to repeat the same line verbatim.
-
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
"""
function_call: FunctionCall
"""Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
"""
@@ -164,18 +167,20 @@ class CompletionCreateParamsBase(TypedDict, total=False):
Positive values penalize new tokens based on whether they appear in the text so
far, increasing the model's likelihood to talk about new topics.
+ """
+
+ reasoning_effort: ChatCompletionReasoningEffort
+ """**o1 models only**
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
"""
response_format: ResponseFormat
"""An object specifying the format that the model must output.
- Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
in the
@@ -237,9 +242,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""What sampling temperature to use, between 0 and 2.
Higher values like 0.8 will make the output more random, while lower values like
- 0.2 will make it more focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ 0.2 will make it more focused and deterministic. We generally recommend altering
+ this or `top_p` but not both.
"""
tool_choice: ChatCompletionToolChoiceOptionParam
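Putting the two chat additions together, a sketch of a request that caps reasoning work on an o1-series model; the accepted values follow the `ChatCompletionReasoningEffort` literal defined above:

```python
from openai import OpenAI

client = OpenAI()

# reasoning_effort trades latency and token cost against reasoning depth;
# only "low", "medium", and "high" are accepted.
completion = client.chat.completions.create(
    model="o1",
    reasoning_effort="low",
    messages=[{"role": "user", "content": "Summarize the CAP theorem in two lines."}],
)
print(completion.choices[0].message.content)
```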
src/openai/types/fine_tuning/fine_tuning_job.py
@@ -6,7 +6,16 @@ from typing_extensions import Literal
from ..._models import BaseModel
from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
-__all__ = ["FineTuningJob", "Error", "Hyperparameters"]
+__all__ = [
+ "FineTuningJob",
+ "Error",
+ "Hyperparameters",
+ "Method",
+ "MethodDpo",
+ "MethodDpoHyperparameters",
+ "MethodSupervised",
+ "MethodSupervisedHyperparameters",
+]
class Error(BaseModel):
@@ -24,15 +33,96 @@ class Error(BaseModel):
class Hyperparameters(BaseModel):
- n_epochs: Union[Literal["auto"], int]
+ batch_size: Union[Literal["auto"], int, None] = None
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float, None] = None
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int, None] = None
+ """The number of epochs to train the model for.
+
+ An epoch refers to one full cycle through the training dataset.
+ """
+
+
+class MethodDpoHyperparameters(BaseModel):
+ batch_size: Union[Literal["auto"], int, None] = None
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ beta: Union[Literal["auto"], float, None] = None
+ """The beta value for the DPO method.
+
+ A higher beta value will increase the weight of the penalty between the policy
+ and reference model.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float, None] = None
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int, None] = None
"""The number of epochs to train the model for.
- An epoch refers to one full cycle through the training dataset. "auto" decides
- the optimal number of epochs based on the size of the dataset. If setting the
- number manually, we support any number between 1 and 50 epochs.
+ An epoch refers to one full cycle through the training dataset.
"""
+class MethodDpo(BaseModel):
+ hyperparameters: Optional[MethodDpoHyperparameters] = None
+ """The hyperparameters used for the fine-tuning job."""
+
+
+class MethodSupervisedHyperparameters(BaseModel):
+ batch_size: Union[Literal["auto"], int, None] = None
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float, None] = None
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int, None] = None
+ """The number of epochs to train the model for.
+
+ An epoch refers to one full cycle through the training dataset.
+ """
+
+
+class MethodSupervised(BaseModel):
+ hyperparameters: Optional[MethodSupervisedHyperparameters] = None
+ """The hyperparameters used for the fine-tuning job."""
+
+
+class Method(BaseModel):
+ dpo: Optional[MethodDpo] = None
+ """Configuration for the DPO fine-tuning method."""
+
+ supervised: Optional[MethodSupervised] = None
+ """Configuration for the supervised fine-tuning method."""
+
+ type: Optional[Literal["supervised", "dpo"]] = None
+ """The type of method. Is either `supervised` or `dpo`."""
+
+
class FineTuningJob(BaseModel):
id: str
"""The object identifier, which can be referenced in the API endpoints."""
@@ -61,8 +151,7 @@ class FineTuningJob(BaseModel):
hyperparameters: Hyperparameters
"""The hyperparameters used for the fine-tuning job.
- See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
- for more details.
+ This value will only be returned when running `supervised` jobs.
"""
model: str
@@ -118,3 +207,6 @@ class FineTuningJob(BaseModel):
integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None
"""A list of integrations to enable for this fine-tuning job."""
+
+ method: Optional[Method] = None
+ """The method used for fine-tuning."""
src/openai/types/fine_tuning/fine_tuning_job_event.py
@@ -1,5 +1,7 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+import builtins
+from typing import Optional
from typing_extensions import Literal
from ..._models import BaseModel
@@ -9,11 +11,22 @@ __all__ = ["FineTuningJobEvent"]
class FineTuningJobEvent(BaseModel):
id: str
+ """The object identifier."""
created_at: int
+ """The Unix timestamp (in seconds) for when the fine-tuning job was created."""
level: Literal["info", "warn", "error"]
+ """The log level of the event."""
message: str
+ """The message of the event."""
object: Literal["fine_tuning.job.event"]
+ """The object type, which is always "fine_tuning.job.event"."""
+
+ data: Optional[builtins.object] = None
+ """The data associated with the event."""
+
+ type: Optional[Literal["message", "metrics"]] = None
+ """The type of event."""
src/openai/types/fine_tuning/job_create_params.py
@@ -5,7 +5,17 @@ from __future__ import annotations
from typing import List, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"]
+__all__ = [
+ "JobCreateParams",
+ "Hyperparameters",
+ "Integration",
+ "IntegrationWandb",
+ "Method",
+ "MethodDpo",
+ "MethodDpoHyperparameters",
+ "MethodSupervised",
+ "MethodSupervisedHyperparameters",
+]
class JobCreateParams(TypedDict, total=False):
@@ -26,8 +36,10 @@ class JobCreateParams(TypedDict, total=False):
your file with the purpose `fine-tune`.
The contents of the file should differ depending on if the model uses the
- [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+ [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input),
[completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+ format, or if the fine-tuning method uses the
+ [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
format.
See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
@@ -35,11 +47,17 @@ class JobCreateParams(TypedDict, total=False):
"""
hyperparameters: Hyperparameters
- """The hyperparameters used for the fine-tuning job."""
+ """
+ The hyperparameters used for the fine-tuning job. This value is now deprecated
+ in favor of `method`, and should be passed in under the `method` parameter.
+ """
integrations: Optional[Iterable[Integration]]
"""A list of integrations to enable for your fine-tuning job."""
+ method: Method
+ """The method used for fine-tuning."""
+
seed: Optional[int]
"""The seed controls the reproducibility of the job.
@@ -134,3 +152,73 @@ class Integration(TypedDict, total=False):
can set an explicit display name for your run, add tags to your run, and set a
default entity (team, username, etc) to be associated with your run.
"""
+
+
+class MethodDpoHyperparameters(TypedDict, total=False):
+ batch_size: Union[Literal["auto"], int]
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ beta: Union[Literal["auto"], float]
+ """The beta value for the DPO method.
+
+ A higher beta value will increase the weight of the penalty between the policy
+ and reference model.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float]
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int]
+ """The number of epochs to train the model for.
+
+ An epoch refers to one full cycle through the training dataset.
+ """
+
+
+class MethodDpo(TypedDict, total=False):
+ hyperparameters: MethodDpoHyperparameters
+ """The hyperparameters used for the fine-tuning job."""
+
+
+class MethodSupervisedHyperparameters(TypedDict, total=False):
+ batch_size: Union[Literal["auto"], int]
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float]
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int]
+ """The number of epochs to train the model for.
+
+ An epoch refers to one full cycle through the training dataset.
+ """
+
+
+class MethodSupervised(TypedDict, total=False):
+ hyperparameters: MethodSupervisedHyperparameters
+ """The hyperparameters used for the fine-tuning job."""
+
+
+class Method(TypedDict, total=False):
+ dpo: MethodDpo
+ """Configuration for the DPO fine-tuning method."""
+
+ supervised: MethodSupervised
+ """Configuration for the supervised fine-tuning method."""
+
+ type: Literal["supervised", "dpo"]
+ """The type of method. Is either `supervised` or `dpo`."""
src/openai/types/chat_model.py
@@ -5,6 +5,8 @@ from typing_extensions import Literal, TypeAlias
__all__ = ["ChatModel"]
ChatModel: TypeAlias = Literal[
+ "o1",
+ "o1-2024-12-17",
"o1-preview",
"o1-preview-2024-09-12",
"o1-mini",
@@ -13,10 +15,11 @@ ChatModel: TypeAlias = Literal[
"gpt-4o-2024-11-20",
"gpt-4o-2024-08-06",
"gpt-4o-2024-05-13",
- "gpt-4o-realtime-preview",
- "gpt-4o-realtime-preview-2024-10-01",
"gpt-4o-audio-preview",
"gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
"chatgpt-4o-latest",
"gpt-4o-mini",
"gpt-4o-mini-2024-07-18",
tests/api_resources/beta/realtime/__init__.py
@@ -0,0 +1,1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
tests/api_resources/beta/realtime/test_sessions.py
@@ -0,0 +1,146 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types.beta.realtime import SessionCreateResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestSessions:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ session = client.beta.realtime.sessions.create(
+ model="gpt-4o-realtime-preview",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: OpenAI) -> None:
+ session = client.beta.realtime.sessions.create(
+ model="gpt-4o-realtime-preview",
+ input_audio_format="pcm16",
+ input_audio_transcription={"model": "model"},
+ instructions="instructions",
+ max_response_output_tokens=0,
+ modalities=["text"],
+ output_audio_format="pcm16",
+ temperature=0,
+ tool_choice="tool_choice",
+ tools=[
+ {
+ "description": "description",
+ "name": "name",
+ "parameters": {},
+ "type": "function",
+ }
+ ],
+ turn_detection={
+ "create_response": True,
+ "prefix_padding_ms": 0,
+ "silence_duration_ms": 0,
+ "threshold": 0,
+ "type": "type",
+ },
+ voice="alloy",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.beta.realtime.sessions.with_raw_response.create(
+ model="gpt-4o-realtime-preview",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.beta.realtime.sessions.with_streaming_response.create(
+ model="gpt-4o-realtime-preview",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncSessions:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ session = await async_client.beta.realtime.sessions.create(
+ model="gpt-4o-realtime-preview",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ session = await async_client.beta.realtime.sessions.create(
+ model="gpt-4o-realtime-preview",
+ input_audio_format="pcm16",
+ input_audio_transcription={"model": "model"},
+ instructions="instructions",
+ max_response_output_tokens=0,
+ modalities=["text"],
+ output_audio_format="pcm16",
+ temperature=0,
+ tool_choice="tool_choice",
+ tools=[
+ {
+ "description": "description",
+ "name": "name",
+ "parameters": {},
+ "type": "function",
+ }
+ ],
+ turn_detection={
+ "create_response": True,
+ "prefix_padding_ms": 0,
+ "silence_duration_ms": 0,
+ "threshold": 0,
+ "type": "type",
+ },
+ voice="alloy",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.beta.realtime.sessions.with_raw_response.create(
+ model="gpt-4o-realtime-preview",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.beta.realtime.sessions.with_streaming_response.create(
+ model="gpt-4o-realtime-preview",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = await response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
tests/api_resources/chat/test_completions.py
@@ -26,7 +26,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -39,8 +39,8 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
- "name": "string",
+ "role": "developer",
+ "name": "name",
}
],
model="gpt-4o",
@@ -70,6 +70,7 @@ class TestCompletions:
"type": "content",
},
presence_penalty=-2,
+ reasoning_effort="low",
response_format={"type": "text"},
seed=-9007199254740991,
service_tier="auto",
@@ -102,7 +103,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -119,7 +120,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -138,7 +139,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -152,8 +153,8 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
- "name": "string",
+ "role": "developer",
+ "name": "name",
}
],
model="gpt-4o",
@@ -184,6 +185,7 @@ class TestCompletions:
"type": "content",
},
presence_penalty=-2,
+ reasoning_effort="low",
response_format={"type": "text"},
seed=-9007199254740991,
service_tier="auto",
@@ -215,7 +217,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -232,7 +234,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -273,7 +275,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -286,8 +288,8 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
- "name": "string",
+ "role": "developer",
+ "name": "name",
}
],
model="gpt-4o",
@@ -317,6 +319,7 @@ class TestAsyncCompletions:
"type": "content",
},
presence_penalty=-2,
+ reasoning_effort="low",
response_format={"type": "text"},
seed=-9007199254740991,
service_tier="auto",
@@ -349,7 +352,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -366,7 +369,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -385,7 +388,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -399,8 +402,8 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
- "name": "string",
+ "role": "developer",
+ "name": "name",
}
],
model="gpt-4o",
@@ -431,6 +434,7 @@ class TestAsyncCompletions:
"type": "content",
},
presence_penalty=-2,
+ reasoning_effort="low",
response_format={"type": "text"},
seed=-9007199254740991,
service_tier="auto",
@@ -462,7 +466,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -479,7 +483,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
tests/api_resources/fine_tuning/test_jobs.py
@@ -50,6 +50,24 @@ class TestJobs:
},
}
],
+ method={
+ "dpo": {
+ "hyperparameters": {
+ "batch_size": "auto",
+ "beta": "auto",
+ "learning_rate_multiplier": "auto",
+ "n_epochs": "auto",
+ }
+ },
+ "supervised": {
+ "hyperparameters": {
+ "batch_size": "auto",
+ "learning_rate_multiplier": "auto",
+ "n_epochs": "auto",
+ }
+ },
+ "type": "supervised",
+ },
seed=42,
suffix="x",
validation_file="file-abc123",
@@ -271,6 +289,24 @@ class TestAsyncJobs:
},
}
],
+ method={
+ "dpo": {
+ "hyperparameters": {
+ "batch_size": "auto",
+ "beta": "auto",
+ "learning_rate_multiplier": "auto",
+ "n_epochs": "auto",
+ }
+ },
+ "supervised": {
+ "hyperparameters": {
+ "batch_size": "auto",
+ "learning_rate_multiplier": "auto",
+ "n_epochs": "auto",
+ }
+ },
+ "type": "supervised",
+ },
seed=42,
suffix="x",
validation_file="file-abc123",
tests/test_client.py
@@ -795,7 +795,7 @@ class TestOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -827,7 +827,7 @@ class TestOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -859,7 +859,7 @@ class TestOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -891,7 +891,7 @@ class TestOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -1663,7 +1663,7 @@ class TestAsyncOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -1696,7 +1696,7 @@ class TestAsyncOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -1729,7 +1729,7 @@ class TestAsyncOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -1762,7 +1762,7 @@ class TestAsyncOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
.stats.yml
@@ -1,2 +1,2 @@
-configured_endpoints: 68
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-2e0e0678be19d1118fd796af291822075e40538dba326611e177e9f3dc245a53.yml
+configured_endpoints: 69
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-779ea2754025daf5e18eb8ceb203ec321692636bc3a999338556a479178efa6c.yml
api.md
@@ -47,6 +47,7 @@ from openai.types.chat import (
ChatCompletionContentPartInputAudio,
ChatCompletionContentPartRefusal,
ChatCompletionContentPartText,
+ ChatCompletionDeveloperMessageParam,
ChatCompletionFunctionCallOption,
ChatCompletionFunctionMessageParam,
ChatCompletionMessage,
@@ -55,6 +56,7 @@ from openai.types.chat import (
ChatCompletionModality,
ChatCompletionNamedToolChoice,
ChatCompletionPredictionContent,
+ ChatCompletionReasoningEffort,
ChatCompletionRole,
ChatCompletionStreamOptions,
ChatCompletionSystemMessageParam,
@@ -235,6 +237,20 @@ Methods:
# Beta
+## Realtime
+
+### Sessions
+
+Types:
+
+```python
+from openai.types.beta.realtime import Session, SessionCreateResponse
+```
+
+Methods:
+
+- <code title="post /realtime/sessions">client.beta.realtime.sessions.<a href="./src/openai/resources/beta/realtime/sessions.py">create</a>(\*\*<a href="src/openai/types/beta/realtime/session_create_params.py">params</a>) -> <a href="./src/openai/types/beta/realtime/session_create_response.py">SessionCreateResponse</a></code>
+
## VectorStores
Types: