Commit 575ff607
Changed files (25)
src/openai/resources/beta/realtime/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .realtime import (
+ Realtime,
+ AsyncRealtime,
+ RealtimeWithRawResponse,
+ AsyncRealtimeWithRawResponse,
+ RealtimeWithStreamingResponse,
+ AsyncRealtimeWithStreamingResponse,
+)
+from .sessions import (
+ Sessions,
+ AsyncSessions,
+ SessionsWithRawResponse,
+ AsyncSessionsWithRawResponse,
+ SessionsWithStreamingResponse,
+ AsyncSessionsWithStreamingResponse,
+)
+
+__all__ = [
+ "Sessions",
+ "AsyncSessions",
+ "SessionsWithRawResponse",
+ "AsyncSessionsWithRawResponse",
+ "SessionsWithStreamingResponse",
+ "AsyncSessionsWithStreamingResponse",
+ "Realtime",
+ "AsyncRealtime",
+ "RealtimeWithRawResponse",
+ "AsyncRealtimeWithRawResponse",
+ "RealtimeWithStreamingResponse",
+ "AsyncRealtimeWithStreamingResponse",
+]
src/openai/resources/beta/realtime/realtime.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .sessions import (
+ Sessions,
+ AsyncSessions,
+ SessionsWithRawResponse,
+ AsyncSessionsWithRawResponse,
+ SessionsWithStreamingResponse,
+ AsyncSessionsWithStreamingResponse,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["Realtime", "AsyncRealtime"]
+
+
+class Realtime(SyncAPIResource):
+ @cached_property
+ def sessions(self) -> Sessions:
+ return Sessions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> RealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return RealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> RealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return RealtimeWithStreamingResponse(self)
+
+
+class AsyncRealtime(AsyncAPIResource):
+ @cached_property
+ def sessions(self) -> AsyncSessions:
+ return AsyncSessions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncRealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncRealtimeWithStreamingResponse(self)
+
+
+class RealtimeWithRawResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> SessionsWithRawResponse:
+ return SessionsWithRawResponse(self._realtime.sessions)
+
+
+class AsyncRealtimeWithRawResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsWithRawResponse:
+ return AsyncSessionsWithRawResponse(self._realtime.sessions)
+
+
+class RealtimeWithStreamingResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> SessionsWithStreamingResponse:
+ return SessionsWithStreamingResponse(self._realtime.sessions)
+
+
+class AsyncRealtimeWithStreamingResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsWithStreamingResponse:
+ return AsyncSessionsWithStreamingResponse(self._realtime.sessions)
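The file above is plain wiring: `beta.realtime` exposes a `sessions` sub-resource, and the raw/streaming wrapper classes mirror the same tree. A minimal usage sketch, assuming this SDK build is installed and `OPENAI_API_KEY` is set in the environment:

```python
from openai import OpenAI

client = OpenAI()

# The cached_property chain resolves client.beta -> realtime -> sessions.
session = client.beta.realtime.sessions.create(
    model="gpt-4o-realtime-preview-2024-12-17",
)

# The wrapper classes mirror the tree, so raw-response access reads the same way.
raw = client.beta.realtime.with_raw_response.sessions.create(
    model="gpt-4o-realtime-preview-2024-12-17",
)
print(raw.headers.get("x-request-id"))
```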
src/openai/resources/beta/realtime/sessions.py
@@ -0,0 +1,337 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._base_client import make_request_options
+from ....types.beta.realtime import session_create_params
+from ....types.beta.realtime.session_create_response import SessionCreateResponse
+
+__all__ = ["Sessions", "AsyncSessions"]
+
+
+class Sessions(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> SessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return SessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> SessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return SessionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ],
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ instructions: str | NotGiven = NOT_GIVEN,
+ max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: str | NotGiven = NOT_GIVEN,
+ tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+ turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionCreateResponse:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API. Can be configured with the same session parameters as the
+ `session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key containing an
+ ephemeral API token that browser clients can use to authenticate with the
+ Realtime API.
+
+ Args:
+ model: The Realtime model used for this session.
+
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+
+ input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+
+ instructions: The default system instructions (i.e. system message) prepended to model calls.
+ This field allows the client to guide the model on desired responses. The model
+ can be instructed on response content and format (e.g. "be extremely succinct",
+ "act friendly", "here are examples of good responses") and on audio behavior
+ (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+ instructions are not guaranteed to be followed by the model, but they provide
+ guidance to the model on the desired behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+
+ max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+
+ temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
+
+ tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+ a function.
+
+ tools: Tools (functions) available to the model.
+
+ turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+ means that the model will detect the start and end of speech based on audio
+ volume and respond at the end of user speech.
+
+ voice: The voice the model uses to respond. Voice cannot be changed during the session
+ once the model has responded with audio at least once. Current voice options are
+ `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return self._post(
+ "/realtime/sessions",
+ body=maybe_transform(
+ {
+ "model": model,
+ "input_audio_format": input_audio_format,
+ "input_audio_transcription": input_audio_transcription,
+ "instructions": instructions,
+ "max_response_output_tokens": max_response_output_tokens,
+ "modalities": modalities,
+ "output_audio_format": output_audio_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "turn_detection": turn_detection,
+ "voice": voice,
+ },
+ session_create_params.SessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+
+class AsyncSessions(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncSessionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncSessionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncSessionsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ],
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+ instructions: str | NotGiven = NOT_GIVEN,
+ max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+ modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: str | NotGiven = NOT_GIVEN,
+ tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
+ turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionCreateResponse:
+ """
+ Create an ephemeral API token for use in client-side applications with the
+ Realtime API. Can be configured with the same session parameters as the
+ `session.update` client event.
+
+ It responds with a session object, plus a `client_secret` key containing an
+ ephemeral API token that browser clients can use to authenticate with the
+ Realtime API.
+
+ Args:
+ model: The Realtime model used for this session.
+
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+
+ input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+
+ instructions: The default system instructions (i.e. system message) prepended to model calls.
+ This field allows the client to guide the model on desired responses. The model
+ can be instructed on response content and format (e.g. "be extremely succinct",
+ "act friendly", "here are examples of good responses") and on audio behavior
+ (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+ instructions are not guaranteed to be followed by the model, but they provide
+ guidance to the model on the desired behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+
+ max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+ modalities: The set of modalities the model can respond with. To disable audio, set this to
+ ["text"].
+
+ output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+
+ temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
+
+ tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+ a function.
+
+ tools: Tools (functions) available to the model.
+
+ turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+ means that the model will detect the start and end of speech based on audio
+ volume and respond at the end of user speech.
+
+ voice: The voice the model uses to respond. Voice cannot be changed during the session
+ once the model has responded with audio at least once. Current voice options are
+ `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
+ return await self._post(
+ "/realtime/sessions",
+ body=await async_maybe_transform(
+ {
+ "model": model,
+ "input_audio_format": input_audio_format,
+ "input_audio_transcription": input_audio_transcription,
+ "instructions": instructions,
+ "max_response_output_tokens": max_response_output_tokens,
+ "modalities": modalities,
+ "output_audio_format": output_audio_format,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "turn_detection": turn_detection,
+ "voice": voice,
+ },
+ session_create_params.SessionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+
+class SessionsWithRawResponse:
+ def __init__(self, sessions: Sessions) -> None:
+ self._sessions = sessions
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ sessions.create,
+ )
+
+
+class AsyncSessionsWithRawResponse:
+ def __init__(self, sessions: AsyncSessions) -> None:
+ self._sessions = sessions
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ sessions.create,
+ )
+
+
+class SessionsWithStreamingResponse:
+ def __init__(self, sessions: Sessions) -> None:
+ self._sessions = sessions
+
+ self.create = to_streamed_response_wrapper(
+ sessions.create,
+ )
+
+
+class AsyncSessionsWithStreamingResponse:
+ def __init__(self, sessions: AsyncSessions) -> None:
+ self._sessions = sessions
+
+ self.create = async_to_streamed_response_wrapper(
+ sessions.create,
+ )
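The async variant has an identical call shape apart from `await`. A hedged sketch of minting an ephemeral token with the endpoint above; per the response model later in this commit, the token expires after about one minute, so it should be handed to the browser client promptly:

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    session = await client.beta.realtime.sessions.create(
        model="gpt-4o-mini-realtime-preview",
        modalities=["text", "audio"],
        voice="verse",
    )
    # Hand this value to the browser client; never ship the standard API key.
    if session.client_secret is not None:
        print(session.client_secret.value)


asyncio.run(main())
```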
src/openai/resources/beta/beta.py
@@ -21,6 +21,14 @@ from .threads.threads import (
ThreadsWithStreamingResponse,
AsyncThreadsWithStreamingResponse,
)
+from .realtime.realtime import (
+ Realtime,
+ AsyncRealtime,
+ RealtimeWithRawResponse,
+ AsyncRealtimeWithRawResponse,
+ RealtimeWithStreamingResponse,
+ AsyncRealtimeWithStreamingResponse,
+)
from .vector_stores.vector_stores import (
VectorStores,
AsyncVectorStores,
@@ -38,6 +46,10 @@ class Beta(SyncAPIResource):
def chat(self) -> Chat:
return Chat(self._client)
+ @cached_property
+ def realtime(self) -> Realtime:
+ return Realtime(self._client)
+
@cached_property
def vector_stores(self) -> VectorStores:
return VectorStores(self._client)
@@ -75,6 +87,10 @@ class AsyncBeta(AsyncAPIResource):
def chat(self) -> AsyncChat:
return AsyncChat(self._client)
+ @cached_property
+ def realtime(self) -> AsyncRealtime:
+ return AsyncRealtime(self._client)
+
@cached_property
def vector_stores(self) -> AsyncVectorStores:
return AsyncVectorStores(self._client)
@@ -111,6 +127,10 @@ class BetaWithRawResponse:
def __init__(self, beta: Beta) -> None:
self._beta = beta
+ @cached_property
+ def realtime(self) -> RealtimeWithRawResponse:
+ return RealtimeWithRawResponse(self._beta.realtime)
+
@cached_property
def vector_stores(self) -> VectorStoresWithRawResponse:
return VectorStoresWithRawResponse(self._beta.vector_stores)
@@ -128,6 +148,10 @@ class AsyncBetaWithRawResponse:
def __init__(self, beta: AsyncBeta) -> None:
self._beta = beta
+ @cached_property
+ def realtime(self) -> AsyncRealtimeWithRawResponse:
+ return AsyncRealtimeWithRawResponse(self._beta.realtime)
+
@cached_property
def vector_stores(self) -> AsyncVectorStoresWithRawResponse:
return AsyncVectorStoresWithRawResponse(self._beta.vector_stores)
@@ -145,6 +169,10 @@ class BetaWithStreamingResponse:
def __init__(self, beta: Beta) -> None:
self._beta = beta
+ @cached_property
+ def realtime(self) -> RealtimeWithStreamingResponse:
+ return RealtimeWithStreamingResponse(self._beta.realtime)
+
@cached_property
def vector_stores(self) -> VectorStoresWithStreamingResponse:
return VectorStoresWithStreamingResponse(self._beta.vector_stores)
@@ -162,6 +190,10 @@ class AsyncBetaWithStreamingResponse:
def __init__(self, beta: AsyncBeta) -> None:
self._beta = beta
+ @cached_property
+ def realtime(self) -> AsyncRealtimeWithStreamingResponse:
+ return AsyncRealtimeWithStreamingResponse(self._beta.realtime)
+
@cached_property
def vector_stores(self) -> AsyncVectorStoresWithStreamingResponse:
return AsyncVectorStoresWithStreamingResponse(self._beta.vector_stores)
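The streaming-response wrappers wired in above defer reading the body until the caller asks for it. A sketch under the same environment assumptions as earlier:

```python
from openai import OpenAI

client = OpenAI()

# with_streaming_response returns a context manager; the body is not read
# until .read() / .iter_bytes() / .iter_lines() is called on the response.
with client.beta.realtime.with_streaming_response.sessions.create(
    model="gpt-4o-realtime-preview",
) as response:
    print(response.headers.get("content-type"))
    data = response.read()
```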
src/openai/resources/chat/completions.py
@@ -22,6 +22,7 @@ from ..._response import to_streamed_response_wrapper, async_to_streamed_respons
from ..._streaming import Stream, AsyncStream
from ...types.chat import (
ChatCompletionAudioParam,
+ ChatCompletionReasoningEffort,
completion_create_params,
)
from ..._base_client import make_request_options
@@ -32,6 +33,7 @@ from ...types.chat.chat_completion_modality import ChatCompletionModality
from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam
from ...types.chat.chat_completion_audio_param import ChatCompletionAudioParam
from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from ...types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
from ...types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
@@ -79,6 +81,7 @@ class Completions(SyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -106,6 +109,12 @@ class Completions(SyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -126,16 +135,18 @@ class Completions(SyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -197,13 +208,14 @@ class Completions(SyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -259,9 +271,8 @@ class Completions(SyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -322,6 +333,7 @@ class Completions(SyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -348,6 +360,12 @@ class Completions(SyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -375,16 +393,18 @@ class Completions(SyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -446,13 +466,14 @@ class Completions(SyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -501,9 +522,8 @@ class Completions(SyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -564,6 +584,7 @@ class Completions(SyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -590,6 +611,12 @@ class Completions(SyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -617,16 +644,18 @@ class Completions(SyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -688,13 +717,14 @@ class Completions(SyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -743,9 +773,8 @@ class Completions(SyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -805,6 +834,7 @@ class Completions(SyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -846,6 +876,7 @@ class Completions(SyncAPIResource):
"parallel_tool_calls": parallel_tool_calls,
"prediction": prediction,
"presence_penalty": presence_penalty,
+ "reasoning_effort": reasoning_effort,
"response_format": response_format,
"seed": seed,
"service_tier": service_tier,
@@ -911,6 +942,7 @@ class AsyncCompletions(AsyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -938,6 +970,12 @@ class AsyncCompletions(AsyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -958,16 +996,18 @@ class AsyncCompletions(AsyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -1029,13 +1069,14 @@ class AsyncCompletions(AsyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -1091,9 +1132,8 @@ class AsyncCompletions(AsyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -1154,6 +1194,7 @@ class AsyncCompletions(AsyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -1180,6 +1221,12 @@ class AsyncCompletions(AsyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -1207,16 +1254,18 @@ class AsyncCompletions(AsyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -1278,13 +1327,14 @@ class AsyncCompletions(AsyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
+
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -1333,9 +1383,8 @@ class AsyncCompletions(AsyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -1396,6 +1445,7 @@ class AsyncCompletions(AsyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -1422,6 +1472,12 @@ class AsyncCompletions(AsyncAPIResource):
[vision](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio) guides.
+ Parameter support can differ depending on the model used to generate the
+ response, particularly for newer reasoning models. Parameters that are only
+ supported for reasoning models are noted below. For the current state of
+ unsupported parameters in reasoning models,
+ [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
+
Args:
messages: A list of messages comprising the conversation so far. Depending on the
[model](https://platform.openai.com/docs/models) you use, different message
@@ -1449,16 +1505,18 @@ class AsyncCompletions(AsyncAPIResource):
existing frequency in the text so far, decreasing the model's likelihood to
repeat the same line verbatim.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
-
function_call: Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
@@ -1520,13 +1578,14 @@ class AsyncCompletions(AsyncAPIResource):
whether they appear in the text so far, increasing the model's likelihood to
talk about new topics.
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ reasoning_effort: **o1 models only**
- response_format: An object specifying the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
+
+ response_format: An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
@@ -1575,9 +1634,8 @@ class AsyncCompletions(AsyncAPIResource):
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
not call any tool and instead generates a message. `auto` means the model can
@@ -1637,6 +1695,7 @@ class AsyncCompletions(AsyncAPIResource):
parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
@@ -1678,6 +1737,7 @@ class AsyncCompletions(AsyncAPIResource):
"parallel_tool_calls": parallel_tool_calls,
"prediction": prediction,
"presence_penalty": presence_penalty,
+ "reasoning_effort": reasoning_effort,
"response_format": response_format,
"seed": seed,
"service_tier": service_tier,
src/openai/resources/fine_tuning/jobs/jobs.py
@@ -67,6 +67,7 @@ class Jobs(SyncAPIResource):
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ method: job_create_params.Method | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
@@ -99,17 +100,22 @@ class Jobs(SyncAPIResource):
your file with the purpose `fine-tune`.
The contents of the file should differ depending on whether the model uses the
- [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+ [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input),
[completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+ format, or if the fine-tuning method uses the
+ [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
format.
See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
for more details.
- hyperparameters: The hyperparameters used for the fine-tuning job.
+ hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated
+ in favor of `method`, and should be passed in under the `method` parameter.
integrations: A list of integrations to enable for your fine-tuning job.
+ method: The method used for fine-tuning.
+
seed: The seed controls the reproducibility of the job. Passing in the same seed and
job parameters should produce the same results, but may differ in rare cases. If
a seed is not specified, one will be generated for you.
@@ -149,6 +155,7 @@ class Jobs(SyncAPIResource):
"training_file": training_file,
"hyperparameters": hyperparameters,
"integrations": integrations,
+ "method": method,
"seed": seed,
"suffix": suffix,
"validation_file": validation_file,
@@ -358,6 +365,7 @@ class AsyncJobs(AsyncAPIResource):
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ method: job_create_params.Method | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
@@ -390,17 +398,22 @@ class AsyncJobs(AsyncAPIResource):
your file with the purpose `fine-tune`.
The contents of the file should differ depending on whether the model uses the
- [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+ [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input),
[completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+ format, or if the fine-tuning method uses the
+ [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
format.
See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
for more details.
- hyperparameters: The hyperparameters used for the fine-tuning job.
+ hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated
+ in favor of `method`, and should be passed in under the `method` parameter.
integrations: A list of integrations to enable for your fine-tuning job.
+ method: The method used for fine-tuning.
+
seed: The seed controls the reproducibility of the job. Passing in the same seed and
job parameters should produce the same results, but may differ in rare cases. If
a seed is not specified, one will be generated for you.
@@ -440,6 +453,7 @@ class AsyncJobs(AsyncAPIResource):
"training_file": training_file,
"hyperparameters": hyperparameters,
"integrations": integrations,
+ "method": method,
"seed": seed,
"suffix": suffix,
"validation_file": validation_file,
src/openai/types/beta/realtime/__init__.py
@@ -0,0 +1,6 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .session_create_params import SessionCreateParams as SessionCreateParams
+from .session_create_response import SessionCreateResponse as SessionCreateResponse
src/openai/types/beta/realtime/session_create_params.py
@@ -0,0 +1,149 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["SessionCreateParams", "InputAudioTranscription", "Tool", "TurnDetection"]
+
+
+class SessionCreateParams(TypedDict, total=False):
+ model: Required[
+ Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ ]
+ """The Realtime model used for this session."""
+
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: InputAudioTranscription
+ """
+ Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+ """
+
+ instructions: str
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"]]
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: List[Literal["text", "audio"]]
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: float
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: str
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Iterable[Tool]
+ """Tools (functions) available to the model."""
+
+ turn_detection: TurnDetection
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer` and `verse`.
+ """
+
+
+class InputAudioTranscription(TypedDict, total=False):
+ model: str
+ """
+ The model to use for transcription; `whisper-1` is the only currently supported
+ model.
+ """
+
+
+class Tool(TypedDict, total=False):
+ description: str
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: str
+ """The name of the function."""
+
+ parameters: object
+ """Parameters of the function in JSON Schema."""
+
+ type: Literal["function"]
+ """The type of the tool, i.e. `function`."""
+
+
+class TurnDetection(TypedDict, total=False):
+ create_response: bool
+ """Whether or not to automatically generate a response when VAD is enabled.
+
+ `true` by default.
+ """
+
+ prefix_padding_ms: int
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: int
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: float
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: str
+ """Type of turn detection, only `server_vad` is currently supported."""
src/openai/types/beta/realtime/session_create_response.py
@@ -0,0 +1,150 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["SessionCreateResponse", "ClientSecret", "InputAudioTranscription", "Tool", "TurnDetection"]
+
+
+class ClientSecret(BaseModel):
+ expires_at: Optional[int] = None
+ """Timestamp for when the token expires.
+
+ Currently, all tokens expire after one minute.
+ """
+
+ value: Optional[str] = None
+ """
+ Ephemeral key usable in client environments to authenticate connections to the
+ Realtime API. Use this in client-side environments rather than a standard API
+ token, which should only be used server-side.
+ """
+
+
+class InputAudioTranscription(BaseModel):
+ model: Optional[str] = None
+ """
+ The model to use for transcription; `whisper-1` is the only currently
+ supported model.
+ """
+
+
+class Tool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class TurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: Optional[str] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class SessionCreateResponse(BaseModel):
+ client_secret: Optional[ClientSecret] = None
+ """Ephemeral key returned by the API."""
+
+ input_audio_format: Optional[str] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: Optional[InputAudioTranscription] = None
+ """
+ Configuration for input audio transcription; defaults to off and can be set
+ to `null` to turn it off once it is on. Input audio transcription is not
+ native to the model, since the model consumes audio directly. Transcription
+ runs asynchronously through Whisper and should be treated as rough guidance
+ rather than the representation understood by the model.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format, (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Optional[str] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[Tool]] = None
+ """Tools (functions) available to the model."""
+
+ turn_detection: Optional[TurnDetection] = None
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
src/openai/types/chat/__init__.py
@@ -22,6 +22,7 @@ from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletio
from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam
from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam
from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
+from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort as ChatCompletionReasoningEffort
from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall
from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam
from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam
@@ -37,6 +38,9 @@ from .chat_completion_assistant_message_param import (
from .chat_completion_content_part_text_param import (
ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam,
)
+from .chat_completion_developer_message_param import (
+ ChatCompletionDeveloperMessageParam as ChatCompletionDeveloperMessageParam,
+)
from .chat_completion_message_tool_call_param import (
ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam,
)
src/openai/types/chat/chat_completion_developer_message_param.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
+
+__all__ = ["ChatCompletionDeveloperMessageParam"]
+
+
+class ChatCompletionDeveloperMessageParam(TypedDict, total=False):
+ content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]]
+ """The contents of the developer message."""
+
+ role: Required[Literal["developer"]]
+ """The role of the messages author, in this case `developer`."""
+
+ name: str
+ """An optional name for the participant.
+
+ Provides the model information to differentiate between participants of the same
+ role.
+ """
src/openai/types/chat/chat_completion_message_param.py
@@ -10,10 +10,12 @@ from .chat_completion_user_message_param import ChatCompletionUserMessageParam
from .chat_completion_system_message_param import ChatCompletionSystemMessageParam
from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam
from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam
+from .chat_completion_developer_message_param import ChatCompletionDeveloperMessageParam
__all__ = ["ChatCompletionMessageParam"]
ChatCompletionMessageParam: TypeAlias = Union[
+ ChatCompletionDeveloperMessageParam,
ChatCompletionSystemMessageParam,
ChatCompletionUserMessageParam,
ChatCompletionAssistantMessageParam,
src/openai/types/chat/chat_completion_reasoning_effort.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["ChatCompletionReasoningEffort"]
+
+ChatCompletionReasoningEffort: TypeAlias = Literal["low", "medium", "high"]
src/openai/types/chat/completion_create_params.py
@@ -10,6 +10,7 @@ from .chat_completion_modality import ChatCompletionModality
from .chat_completion_tool_param import ChatCompletionToolParam
from .chat_completion_audio_param import ChatCompletionAudioParam
from .chat_completion_message_param import ChatCompletionMessageParam
+from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort
from ..shared_params.function_parameters import FunctionParameters
from ..shared_params.response_format_text import ResponseFormatText
from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
@@ -60,19 +61,21 @@ class CompletionCreateParamsBase(TypedDict, total=False):
Positive values penalize new tokens based on their existing frequency in the
text so far, decreasing the model's likelihood to repeat the same line verbatim.
-
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
"""
function_call: FunctionCall
"""Deprecated in favor of `tool_choice`.
- Controls which (if any) function is called by the model. `none` means the model
- will not call a function and instead generates a message. `auto` means the model
- can pick between generating a message or calling a function. Specifying a
- particular function via `{"name": "my_function"}` forces the model to call that
+ Controls which (if any) function is called by the model.
+
+ `none` means the model will not call a function and instead generates a message.
+
+ `auto` means the model can pick between generating a message or calling a
function.
+ Specifying a particular function via `{"name": "my_function"}` forces the model
+ to call that function.
+
`none` is the default when no functions are present. `auto` is the default if
functions are present.
"""
@@ -164,18 +167,20 @@ class CompletionCreateParamsBase(TypedDict, total=False):
Positive values penalize new tokens based on whether they appear in the text so
far, increasing the model's likelihood to talk about new topics.
+ """
+
+ reasoning_effort: ChatCompletionReasoningEffort
+ """**o1 models only**
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
+ Constrains effort on reasoning for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ supported values are `low`, `medium`, and `high`. Reducing reasoning effort can
+ result in faster responses and fewer tokens used on reasoning in a response.
"""
response_format: ResponseFormat
"""An object specifying the format that the model must output.
- Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and
- all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
in the
@@ -237,9 +242,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""What sampling temperature to use, between 0 and 2.
Higher values like 0.8 will make the output more random, while lower values like
- 0.2 will make it more focused and deterministic.
-
- We generally recommend altering this or `top_p` but not both.
+ 0.2 will make it more focused and deterministic. We generally recommend altering
+ this or `top_p` but not both.
"""
tool_choice: ChatCompletionToolChoiceOptionParam
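Putting the two chat additions together, a sketch of a request that caps reasoning work on an o1-series model; the accepted values follow the `ChatCompletionReasoningEffort` literal defined above:

```python
from openai import OpenAI

client = OpenAI()

# reasoning_effort trades latency and token cost against reasoning depth;
# only "low", "medium", and "high" are accepted.
completion = client.chat.completions.create(
    model="o1",
    reasoning_effort="low",
    messages=[{"role": "user", "content": "Summarize the CAP theorem in two lines."}],
)
print(completion.choices[0].message.content)
```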
src/openai/types/fine_tuning/fine_tuning_job.py
@@ -6,7 +6,16 @@ from typing_extensions import Literal
from ..._models import BaseModel
from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
-__all__ = ["FineTuningJob", "Error", "Hyperparameters"]
+__all__ = [
+ "FineTuningJob",
+ "Error",
+ "Hyperparameters",
+ "Method",
+ "MethodDpo",
+ "MethodDpoHyperparameters",
+ "MethodSupervised",
+ "MethodSupervisedHyperparameters",
+]
class Error(BaseModel):
@@ -24,15 +33,96 @@ class Error(BaseModel):
class Hyperparameters(BaseModel):
- n_epochs: Union[Literal["auto"], int]
+ batch_size: Union[Literal["auto"], int, None] = None
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float, None] = None
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int, None] = None
+ """The number of epochs to train the model for.
+
+ An epoch refers to one full cycle through the training dataset.
+ """
+
+
+class MethodDpoHyperparameters(BaseModel):
+ batch_size: Union[Literal["auto"], int, None] = None
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ beta: Union[Literal["auto"], float, None] = None
+ """The beta value for the DPO method.
+
+ A higher beta value will increase the weight of the penalty between the policy
+ and reference model.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float, None] = None
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int, None] = None
"""The number of epochs to train the model for.
- An epoch refers to one full cycle through the training dataset. "auto" decides
- the optimal number of epochs based on the size of the dataset. If setting the
- number manually, we support any number between 1 and 50 epochs.
+ An epoch refers to one full cycle through the training dataset.
"""
+class MethodDpo(BaseModel):
+ hyperparameters: Optional[MethodDpoHyperparameters] = None
+ """The hyperparameters used for the fine-tuning job."""
+
+
+class MethodSupervisedHyperparameters(BaseModel):
+ batch_size: Union[Literal["auto"], int, None] = None
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float, None] = None
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int, None] = None
+ """The number of epochs to train the model for.
+
+ An epoch refers to one full cycle through the training dataset.
+ """
+
+
+class MethodSupervised(BaseModel):
+ hyperparameters: Optional[MethodSupervisedHyperparameters] = None
+ """The hyperparameters used for the fine-tuning job."""
+
+
+class Method(BaseModel):
+ dpo: Optional[MethodDpo] = None
+ """Configuration for the DPO fine-tuning method."""
+
+ supervised: Optional[MethodSupervised] = None
+ """Configuration for the supervised fine-tuning method."""
+
+ type: Optional[Literal["supervised", "dpo"]] = None
+ """The type of method. Is either `supervised` or `dpo`."""
+
+
class FineTuningJob(BaseModel):
id: str
"""The object identifier, which can be referenced in the API endpoints."""
@@ -61,8 +151,7 @@ class FineTuningJob(BaseModel):
hyperparameters: Hyperparameters
"""The hyperparameters used for the fine-tuning job.
- See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
- for more details.
+ This value will only be returned when running `supervised` jobs.
"""
model: str
@@ -118,3 +207,6 @@ class FineTuningJob(BaseModel):
integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None
"""A list of integrations to enable for this fine-tuning job."""
+
+ method: Optional[Method] = None
+ """The method used for fine-tuning."""
src/openai/types/fine_tuning/fine_tuning_job_event.py
@@ -1,5 +1,7 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+import builtins
+from typing import Optional
from typing_extensions import Literal
from ..._models import BaseModel
@@ -9,11 +11,22 @@ __all__ = ["FineTuningJobEvent"]
class FineTuningJobEvent(BaseModel):
id: str
+ """The object identifier."""
created_at: int
+ """The Unix timestamp (in seconds) for when the fine-tuning job was created."""
level: Literal["info", "warn", "error"]
+ """The log level of the event."""
message: str
+ """The message of the event."""
object: Literal["fine_tuning.job.event"]
+ """The object type, which is always "fine_tuning.job.event"."""
+
+ data: Optional[builtins.object] = None
+ """The data associated with the event."""
+
+ type: Optional[Literal["message", "metrics"]] = None
+ """The type of event."""
src/openai/types/fine_tuning/job_create_params.py
@@ -5,7 +5,17 @@ from __future__ import annotations
from typing import List, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"]
+__all__ = [
+ "JobCreateParams",
+ "Hyperparameters",
+ "Integration",
+ "IntegrationWandb",
+ "Method",
+ "MethodDpo",
+ "MethodDpoHyperparameters",
+ "MethodSupervised",
+ "MethodSupervisedHyperparameters",
+]
class JobCreateParams(TypedDict, total=False):
@@ -26,8 +36,10 @@ class JobCreateParams(TypedDict, total=False):
your file with the purpose `fine-tune`.
The contents of the file should differ depending on if the model uses the
- [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+ [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input),
[completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
+ format, or if the fine-tuning method uses the
+ [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
format.
See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
@@ -35,11 +47,17 @@ class JobCreateParams(TypedDict, total=False):
"""
hyperparameters: Hyperparameters
- """The hyperparameters used for the fine-tuning job."""
+ """
+ The hyperparameters used for the fine-tuning job. This value is now deprecated
+ in favor of `method`, and should be passed in under the `method` parameter.
+ """
integrations: Optional[Iterable[Integration]]
"""A list of integrations to enable for your fine-tuning job."""
+ method: Method
+ """The method used for fine-tuning."""
+
seed: Optional[int]
"""The seed controls the reproducibility of the job.
@@ -134,3 +152,73 @@ class Integration(TypedDict, total=False):
can set an explicit display name for your run, add tags to your run, and set a
default entity (team, username, etc) to be associated with your run.
"""
+
+
+class MethodDpoHyperparameters(TypedDict, total=False):
+ batch_size: Union[Literal["auto"], int]
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ beta: Union[Literal["auto"], float]
+ """The beta value for the DPO method.
+
+ A higher beta value will increase the weight of the penalty between the policy
+ and reference model.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float]
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int]
+ """The number of epochs to train the model for.
+
+ An epoch refers to one full cycle through the training dataset.
+ """
+
+
+class MethodDpo(TypedDict, total=False):
+ hyperparameters: MethodDpoHyperparameters
+ """The hyperparameters used for the fine-tuning job."""
+
+
+class MethodSupervisedHyperparameters(TypedDict, total=False):
+ batch_size: Union[Literal["auto"], int]
+ """Number of examples in each batch.
+
+ A larger batch size means that model parameters are updated less frequently, but
+ with lower variance.
+ """
+
+ learning_rate_multiplier: Union[Literal["auto"], float]
+ """Scaling factor for the learning rate.
+
+ A smaller learning rate may be useful to avoid overfitting.
+ """
+
+ n_epochs: Union[Literal["auto"], int]
+ """The number of epochs to train the model for.
+
+ An epoch refers to one full cycle through the training dataset.
+ """
+
+
+class MethodSupervised(TypedDict, total=False):
+ hyperparameters: MethodSupervisedHyperparameters
+ """The hyperparameters used for the fine-tuning job."""
+
+
+class Method(TypedDict, total=False):
+ dpo: MethodDpo
+ """Configuration for the DPO fine-tuning method."""
+
+ supervised: MethodSupervised
+ """Configuration for the supervised fine-tuning method."""
+
+ type: Literal["supervised", "dpo"]
+ """The type of method. Is either `supervised` or `dpo`."""
src/openai/types/chat_model.py
@@ -5,6 +5,8 @@ from typing_extensions import Literal, TypeAlias
__all__ = ["ChatModel"]
ChatModel: TypeAlias = Literal[
+ "o1",
+ "o1-2024-12-17",
"o1-preview",
"o1-preview-2024-09-12",
"o1-mini",
@@ -13,10 +15,11 @@ ChatModel: TypeAlias = Literal[
"gpt-4o-2024-11-20",
"gpt-4o-2024-08-06",
"gpt-4o-2024-05-13",
- "gpt-4o-realtime-preview",
- "gpt-4o-realtime-preview-2024-10-01",
"gpt-4o-audio-preview",
"gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
"chatgpt-4o-latest",
"gpt-4o-mini",
"gpt-4o-mini-2024-07-18",
tests/api_resources/beta/realtime/__init__.py
@@ -0,0 +1,1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
tests/api_resources/beta/realtime/test_sessions.py
@@ -0,0 +1,146 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types.beta.realtime import SessionCreateResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestSessions:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ session = client.beta.realtime.sessions.create(
+ model="gpt-4o-realtime-preview",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: OpenAI) -> None:
+ session = client.beta.realtime.sessions.create(
+ model="gpt-4o-realtime-preview",
+ input_audio_format="pcm16",
+ input_audio_transcription={"model": "model"},
+ instructions="instructions",
+ max_response_output_tokens=0,
+ modalities=["text"],
+ output_audio_format="pcm16",
+ temperature=0,
+ tool_choice="tool_choice",
+ tools=[
+ {
+ "description": "description",
+ "name": "name",
+ "parameters": {},
+ "type": "function",
+ }
+ ],
+ turn_detection={
+ "create_response": True,
+ "prefix_padding_ms": 0,
+ "silence_duration_ms": 0,
+ "threshold": 0,
+ "type": "type",
+ },
+ voice="alloy",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.beta.realtime.sessions.with_raw_response.create(
+ model="gpt-4o-realtime-preview",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.beta.realtime.sessions.with_streaming_response.create(
+ model="gpt-4o-realtime-preview",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncSessions:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ session = await async_client.beta.realtime.sessions.create(
+ model="gpt-4o-realtime-preview",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ session = await async_client.beta.realtime.sessions.create(
+ model="gpt-4o-realtime-preview",
+ input_audio_format="pcm16",
+ input_audio_transcription={"model": "model"},
+ instructions="instructions",
+ max_response_output_tokens=0,
+ modalities=["text"],
+ output_audio_format="pcm16",
+ temperature=0,
+ tool_choice="tool_choice",
+ tools=[
+ {
+ "description": "description",
+ "name": "name",
+ "parameters": {},
+ "type": "function",
+ }
+ ],
+ turn_detection={
+ "create_response": True,
+ "prefix_padding_ms": 0,
+ "silence_duration_ms": 0,
+ "threshold": 0,
+ "type": "type",
+ },
+ voice="alloy",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.beta.realtime.sessions.with_raw_response.create(
+ model="gpt-4o-realtime-preview",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.beta.realtime.sessions.with_streaming_response.create(
+ model="gpt-4o-realtime-preview",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = await response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
tests/api_resources/chat/test_completions.py
@@ -26,7 +26,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -39,8 +39,8 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
- "name": "string",
+ "role": "developer",
+ "name": "name",
}
],
model="gpt-4o",
@@ -70,6 +70,7 @@ class TestCompletions:
"type": "content",
},
presence_penalty=-2,
+ reasoning_effort="low",
response_format={"type": "text"},
seed=-9007199254740991,
service_tier="auto",
@@ -102,7 +103,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -119,7 +120,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -138,7 +139,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -152,8 +153,8 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
- "name": "string",
+ "role": "developer",
+ "name": "name",
}
],
model="gpt-4o",
@@ -184,6 +185,7 @@ class TestCompletions:
"type": "content",
},
presence_penalty=-2,
+ reasoning_effort="low",
response_format={"type": "text"},
seed=-9007199254740991,
service_tier="auto",
@@ -215,7 +217,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -232,7 +234,7 @@ class TestCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -273,7 +275,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -286,8 +288,8 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
- "name": "string",
+ "role": "developer",
+ "name": "name",
}
],
model="gpt-4o",
@@ -317,6 +319,7 @@ class TestAsyncCompletions:
"type": "content",
},
presence_penalty=-2,
+ reasoning_effort="low",
response_format={"type": "text"},
seed=-9007199254740991,
service_tier="auto",
@@ -349,7 +352,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -366,7 +369,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -385,7 +388,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -399,8 +402,8 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
- "name": "string",
+ "role": "developer",
+ "name": "name",
}
],
model="gpt-4o",
@@ -431,6 +434,7 @@ class TestAsyncCompletions:
"type": "content",
},
presence_penalty=-2,
+ reasoning_effort="low",
response_format={"type": "text"},
seed=-9007199254740991,
service_tier="auto",
@@ -462,7 +466,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -479,7 +483,7 @@ class TestAsyncCompletions:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
tests/api_resources/fine_tuning/test_jobs.py
@@ -50,6 +50,24 @@ class TestJobs:
},
}
],
+ method={
+ "dpo": {
+ "hyperparameters": {
+ "batch_size": "auto",
+ "beta": "auto",
+ "learning_rate_multiplier": "auto",
+ "n_epochs": "auto",
+ }
+ },
+ "supervised": {
+ "hyperparameters": {
+ "batch_size": "auto",
+ "learning_rate_multiplier": "auto",
+ "n_epochs": "auto",
+ }
+ },
+ "type": "supervised",
+ },
seed=42,
suffix="x",
validation_file="file-abc123",
@@ -271,6 +289,24 @@ class TestAsyncJobs:
},
}
],
+ method={
+ "dpo": {
+ "hyperparameters": {
+ "batch_size": "auto",
+ "beta": "auto",
+ "learning_rate_multiplier": "auto",
+ "n_epochs": "auto",
+ }
+ },
+ "supervised": {
+ "hyperparameters": {
+ "batch_size": "auto",
+ "learning_rate_multiplier": "auto",
+ "n_epochs": "auto",
+ }
+ },
+ "type": "supervised",
+ },
seed=42,
suffix="x",
validation_file="file-abc123",
tests/test_client.py
@@ -795,7 +795,7 @@ class TestOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -827,7 +827,7 @@ class TestOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -859,7 +859,7 @@ class TestOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -891,7 +891,7 @@ class TestOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -1663,7 +1663,7 @@ class TestAsyncOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -1696,7 +1696,7 @@ class TestAsyncOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -1729,7 +1729,7 @@ class TestAsyncOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
@@ -1762,7 +1762,7 @@ class TestAsyncOpenAI:
messages=[
{
"content": "string",
- "role": "system",
+ "role": "developer",
}
],
model="gpt-4o",
.stats.yml
@@ -1,2 +1,2 @@
-configured_endpoints: 68
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-2e0e0678be19d1118fd796af291822075e40538dba326611e177e9f3dc245a53.yml
+configured_endpoints: 69
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-779ea2754025daf5e18eb8ceb203ec321692636bc3a999338556a479178efa6c.yml
api.md
@@ -47,6 +47,7 @@ from openai.types.chat import (
ChatCompletionContentPartInputAudio,
ChatCompletionContentPartRefusal,
ChatCompletionContentPartText,
+ ChatCompletionDeveloperMessageParam,
ChatCompletionFunctionCallOption,
ChatCompletionFunctionMessageParam,
ChatCompletionMessage,
@@ -55,6 +56,7 @@ from openai.types.chat import (
ChatCompletionModality,
ChatCompletionNamedToolChoice,
ChatCompletionPredictionContent,
+ ChatCompletionReasoningEffort,
ChatCompletionRole,
ChatCompletionStreamOptions,
ChatCompletionSystemMessageParam,
@@ -235,6 +237,20 @@ Methods:
# Beta
+## Realtime
+
+### Sessions
+
+Types:
+
+```python
+from openai.types.beta.realtime import Session, SessionCreateResponse
+```
+
+Methods:
+
+- <code title="post /realtime/sessions">client.beta.realtime.sessions.<a href="./src/openai/resources/beta/realtime/sessions.py">create</a>(\*\*<a href="src/openai/types/beta/realtime/session_create_params.py">params</a>) -> <a href="./src/openai/types/beta/realtime/session_create_response.py">SessionCreateResponse</a></code>
+
## VectorStores
Types: