Commit 3d3d16ab
Changed files (163)
examples/realtime/audio_util.py
@@ -11,7 +11,7 @@ import pyaudio
import sounddevice as sd
from pydub import AudioSegment
-from openai.resources.beta.realtime.realtime import AsyncRealtimeConnection
+from openai.resources.realtime.realtime import AsyncRealtimeConnection
CHUNK_LENGTH_S = 0.05 # 50ms
SAMPLE_RATE = 24000
examples/realtime/azure_realtime.py
@@ -26,10 +26,16 @@ async def main() -> None:
azure_ad_token_provider=get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default"),
api_version="2024-10-01-preview",
)
- async with client.beta.realtime.connect(
- model="gpt-4o-realtime-preview", # deployment name for your model
+ async with client.realtime.connect(
+ model="gpt-realtime", # deployment name for your model
) as connection:
- await connection.session.update(session={"modalities": ["text"]}) # type: ignore
+ await connection.session.update(
+ session={
+ "output_modalities": ["text"],
+ "model": "gpt-realtime",
+ "type": "realtime",
+ }
+ )
while True:
user_input = input("Enter a message: ")
if user_input == "q":
@@ -44,9 +50,9 @@ async def main() -> None:
)
await connection.response.create()
async for event in connection:
- if event.type == "response.text.delta":
+ if event.type == "response.output_text.delta":
print(event.delta, flush=True, end="")
- elif event.type == "response.text.done":
+ elif event.type == "response.output_text.done":
print()
elif event.type == "response.done":
break
examples/realtime/push_to_talk_app.py
@@ -38,8 +38,8 @@ from textual.reactive import reactive
from textual.containers import Container
from openai import AsyncOpenAI
-from openai.types.beta.realtime.session import Session
-from openai.resources.beta.realtime.realtime import AsyncRealtimeConnection
+from openai.types.realtime.session import Session
+from openai.resources.realtime.realtime import AsyncRealtimeConnection
class SessionDisplay(Static):
@@ -154,13 +154,21 @@ class RealtimeApp(App[None]):
self.run_worker(self.send_mic_audio())
async def handle_realtime_connection(self) -> None:
- async with self.client.beta.realtime.connect(model="gpt-4o-realtime-preview") as conn:
+ async with self.client.realtime.connect(model="gpt-realtime") as conn:
self.connection = conn
self.connected.set()
# note: this is the default and can be omitted
# if you want to manually handle VAD yourself, then set `'turn_detection': None`
- await conn.session.update(session={"turn_detection": {"type": "server_vad"}})
+ await conn.session.update(
+ session={
+ "audio": {
+ "input": {"turn_detection": {"type": "server_vad"}},
+ },
+ "model": "gpt-realtime",
+ "type": "realtime",
+ }
+ )
acc_items: dict[str, Any] = {}
@@ -176,7 +184,7 @@ class RealtimeApp(App[None]):
self.session = event.session
continue
- if event.type == "response.audio.delta":
+ if event.type == "response.output_audio.delta":
if event.item_id != self.last_audio_item_id:
self.audio_player.reset_frame_count()
self.last_audio_item_id = event.item_id
@@ -185,7 +193,7 @@ class RealtimeApp(App[None]):
self.audio_player.add_data(bytes_data)
continue
- if event.type == "response.audio_transcript.delta":
+ if event.type == "response.output_audio_transcript.delta":
try:
text = acc_items[event.item_id]
except KeyError:
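The GA surface renames the streaming server events and nests audio settings under `session.audio`. A minimal sketch of the renames these examples exercise, plus the assumed nested shape for disabling server VAD (per the comment above about setting `'turn_detection': None`):

```py
# Sketch only: beta -> GA server event renames used in these examples (not exhaustive).
BETA_TO_GA_EVENTS = {
    "response.text.delta": "response.output_text.delta",
    "response.text.done": "response.output_text.done",
    "response.audio.delta": "response.output_audio.delta",
    "response.audio_transcript.delta": "response.output_audio_transcript.delta",
}

# Assumption: under the GA session shape, turn_detection moves from the top level
# into audio.input, so manual VAD would be configured roughly like this.
manual_vad_session = {
    "type": "realtime",
    "model": "gpt-realtime",
    "audio": {"input": {"turn_detection": None}},
}
```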
examples/realtime/realtime.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env rye run python
+import asyncio
+
+from openai import AsyncOpenAI
+
+# Azure OpenAI Realtime Docs
+
+# How-to: https://learn.microsoft.com/azure/ai-services/openai/how-to/realtime-audio
+# Supported models and API versions: https://learn.microsoft.com/azure/ai-services/openai/how-to/realtime-audio#supported-models
+# Entra ID auth: https://learn.microsoft.com/azure/ai-services/openai/how-to/managed-identity
+
+
+async def main() -> None:
+ """The following example demonstrates how to configure OpenAI to use the Realtime API.
+ For an audio example, see push_to_talk_app.py and update the client and model parameter accordingly.
+
+ When prompted for user input, type a message and hit enter to send it to the model.
+ Enter "q" to quit the conversation.
+ """
+
+ client = AsyncOpenAI()
+ async with client.realtime.connect(
+ model="gpt-realtime",
+ ) as connection:
+ await connection.session.update(
+ session={
+ "output_modalities": ["text"],
+ "model": "gpt-realtime",
+ "type": "realtime",
+ }
+ )
+ while True:
+ user_input = input("Enter a message: ")
+ if user_input == "q":
+ break
+
+ await connection.conversation.item.create(
+ item={
+ "type": "message",
+ "role": "user",
+ "content": [{"type": "input_text", "text": user_input}],
+ }
+ )
+ await connection.response.create()
+ async for event in connection:
+ if event.type == "response.output_text.delta":
+ print(event.delta, flush=True, end="")
+ elif event.type == "response.output_text.done":
+ print()
+ elif event.type == "response.done":
+ break
+
+
+asyncio.run(main())
src/openai/resources/audio/speech.py
@@ -50,7 +50,9 @@ class Speech(SyncAPIResource):
*,
input: str,
model: Union[str, SpeechModel],
- voice: Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]],
+ voice: Union[
+ str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]
+ ],
instructions: str | NotGiven = NOT_GIVEN,
response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
speed: float | NotGiven = NOT_GIVEN,
@@ -144,7 +146,9 @@ class AsyncSpeech(AsyncAPIResource):
*,
input: str,
model: Union[str, SpeechModel],
- voice: Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]],
+ voice: Union[
+ str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]
+ ],
instructions: str | NotGiven = NOT_GIVEN,
response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
speed: float | NotGiven = NOT_GIVEN,
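The widened `voice` literal adds `marin` and `cedar`. A minimal sketch of requesting one of the new voices (the model choice and output path are illustrative):

```py
from pathlib import Path

from openai import OpenAI

client = OpenAI()

# Sketch: synthesize speech with one of the newly added voices.
with client.audio.speech.with_streaming_response.create(
    model="gpt-4o-mini-tts",  # illustrative model choice
    voice="marin",
    input="Hello from one of the new voices.",
) as response:
    response.stream_to_file(Path("speech.mp3"))
```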
src/openai/resources/beta/beta.py
@@ -24,10 +24,6 @@ from ...resources.chat import Chat, AsyncChat
from .realtime.realtime import (
Realtime,
AsyncRealtime,
- RealtimeWithRawResponse,
- AsyncRealtimeWithRawResponse,
- RealtimeWithStreamingResponse,
- AsyncRealtimeWithStreamingResponse,
)
__all__ = ["Beta", "AsyncBeta"]
@@ -111,10 +107,6 @@ class BetaWithRawResponse:
def __init__(self, beta: Beta) -> None:
self._beta = beta
- @cached_property
- def realtime(self) -> RealtimeWithRawResponse:
- return RealtimeWithRawResponse(self._beta.realtime)
-
@cached_property
def assistants(self) -> AssistantsWithRawResponse:
return AssistantsWithRawResponse(self._beta.assistants)
@@ -128,10 +120,6 @@ class AsyncBetaWithRawResponse:
def __init__(self, beta: AsyncBeta) -> None:
self._beta = beta
- @cached_property
- def realtime(self) -> AsyncRealtimeWithRawResponse:
- return AsyncRealtimeWithRawResponse(self._beta.realtime)
-
@cached_property
def assistants(self) -> AsyncAssistantsWithRawResponse:
return AsyncAssistantsWithRawResponse(self._beta.assistants)
@@ -145,10 +133,6 @@ class BetaWithStreamingResponse:
def __init__(self, beta: Beta) -> None:
self._beta = beta
- @cached_property
- def realtime(self) -> RealtimeWithStreamingResponse:
- return RealtimeWithStreamingResponse(self._beta.realtime)
-
@cached_property
def assistants(self) -> AssistantsWithStreamingResponse:
return AssistantsWithStreamingResponse(self._beta.assistants)
@@ -162,10 +146,6 @@ class AsyncBetaWithStreamingResponse:
def __init__(self, beta: AsyncBeta) -> None:
self._beta = beta
- @cached_property
- def realtime(self) -> AsyncRealtimeWithStreamingResponse:
- return AsyncRealtimeWithStreamingResponse(self._beta.realtime)
-
@cached_property
def assistants(self) -> AsyncAssistantsWithStreamingResponse:
return AsyncAssistantsWithStreamingResponse(self._beta.assistants)
src/openai/resources/realtime/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .realtime import (
+ Realtime,
+ AsyncRealtime,
+ RealtimeWithRawResponse,
+ AsyncRealtimeWithRawResponse,
+ RealtimeWithStreamingResponse,
+ AsyncRealtimeWithStreamingResponse,
+)
+from .client_secrets import (
+ ClientSecrets,
+ AsyncClientSecrets,
+ ClientSecretsWithRawResponse,
+ AsyncClientSecretsWithRawResponse,
+ ClientSecretsWithStreamingResponse,
+ AsyncClientSecretsWithStreamingResponse,
+)
+
+__all__ = [
+ "ClientSecrets",
+ "AsyncClientSecrets",
+ "ClientSecretsWithRawResponse",
+ "AsyncClientSecretsWithRawResponse",
+ "ClientSecretsWithStreamingResponse",
+ "AsyncClientSecretsWithStreamingResponse",
+ "Realtime",
+ "AsyncRealtime",
+ "RealtimeWithRawResponse",
+ "AsyncRealtimeWithRawResponse",
+ "RealtimeWithStreamingResponse",
+ "AsyncRealtimeWithStreamingResponse",
+]
src/openai/resources/realtime/client_secrets.py
@@ -0,0 +1,185 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..._base_client import make_request_options
+from ...types.realtime import client_secret_create_params
+from ...types.realtime.client_secret_create_response import ClientSecretCreateResponse
+
+__all__ = ["ClientSecrets", "AsyncClientSecrets"]
+
+
+class ClientSecrets(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> ClientSecretsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return ClientSecretsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> ClientSecretsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return ClientSecretsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ expires_after: client_secret_create_params.ExpiresAfter | NotGiven = NOT_GIVEN,
+ session: client_secret_create_params.Session | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ClientSecretCreateResponse:
+ """
+ Create a Realtime session and client secret for either realtime or
+ transcription.
+
+ Args:
+ expires_after: Configuration for the ephemeral token expiration.
+
+ session: Session configuration to use for the client secret. Choose either a realtime
+ session or a transcription session.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/realtime/client_secrets",
+ body=maybe_transform(
+ {
+ "expires_after": expires_after,
+ "session": session,
+ },
+ client_secret_create_params.ClientSecretCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ClientSecretCreateResponse,
+ )
+
+
+class AsyncClientSecrets(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncClientSecretsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncClientSecretsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncClientSecretsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncClientSecretsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ expires_after: client_secret_create_params.ExpiresAfter | NotGiven = NOT_GIVEN,
+ session: client_secret_create_params.Session | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ClientSecretCreateResponse:
+ """
+ Create a Realtime session and client secret for either realtime or
+ transcription.
+
+ Args:
+ expires_after: Configuration for the ephemeral token expiration.
+
+ session: Session configuration to use for the client secret. Choose either a realtime
+ session or a transcription session.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/realtime/client_secrets",
+ body=await async_maybe_transform(
+ {
+ "expires_after": expires_after,
+ "session": session,
+ },
+ client_secret_create_params.ClientSecretCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ClientSecretCreateResponse,
+ )
+
+
+class ClientSecretsWithRawResponse:
+ def __init__(self, client_secrets: ClientSecrets) -> None:
+ self._client_secrets = client_secrets
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ client_secrets.create,
+ )
+
+
+class AsyncClientSecretsWithRawResponse:
+ def __init__(self, client_secrets: AsyncClientSecrets) -> None:
+ self._client_secrets = client_secrets
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ client_secrets.create,
+ )
+
+
+class ClientSecretsWithStreamingResponse:
+ def __init__(self, client_secrets: ClientSecrets) -> None:
+ self._client_secrets = client_secrets
+
+ self.create = to_streamed_response_wrapper(
+ client_secrets.create,
+ )
+
+
+class AsyncClientSecretsWithStreamingResponse:
+ def __init__(self, client_secrets: AsyncClientSecrets) -> None:
+ self._client_secrets = client_secrets
+
+ self.create = async_to_streamed_response_wrapper(
+ client_secrets.create,
+ )
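A hedged usage sketch for the new `/realtime/client_secrets` endpoint; the `expires_after` and `session` payload shapes below are assumptions based on the GA session fields used elsewhere in this commit, not confirmed by this diff:

```py
from openai import OpenAI

client = OpenAI()

# Sketch: mint an ephemeral client secret for a browser or WebRTC client.
# The expires_after fields and the `value` attribute are assumptions.
secret = client.realtime.client_secrets.create(
    expires_after={"anchor": "created_at", "seconds": 600},
    session={
        "type": "realtime",
        "model": "gpt-realtime",
        "output_modalities": ["audio"],
    },
)
print(secret.value)
```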
src/openai/resources/realtime/realtime.py
@@ -0,0 +1,1056 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import json
+import logging
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Iterator, cast
+from typing_extensions import AsyncIterator
+
+import httpx
+from pydantic import BaseModel
+
+from ..._types import NOT_GIVEN, Query, Headers, NotGiven
+from ..._utils import (
+ is_azure_client,
+ maybe_transform,
+ strip_not_given,
+ async_maybe_transform,
+ is_async_azure_client,
+)
+from ..._compat import cached_property
+from ..._models import construct_type_unchecked
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._exceptions import OpenAIError
+from ..._base_client import _merge_mappings
+from .client_secrets import (
+ ClientSecrets,
+ AsyncClientSecrets,
+ ClientSecretsWithRawResponse,
+ AsyncClientSecretsWithRawResponse,
+ ClientSecretsWithStreamingResponse,
+ AsyncClientSecretsWithStreamingResponse,
+)
+from ...types.realtime import response_create_event_param
+from ...types.websocket_connection_options import WebsocketConnectionOptions
+from ...types.realtime.realtime_client_event import RealtimeClientEvent
+from ...types.realtime.realtime_server_event import RealtimeServerEvent
+from ...types.realtime.conversation_item_param import ConversationItemParam
+from ...types.realtime.realtime_client_event_param import RealtimeClientEventParam
+from ...types.realtime.realtime_session_create_request_param import RealtimeSessionCreateRequestParam
+from ...types.realtime.realtime_transcription_session_create_request_param import (
+ RealtimeTranscriptionSessionCreateRequestParam,
+)
+
+if TYPE_CHECKING:
+ from websockets.sync.client import ClientConnection as WebsocketConnection
+ from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection
+
+ from ..._client import OpenAI, AsyncOpenAI
+
+__all__ = ["Realtime", "AsyncRealtime"]
+
+log: logging.Logger = logging.getLogger(__name__)
+
+
+class Realtime(SyncAPIResource):
+ @cached_property
+ def client_secrets(self) -> ClientSecrets:
+ return ClientSecrets(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> RealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return RealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> RealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return RealtimeWithStreamingResponse(self)
+
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> RealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+ """
+ return RealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
+
+class AsyncRealtime(AsyncAPIResource):
+ @cached_property
+ def client_secrets(self) -> AsyncClientSecrets:
+ return AsyncClientSecrets(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncRealtimeWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncRealtimeWithStreamingResponse(self)
+
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> AsyncRealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+ """
+ return AsyncRealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
+
+class RealtimeWithRawResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def client_secrets(self) -> ClientSecretsWithRawResponse:
+ return ClientSecretsWithRawResponse(self._realtime.client_secrets)
+
+
+class AsyncRealtimeWithRawResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def client_secrets(self) -> AsyncClientSecretsWithRawResponse:
+ return AsyncClientSecretsWithRawResponse(self._realtime.client_secrets)
+
+
+class RealtimeWithStreamingResponse:
+ def __init__(self, realtime: Realtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def client_secrets(self) -> ClientSecretsWithStreamingResponse:
+ return ClientSecretsWithStreamingResponse(self._realtime.client_secrets)
+
+
+class AsyncRealtimeWithStreamingResponse:
+ def __init__(self, realtime: AsyncRealtime) -> None:
+ self._realtime = realtime
+
+ @cached_property
+ def client_secrets(self) -> AsyncClientSecretsWithStreamingResponse:
+ return AsyncClientSecretsWithStreamingResponse(self._realtime.client_secrets)
+
+
+class AsyncRealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: AsyncRealtimeSessionResource
+ response: AsyncRealtimeResponseResource
+ input_audio_buffer: AsyncRealtimeInputAudioBufferResource
+ conversation: AsyncRealtimeConversationResource
+ output_audio_buffer: AsyncRealtimeOutputAudioBufferResource
+ transcription_session: AsyncRealtimeTranscriptionSessionResource
+
+ _connection: AsyncWebsocketConnection
+
+ def __init__(self, connection: AsyncWebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = AsyncRealtimeSessionResource(self)
+ self.response = AsyncRealtimeResponseResource(self)
+ self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
+ self.conversation = AsyncRealtimeConversationResource(self)
+ self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)
+ self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)
+
+ async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
+ """
+ An infinite-iterator that will continue to yield events until
+ the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield await self.recv()
+ except ConnectionClosedOK:
+ return
+
+ async def recv(self) -> RealtimeServerEvent:
+ """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+ Canceling this method is safe. There's no risk of losing data.
+ """
+ return self.parse_event(await self.recv_bytes())
+
+ async def recv_bytes(self) -> bytes:
+ """Receive the next message from the connection as raw bytes.
+
+ Canceling this method is safe. There's no risk of losing data.
+
+ If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+ then you can call `.parse_event(data)`.
+ """
+ message = await self._connection.recv(decode=False)
+        log.debug("Received websocket message: %s", message)
+ return message
+
+ async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
+ )
+ await self._connection.send(data)
+
+ async def close(self, *, code: int = 1000, reason: str = "") -> None:
+ await self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class AsyncRealtimeConnectionManager:
+ """
+    Context manager over an `AsyncRealtimeConnection` that is returned by `realtime.connect()`
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+ Note that if your application doesn't work well with the context manager approach then you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: AsyncOpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: AsyncRealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ async def __aenter__(self) -> AsyncRealtimeConnection:
+ """
+ 👋 If your application doesn't work well with the context manager approach then you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+ try:
+ from websockets.asyncio.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ extra_query = self.__extra_query
+ auth_headers = self.__client.auth_headers
+ if is_async_azure_client(self.__client):
+ url, auth_headers = await self.__client._configure_realtime(self.__model, extra_query)
+ else:
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = AsyncRealtimeConnection(
+ await connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **auth_headers,
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __aenter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ async def __aexit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ await self.__connection.close()
+
+
+class RealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: RealtimeSessionResource
+ response: RealtimeResponseResource
+ input_audio_buffer: RealtimeInputAudioBufferResource
+ conversation: RealtimeConversationResource
+ output_audio_buffer: RealtimeOutputAudioBufferResource
+ transcription_session: RealtimeTranscriptionSessionResource
+
+ _connection: WebsocketConnection
+
+ def __init__(self, connection: WebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = RealtimeSessionResource(self)
+ self.response = RealtimeResponseResource(self)
+ self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
+ self.conversation = RealtimeConversationResource(self)
+ self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)
+ self.transcription_session = RealtimeTranscriptionSessionResource(self)
+
+ def __iter__(self) -> Iterator[RealtimeServerEvent]:
+ """
+ An infinite-iterator that will continue to yield events until
+ the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield self.recv()
+ except ConnectionClosedOK:
+ return
+
+ def recv(self) -> RealtimeServerEvent:
+ """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+ Canceling this method is safe. There's no risk of losing data.
+ """
+ return self.parse_event(self.recv_bytes())
+
+ def recv_bytes(self) -> bytes:
+ """Receive the next message from the connection as raw bytes.
+
+ Canceling this method is safe. There's no risk of losing data.
+
+ If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+ then you can call `.parse_event(data)`.
+ """
+ message = self._connection.recv(decode=False)
+        log.debug("Received websocket message: %s", message)
+ return message
+
+ def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(maybe_transform(event, RealtimeClientEventParam))
+ )
+ self._connection.send(data)
+
+ def close(self, *, code: int = 1000, reason: str = "") -> None:
+ self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class RealtimeConnectionManager:
+ """
+ Context manager over a `RealtimeConnection` that is returned by `realtime.connect()`
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+ Note that if your application doesn't work well with the context manager approach then you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: OpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: RealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ def __enter__(self) -> RealtimeConnection:
+ """
+ 👋 If your application doesn't work well with the context manager approach then you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+ try:
+ from websockets.sync.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ extra_query = self.__extra_query
+ auth_headers = self.__client.auth_headers
+ if is_azure_client(self.__client):
+ url, auth_headers = self.__client._configure_realtime(self.__model, extra_query)
+ else:
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = RealtimeConnection(
+ connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **auth_headers,
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __enter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ def __exit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ self.__connection.close()
+
+
+class BaseRealtimeConnectionResource:
+ def __init__(self, connection: RealtimeConnection) -> None:
+ self._connection = connection
+
+
+class RealtimeSessionResource(BaseRealtimeConnectionResource):
+ def update(self, *, session: RealtimeSessionCreateRequestParam, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to update the session’s default configuration.
+ The client may send this event at any time to update any field,
+ except for `voice`. However, note that once a session has been
+ initialized with a particular `model`, it can’t be changed to
+ another model using `session.update`.
+
+ When the server receives a `session.update`, it will respond
+ with a `session.updated` event showing the full, effective configuration.
+ Only the fields that are present are updated. To clear a field like
+ `instructions`, pass an empty string.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeResponseResource(BaseRealtimeConnectionResource):
+ def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+ `instructions`, and `temperature`. These fields will override the Session's
+ configuration for this Response only.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+ def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.done` event with a status of `response.status=cancelled`. If
+ there is no response to cancel, the server will respond with an error.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+
+class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
+ def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+        not need to send this event; the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+ The client may choose how much audio to place in each event up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+ not send a confirmation response to this event.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeConversationResource(BaseRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> RealtimeConversationItemResource:
+ return RealtimeConversationItemResource(self._connection)
+
+
+class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
+ def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+ def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
+ The server will respond with a `conversation.item.retrieved` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
+ def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """**WebRTC Only:** Emit to cut off the current audio response.
+
+ This will trigger the server to
+        stop generating audio and emit an `output_audio_buffer.cleared` event. This
+ event should be preceded by a `response.cancel` client event to stop the
+ generation of the current response.
+ [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
+ )
+
+
+class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
+ def update(
+ self, *, session: RealtimeTranscriptionSessionCreateRequestParam, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to update a transcription session."""
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class BaseAsyncRealtimeConnectionResource:
+ def __init__(self, connection: AsyncRealtimeConnection) -> None:
+ self._connection = connection
+
+
+class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
+ async def update(self, *, session: RealtimeSessionCreateRequestParam, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to update the session’s default configuration.
+ The client may send this event at any time to update any field,
+ except for `voice`. However, note that once a session has been
+ initialized with a particular `model`, it can’t be changed to
+ another model using `session.update`.
+
+ When the server receives a `session.update`, it will respond
+ with a `session.updated` event showing the full, effective configuration.
+ Only the fields that are present are updated. To clear a field like
+ `instructions`, pass an empty string.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
+ async def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+ configuration for this Response only.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+ async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.done` event with a status of `response.status=cancelled`. If
+ there is no response to cancel, the server will respond with an error.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+
+class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
+ async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+        not need to send this event; the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+ The client may choose how much audio to place in each event up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+ not send a confirmation response to this event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> AsyncRealtimeConversationItemResource:
+ return AsyncRealtimeConversationItemResource(self._connection)
+
+
+class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
+ async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ async def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ async def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+ async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
+ The server will respond with a `conversation.item.retrieved` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
+ async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """**WebRTC Only:** Emit to cut off the current audio response.
+
+ This will trigger the server to
+        stop generating audio and emit an `output_audio_buffer.cleared` event. This
+ event should be preceded by a `response.cancel` client event to stop the
+ generation of the current response.
+ [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
+ )
+
+
+class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
+ async def update(
+ self, *, session: RealtimeTranscriptionSessionCreateRequestParam, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to update a transcription session."""
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
+ )
+ )
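Putting the connection manager and its sub-resources together, a minimal synchronous sketch mirroring the async example above (requires the `openai[realtime]` extra; the prompt is illustrative):

```py
from openai import OpenAI

client = OpenAI()

# Sketch: drive a text-only exchange over the sync connection manager.
with client.realtime.connect(model="gpt-realtime") as connection:
    connection.session.update(
        session={
            "type": "realtime",
            "model": "gpt-realtime",
            "output_modalities": ["text"],
        }
    )
    connection.conversation.item.create(
        item={
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "Say hello."}],
        }
    )
    connection.response.create()
    for event in connection:
        if event.type == "response.output_text.delta":
            print(event.delta, end="", flush=True)
        elif event.type == "response.done":
            print()
            break
```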
src/openai/resources/responses/responses.py
@@ -260,7 +260,7 @@ class Responses(SyncAPIResource):
tools: An array of tools the model may call while generating a response. You can
specify which tool to use by setting the `tool_choice` parameter.
- The two categories of tools you can provide the model are:
+ We support the following categories of tools:
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
capabilities, like
@@ -268,6 +268,9 @@ class Responses(SyncAPIResource):
[file search](https://platform.openai.com/docs/guides/tools-file-search).
Learn more about
[built-in tools](https://platform.openai.com/docs/guides/tools).
+ - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
+ predefined connectors such as Google Drive and Notion. Learn more about
+ [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
- **Function calls (custom tools)**: Functions that are defined by you, enabling
the model to call your own code with strongly typed arguments and outputs.
Learn more about
@@ -496,7 +499,7 @@ class Responses(SyncAPIResource):
tools: An array of tools the model may call while generating a response. You can
specify which tool to use by setting the `tool_choice` parameter.
- The two categories of tools you can provide the model are:
+ We support the following categories of tools:
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
capabilities, like
@@ -504,6 +507,9 @@ class Responses(SyncAPIResource):
[file search](https://platform.openai.com/docs/guides/tools-file-search).
Learn more about
[built-in tools](https://platform.openai.com/docs/guides/tools).
+ - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
+ predefined connectors such as Google Drive and Notion. Learn more about
+ [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
- **Function calls (custom tools)**: Functions that are defined by you, enabling
the model to call your own code with strongly typed arguments and outputs.
Learn more about
@@ -732,7 +738,7 @@ class Responses(SyncAPIResource):
tools: An array of tools the model may call while generating a response. You can
specify which tool to use by setting the `tool_choice` parameter.
- The two categories of tools you can provide the model are:
+ We support the following categories of tools:
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
capabilities, like
@@ -740,6 +746,9 @@ class Responses(SyncAPIResource):
[file search](https://platform.openai.com/docs/guides/tools-file-search).
Learn more about
[built-in tools](https://platform.openai.com/docs/guides/tools).
+ - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
+ predefined connectors such as Google Drive and Notion. Learn more about
+ [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
- **Function calls (custom tools)**: Functions that are defined by you, enabling
the model to call your own code with strongly typed arguments and outputs.
Learn more about
@@ -1682,7 +1691,7 @@ class AsyncResponses(AsyncAPIResource):
tools: An array of tools the model may call while generating a response. You can
specify which tool to use by setting the `tool_choice` parameter.
- The two categories of tools you can provide the model are:
+ We support the following categories of tools:
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
capabilities, like
@@ -1690,6 +1699,9 @@ class AsyncResponses(AsyncAPIResource):
[file search](https://platform.openai.com/docs/guides/tools-file-search).
Learn more about
[built-in tools](https://platform.openai.com/docs/guides/tools).
+ - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
+ predefined connectors such as Google Drive and Notion. Learn more about
+ [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
- **Function calls (custom tools)**: Functions that are defined by you, enabling
the model to call your own code with strongly typed arguments and outputs.
Learn more about
@@ -1918,7 +1930,7 @@ class AsyncResponses(AsyncAPIResource):
tools: An array of tools the model may call while generating a response. You can
specify which tool to use by setting the `tool_choice` parameter.
- The two categories of tools you can provide the model are:
+ We support the following categories of tools:
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
capabilities, like
@@ -1926,6 +1938,9 @@ class AsyncResponses(AsyncAPIResource):
[file search](https://platform.openai.com/docs/guides/tools-file-search).
Learn more about
[built-in tools](https://platform.openai.com/docs/guides/tools).
+ - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
+ predefined connectors such as Google Drive and Notion. Learn more about
+ [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
- **Function calls (custom tools)**: Functions that are defined by you, enabling
the model to call your own code with strongly typed arguments and outputs.
Learn more about
@@ -2154,7 +2169,7 @@ class AsyncResponses(AsyncAPIResource):
tools: An array of tools the model may call while generating a response. You can
specify which tool to use by setting the `tool_choice` parameter.
- The two categories of tools you can provide the model are:
+ We support the following categories of tools:
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
capabilities, like
@@ -2162,6 +2177,9 @@ class AsyncResponses(AsyncAPIResource):
[file search](https://platform.openai.com/docs/guides/tools-file-search).
Learn more about
[built-in tools](https://platform.openai.com/docs/guides/tools).
+ - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
+ predefined connectors such as Google Drive and Notion. Learn more about
+ [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
- **Function calls (custom tools)**: Functions that are defined by you, enabling
the model to call your own code with strongly typed arguments and outputs.
Learn more about
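
The new MCP tool category described in these docstrings is passed to `Responses.create` like any other tool entry. A minimal sketch; the model name, server label, and server URL below are placeholders for illustration and are not part of this commit:

from openai import OpenAI

client = OpenAI()

# Placeholder MCP server details; `require_approval="never"` skips the
# approval-request round trip purely for illustration.
response = client.responses.create(
    model="gpt-4.1",
    input="What transport protocols does the MCP spec support?",
    tools=[
        {
            "type": "mcp",
            "server_label": "deepwiki",
            "server_url": "https://mcp.deepwiki.com/mcp",
            "require_approval": "never",
        }
    ],
)
print(response.output_text)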
src/openai/types/audio/speech_create_params.py
@@ -20,7 +20,9 @@ class SpeechCreateParams(TypedDict, total=False):
`tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.
"""
- voice: Required[Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]]]
+ voice: Required[
+ Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]]
+ ]
"""The voice to use when generating the audio.
Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`,
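
A short sketch of requesting one of the newly added voices through the speech endpoint; the output file name and input text are illustrative:

from openai import OpenAI

client = OpenAI()

# "marin" comes from the extended voice Literal above; stream the audio
# response straight to disk.
with client.audio.speech.with_streaming_response.create(
    model="gpt-4o-mini-tts",
    voice="marin",
    input="Hello from the realtime release!",
) as response:
    response.stream_to_file("marin.mp3")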
src/openai/types/chat/chat_completion_audio_param.py
@@ -15,7 +15,9 @@ class ChatCompletionAudioParam(TypedDict, total=False):
Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.
"""
- voice: Required[Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]]]
+ voice: Required[
+ Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]]
+ ]
"""The voice the model uses to respond.
Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`,
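
The same two voices are accepted for audio chat completions. A sketch assuming an audio-capable model such as `gpt-4o-audio-preview`:

from openai import OpenAI

client = OpenAI()

# "cedar" is one of the voices added to ChatCompletionAudioParam above; the
# model name is an assumption for illustration.
completion = client.chat.completions.create(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio={"voice": "cedar", "format": "wav"},
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(completion.choices[0].message.audio.transcript)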
src/openai/types/realtime/__init__.py
@@ -0,0 +1,184 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .realtime_error import RealtimeError as RealtimeError
+from .realtime_session import RealtimeSession as RealtimeSession
+from .conversation_item import ConversationItem as ConversationItem
+from .realtime_response import RealtimeResponse as RealtimeResponse
+from .log_prob_properties import LogProbProperties as LogProbProperties
+from .realtime_truncation import RealtimeTruncation as RealtimeTruncation
+from .response_done_event import ResponseDoneEvent as ResponseDoneEvent
+from .realtime_error_event import RealtimeErrorEvent as RealtimeErrorEvent
+from .session_update_event import SessionUpdateEvent as SessionUpdateEvent
+from .mcp_list_tools_failed import McpListToolsFailed as McpListToolsFailed
+from .realtime_audio_config import RealtimeAudioConfig as RealtimeAudioConfig
+from .realtime_client_event import RealtimeClientEvent as RealtimeClientEvent
+from .realtime_server_event import RealtimeServerEvent as RealtimeServerEvent
+from .realtime_tools_config import RealtimeToolsConfig as RealtimeToolsConfig
+from .response_cancel_event import ResponseCancelEvent as ResponseCancelEvent
+from .response_create_event import ResponseCreateEvent as ResponseCreateEvent
+from .session_created_event import SessionCreatedEvent as SessionCreatedEvent
+from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent
+from .conversation_item_done import ConversationItemDone as ConversationItemDone
+from .realtime_mcp_tool_call import RealtimeMcpToolCall as RealtimeMcpToolCall
+from .realtime_mcphttp_error import RealtimeMcphttpError as RealtimeMcphttpError
+from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent
+from .conversation_item_added import ConversationItemAdded as ConversationItemAdded
+from .conversation_item_param import ConversationItemParam as ConversationItemParam
+from .realtime_connect_params import RealtimeConnectParams as RealtimeConnectParams
+from .realtime_mcp_list_tools import RealtimeMcpListTools as RealtimeMcpListTools
+from .realtime_response_usage import RealtimeResponseUsage as RealtimeResponseUsage
+from .realtime_tracing_config import RealtimeTracingConfig as RealtimeTracingConfig
+from .mcp_list_tools_completed import McpListToolsCompleted as McpListToolsCompleted
+from .realtime_response_status import RealtimeResponseStatus as RealtimeResponseStatus
+from .response_mcp_call_failed import ResponseMcpCallFailed as ResponseMcpCallFailed
+from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent
+from .rate_limits_updated_event import RateLimitsUpdatedEvent as RateLimitsUpdatedEvent
+from .realtime_truncation_param import RealtimeTruncationParam as RealtimeTruncationParam
+from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent
+from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent
+from .conversation_created_event import ConversationCreatedEvent as ConversationCreatedEvent
+from .mcp_list_tools_in_progress import McpListToolsInProgress as McpListToolsInProgress
+from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent
+from .session_update_event_param import SessionUpdateEventParam as SessionUpdateEventParam
+from .client_secret_create_params import ClientSecretCreateParams as ClientSecretCreateParams
+from .realtime_audio_config_param import RealtimeAudioConfigParam as RealtimeAudioConfigParam
+from .realtime_client_event_param import RealtimeClientEventParam as RealtimeClientEventParam
+from .realtime_mcp_protocol_error import RealtimeMcpProtocolError as RealtimeMcpProtocolError
+from .realtime_tool_choice_config import RealtimeToolChoiceConfig as RealtimeToolChoiceConfig
+from .realtime_tools_config_param import RealtimeToolsConfigParam as RealtimeToolsConfigParam
+from .realtime_tools_config_union import RealtimeToolsConfigUnion as RealtimeToolsConfigUnion
+from .response_cancel_event_param import ResponseCancelEventParam as ResponseCancelEventParam
+from .response_create_event_param import ResponseCreateEventParam as ResponseCreateEventParam
+from .response_mcp_call_completed import ResponseMcpCallCompleted as ResponseMcpCallCompleted
+from .realtime_mcp_tool_call_param import RealtimeMcpToolCallParam as RealtimeMcpToolCallParam
+from .realtime_mcphttp_error_param import RealtimeMcphttpErrorParam as RealtimeMcphttpErrorParam
+from .transcription_session_update import TranscriptionSessionUpdate as TranscriptionSessionUpdate
+from .client_secret_create_response import ClientSecretCreateResponse as ClientSecretCreateResponse
+from .realtime_client_secret_config import RealtimeClientSecretConfig as RealtimeClientSecretConfig
+from .realtime_mcp_approval_request import RealtimeMcpApprovalRequest as RealtimeMcpApprovalRequest
+from .realtime_mcp_list_tools_param import RealtimeMcpListToolsParam as RealtimeMcpListToolsParam
+from .realtime_tracing_config_param import RealtimeTracingConfigParam as RealtimeTracingConfigParam
+from .response_mcp_call_in_progress import ResponseMcpCallInProgress as ResponseMcpCallInProgress
+from .transcription_session_created import TranscriptionSessionCreated as TranscriptionSessionCreated
+from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent
+from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent
+from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent
+from .realtime_mcp_approval_response import RealtimeMcpApprovalResponse as RealtimeMcpApprovalResponse
+from .conversation_item_created_event import ConversationItemCreatedEvent as ConversationItemCreatedEvent
+from .conversation_item_deleted_event import ConversationItemDeletedEvent as ConversationItemDeletedEvent
+from .input_audio_buffer_append_event import InputAudioBufferAppendEvent as InputAudioBufferAppendEvent
+from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent as InputAudioBufferCommitEvent
+from .output_audio_buffer_clear_event import OutputAudioBufferClearEvent as OutputAudioBufferClearEvent
+from .realtime_session_create_request import RealtimeSessionCreateRequest as RealtimeSessionCreateRequest
+from .response_output_item_done_event import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent
+from .conversation_item_retrieve_event import ConversationItemRetrieveEvent as ConversationItemRetrieveEvent
+from .conversation_item_truncate_event import ConversationItemTruncateEvent as ConversationItemTruncateEvent
+from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent as InputAudioBufferClearedEvent
+from .realtime_session_create_response import RealtimeSessionCreateResponse as RealtimeSessionCreateResponse
+from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent
+from .response_mcp_call_arguments_done import ResponseMcpCallArgumentsDone as ResponseMcpCallArgumentsDone
+from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent
+from .conversation_item_truncated_event import ConversationItemTruncatedEvent as ConversationItemTruncatedEvent
+from .realtime_mcp_protocol_error_param import RealtimeMcpProtocolErrorParam as RealtimeMcpProtocolErrorParam
+from .realtime_mcp_tool_execution_error import RealtimeMcpToolExecutionError as RealtimeMcpToolExecutionError
+from .realtime_tool_choice_config_param import RealtimeToolChoiceConfigParam as RealtimeToolChoiceConfigParam
+from .realtime_tools_config_union_param import RealtimeToolsConfigUnionParam as RealtimeToolsConfigUnionParam
+from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent
+from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta as ResponseMcpCallArgumentsDelta
+from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent
+from .transcription_session_update_param import TranscriptionSessionUpdateParam as TranscriptionSessionUpdateParam
+from .realtime_client_secret_config_param import RealtimeClientSecretConfigParam as RealtimeClientSecretConfigParam
+from .realtime_mcp_approval_request_param import RealtimeMcpApprovalRequestParam as RealtimeMcpApprovalRequestParam
+from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent as TranscriptionSessionUpdatedEvent
+from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam
+from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam
+from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam
+from .input_audio_buffer_timeout_triggered import InputAudioBufferTimeoutTriggered as InputAudioBufferTimeoutTriggered
+from .realtime_mcp_approval_response_param import RealtimeMcpApprovalResponseParam as RealtimeMcpApprovalResponseParam
+from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent
+from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam as InputAudioBufferAppendEventParam
+from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam as InputAudioBufferCommitEventParam
+from .output_audio_buffer_clear_event_param import OutputAudioBufferClearEventParam as OutputAudioBufferClearEventParam
+from .realtime_session_create_request_param import (
+ RealtimeSessionCreateRequestParam as RealtimeSessionCreateRequestParam,
+)
+from .response_audio_transcript_delta_event import (
+ ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent,
+)
+from .conversation_item_retrieve_event_param import (
+ ConversationItemRetrieveEventParam as ConversationItemRetrieveEventParam,
+)
+from .conversation_item_truncate_event_param import (
+ ConversationItemTruncateEventParam as ConversationItemTruncateEventParam,
+)
+from .input_audio_buffer_speech_started_event import (
+ InputAudioBufferSpeechStartedEvent as InputAudioBufferSpeechStartedEvent,
+)
+from .input_audio_buffer_speech_stopped_event import (
+ InputAudioBufferSpeechStoppedEvent as InputAudioBufferSpeechStoppedEvent,
+)
+from .realtime_conversation_item_user_message import (
+ RealtimeConversationItemUserMessage as RealtimeConversationItemUserMessage,
+)
+from .realtime_mcp_tool_execution_error_param import (
+ RealtimeMcpToolExecutionErrorParam as RealtimeMcpToolExecutionErrorParam,
+)
+from .realtime_conversation_item_function_call import (
+ RealtimeConversationItemFunctionCall as RealtimeConversationItemFunctionCall,
+)
+from .realtime_conversation_item_system_message import (
+ RealtimeConversationItemSystemMessage as RealtimeConversationItemSystemMessage,
+)
+from .realtime_response_usage_input_token_details import (
+ RealtimeResponseUsageInputTokenDetails as RealtimeResponseUsageInputTokenDetails,
+)
+from .response_function_call_arguments_done_event import (
+ ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent,
+)
+from .realtime_conversation_item_assistant_message import (
+ RealtimeConversationItemAssistantMessage as RealtimeConversationItemAssistantMessage,
+)
+from .realtime_response_usage_output_token_details import (
+ RealtimeResponseUsageOutputTokenDetails as RealtimeResponseUsageOutputTokenDetails,
+)
+from .response_function_call_arguments_delta_event import (
+ ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent,
+)
+from .realtime_conversation_item_user_message_param import (
+ RealtimeConversationItemUserMessageParam as RealtimeConversationItemUserMessageParam,
+)
+from .realtime_transcription_session_create_request import (
+ RealtimeTranscriptionSessionCreateRequest as RealtimeTranscriptionSessionCreateRequest,
+)
+from .realtime_conversation_item_function_call_param import (
+ RealtimeConversationItemFunctionCallParam as RealtimeConversationItemFunctionCallParam,
+)
+from .realtime_conversation_item_function_call_output import (
+ RealtimeConversationItemFunctionCallOutput as RealtimeConversationItemFunctionCallOutput,
+)
+from .realtime_conversation_item_system_message_param import (
+ RealtimeConversationItemSystemMessageParam as RealtimeConversationItemSystemMessageParam,
+)
+from .realtime_conversation_item_assistant_message_param import (
+ RealtimeConversationItemAssistantMessageParam as RealtimeConversationItemAssistantMessageParam,
+)
+from .conversation_item_input_audio_transcription_segment import (
+ ConversationItemInputAudioTranscriptionSegment as ConversationItemInputAudioTranscriptionSegment,
+)
+from .realtime_transcription_session_create_request_param import (
+ RealtimeTranscriptionSessionCreateRequestParam as RealtimeTranscriptionSessionCreateRequestParam,
+)
+from .realtime_conversation_item_function_call_output_param import (
+ RealtimeConversationItemFunctionCallOutputParam as RealtimeConversationItemFunctionCallOutputParam,
+)
+from .conversation_item_input_audio_transcription_delta_event import (
+ ConversationItemInputAudioTranscriptionDeltaEvent as ConversationItemInputAudioTranscriptionDeltaEvent,
+)
+from .conversation_item_input_audio_transcription_failed_event import (
+ ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent,
+)
+from .conversation_item_input_audio_transcription_completed_event import (
+ ConversationItemInputAudioTranscriptionCompletedEvent as ConversationItemInputAudioTranscriptionCompletedEvent,
+)
src/openai/types/realtime/client_secret_create_params.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias, TypedDict
+
+from .realtime_session_create_request_param import RealtimeSessionCreateRequestParam
+from .realtime_transcription_session_create_request_param import RealtimeTranscriptionSessionCreateRequestParam
+
+__all__ = ["ClientSecretCreateParams", "ExpiresAfter", "Session"]
+
+
+class ClientSecretCreateParams(TypedDict, total=False):
+ expires_after: ExpiresAfter
+ """Configuration for the ephemeral token expiration."""
+
+ session: Session
+ """Session configuration to use for the client secret.
+
+ Choose either a realtime session or a transcription session.
+ """
+
+
+class ExpiresAfter(TypedDict, total=False):
+ anchor: Literal["created_at"]
+ """The anchor point for the ephemeral token expiration.
+
+ Only `created_at` is currently supported.
+ """
+
+ seconds: int
+ """The number of seconds from the anchor point to the expiration.
+
+ Select a value between `10` and `7200`.
+ """
+
+
+Session: TypeAlias = Union[RealtimeSessionCreateRequestParam, RealtimeTranscriptionSessionCreateRequestParam]
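
These params feed the endpoint that mints ephemeral client secrets for browser or WebRTC clients. A sketch assuming the accompanying resource is exposed as `client.realtime.client_secrets.create`:

from openai import OpenAI

client = OpenAI()

# Mint a short-lived secret for a realtime session; the session payload mirrors
# the `Session` union above (realtime vs. transcription session).
secret = client.realtime.client_secrets.create(
    expires_after={"anchor": "created_at", "seconds": 600},
    session={
        "type": "realtime",
        "model": "gpt-realtime",
        "output_modalities": ["audio"],
    },
)
print(secret.value, secret.expires_at)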
src/openai/types/realtime/client_secret_create_response.py
@@ -0,0 +1,110 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, TypeAlias
+
+from ..._models import BaseModel
+from .realtime_session_create_response import RealtimeSessionCreateResponse
+
+__all__ = [
+ "ClientSecretCreateResponse",
+ "Session",
+ "SessionRealtimeTranscriptionSessionCreateResponse",
+ "SessionRealtimeTranscriptionSessionCreateResponseAudio",
+ "SessionRealtimeTranscriptionSessionCreateResponseAudioInput",
+ "SessionRealtimeTranscriptionSessionCreateResponseAudioInputNoiseReduction",
+ "SessionRealtimeTranscriptionSessionCreateResponseAudioInputTranscription",
+ "SessionRealtimeTranscriptionSessionCreateResponseAudioInputTurnDetection",
+]
+
+
+class SessionRealtimeTranscriptionSessionCreateResponseAudioInputNoiseReduction(BaseModel):
+ type: Optional[Literal["near_field", "far_field"]] = None
+
+
+class SessionRealtimeTranscriptionSessionCreateResponseAudioInputTranscription(BaseModel):
+ language: Optional[str] = None
+ """The language of the input audio.
+
+ Supplying the input language in
+ [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+ format will improve accuracy and latency.
+ """
+
+ model: Optional[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"]] = None
+ """The model to use for transcription.
+
+ Can be `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, or `whisper-1`.
+ """
+
+ prompt: Optional[str] = None
+ """An optional text to guide the model's style or continue a previous audio
+ segment.
+
+ The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+ should match the audio language.
+ """
+
+
+class SessionRealtimeTranscriptionSessionCreateResponseAudioInputTurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+
+ silence_duration_ms: Optional[int] = None
+
+ threshold: Optional[float] = None
+
+ type: Optional[str] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class SessionRealtimeTranscriptionSessionCreateResponseAudioInput(BaseModel):
+ format: Optional[str] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ noise_reduction: Optional[SessionRealtimeTranscriptionSessionCreateResponseAudioInputNoiseReduction] = None
+ """Configuration for input audio noise reduction."""
+
+ transcription: Optional[SessionRealtimeTranscriptionSessionCreateResponseAudioInputTranscription] = None
+ """Configuration of the transcription model."""
+
+ turn_detection: Optional[SessionRealtimeTranscriptionSessionCreateResponseAudioInputTurnDetection] = None
+ """Configuration for turn detection."""
+
+
+class SessionRealtimeTranscriptionSessionCreateResponseAudio(BaseModel):
+ input: Optional[SessionRealtimeTranscriptionSessionCreateResponseAudioInput] = None
+
+
+class SessionRealtimeTranscriptionSessionCreateResponse(BaseModel):
+ id: Optional[str] = None
+ """Unique identifier for the session that looks like `sess_1234567890abcdef`."""
+
+ audio: Optional[SessionRealtimeTranscriptionSessionCreateResponseAudio] = None
+ """Configuration for input audio for the session."""
+
+ expires_at: Optional[int] = None
+ """Expiration timestamp for the session, in seconds since epoch."""
+
+ include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
+ """Additional fields to include in server outputs.
+
+ - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+ transcription.
+ """
+
+ object: Optional[str] = None
+ """The object type. Always `realtime.transcription_session`."""
+
+
+Session: TypeAlias = Union[RealtimeSessionCreateResponse, SessionRealtimeTranscriptionSessionCreateResponse]
+
+
+class ClientSecretCreateResponse(BaseModel):
+ expires_at: int
+ """Expiration timestamp for the client secret, in seconds since epoch."""
+
+ session: Session
+ """The session configuration for either a realtime or transcription session."""
+
+ value: str
+ """The generated client secret value."""
src/openai/types/realtime/conversation_created_event.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ConversationCreatedEvent", "Conversation"]
+
+
+class Conversation(BaseModel):
+ id: Optional[str] = None
+ """The unique ID of the conversation."""
+
+ object: Optional[Literal["realtime.conversation"]] = None
+ """The object type, must be `realtime.conversation`."""
+
+
+class ConversationCreatedEvent(BaseModel):
+ conversation: Conversation
+ """The conversation resource."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ type: Literal["conversation.created"]
+ """The event type, must be `conversation.created`."""
src/openai/types/realtime/conversation_item.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .realtime_mcp_tool_call import RealtimeMcpToolCall
+from .realtime_mcp_list_tools import RealtimeMcpListTools
+from .realtime_mcp_approval_request import RealtimeMcpApprovalRequest
+from .realtime_mcp_approval_response import RealtimeMcpApprovalResponse
+from .realtime_conversation_item_user_message import RealtimeConversationItemUserMessage
+from .realtime_conversation_item_function_call import RealtimeConversationItemFunctionCall
+from .realtime_conversation_item_system_message import RealtimeConversationItemSystemMessage
+from .realtime_conversation_item_assistant_message import RealtimeConversationItemAssistantMessage
+from .realtime_conversation_item_function_call_output import RealtimeConversationItemFunctionCallOutput
+
+__all__ = ["ConversationItem"]
+
+ConversationItem: TypeAlias = Annotated[
+ Union[
+ RealtimeConversationItemSystemMessage,
+ RealtimeConversationItemUserMessage,
+ RealtimeConversationItemAssistantMessage,
+ RealtimeConversationItemFunctionCall,
+ RealtimeConversationItemFunctionCallOutput,
+ RealtimeMcpApprovalResponse,
+ RealtimeMcpListTools,
+ RealtimeMcpToolCall,
+ RealtimeMcpApprovalRequest,
+ ],
+ PropertyInfo(discriminator="type"),
+]
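
Because the alias is discriminated on `type`, client code can narrow items with plain string checks. A minimal sketch (the tag values used below are assumed from the standard Realtime item types):

from openai.types.realtime import ConversationItem

def is_function_activity(item: ConversationItem) -> bool:
    # The union above is discriminated on `type`, so comparing the string tag
    # is enough to narrow the variant for type checkers and at runtime.
    return item.type in ("function_call", "function_call_output")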
src/openai/types/realtime/conversation_item_added.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ConversationItemAdded"]
+
+
+class ConversationItemAdded(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """A single item within a Realtime conversation."""
+
+ type: Literal["conversation.item.added"]
+ """The event type, must be `conversation.item.added`."""
+
+ previous_item_id: Optional[str] = None
+ """The ID of the item that precedes this one, if any.
+
+ This is used to maintain ordering when items are inserted.
+ """
src/openai/types/realtime/conversation_item_create_event.py
@@ -0,0 +1,29 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ConversationItemCreateEvent"]
+
+
+class ConversationItemCreateEvent(BaseModel):
+ item: ConversationItem
+ """A single item within a Realtime conversation."""
+
+ type: Literal["conversation.item.create"]
+ """The event type, must be `conversation.item.create`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
+
+ previous_item_id: Optional[str] = None
+ """The ID of the preceding item after which the new item will be inserted.
+
+ If not set, the new item will be appended to the end of the conversation. If set
+ to `root`, the new item will be added to the beginning of the conversation. If
+ set to an existing ID, it allows an item to be inserted mid-conversation. If the
+ ID cannot be found, an error will be returned and the item will not be added.
+ """
src/openai/types/realtime/conversation_item_create_event_param.py
@@ -0,0 +1,29 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .conversation_item_param import ConversationItemParam
+
+__all__ = ["ConversationItemCreateEventParam"]
+
+
+class ConversationItemCreateEventParam(TypedDict, total=False):
+ item: Required[ConversationItemParam]
+ """A single item within a Realtime conversation."""
+
+ type: Required[Literal["conversation.item.create"]]
+ """The event type, must be `conversation.item.create`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
+
+ previous_item_id: str
+ """The ID of the preceding item after which the new item will be inserted.
+
+ If not set, the new item will be appended to the end of the conversation. If set
+ to `root`, the new item will be added to the beginning of the conversation. If
+ set to an existing ID, it allows an item to be inserted mid-conversation. If the
+ ID cannot be found, an error will be returned and the item will not be added.
+ """
src/openai/types/realtime/conversation_item_created_event.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ConversationItemCreatedEvent"]
+
+
+class ConversationItemCreatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """A single item within a Realtime conversation."""
+
+ type: Literal["conversation.item.created"]
+ """The event type, must be `conversation.item.created`."""
+
+ previous_item_id: Optional[str] = None
+ """
+ The ID of the preceding item in the Conversation context; this allows the
+ client to understand the order of the conversation. Can be `null` if the item
+ has no predecessor.
+ """
src/openai/types/realtime/conversation_item_delete_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ConversationItemDeleteEvent"]
+
+
+class ConversationItemDeleteEvent(BaseModel):
+ item_id: str
+ """The ID of the item to delete."""
+
+ type: Literal["conversation.item.delete"]
+ """The event type, must be `conversation.item.delete`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/conversation_item_delete_event_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ConversationItemDeleteEventParam"]
+
+
+class ConversationItemDeleteEventParam(TypedDict, total=False):
+ item_id: Required[str]
+ """The ID of the item to delete."""
+
+ type: Required[Literal["conversation.item.delete"]]
+ """The event type, must be `conversation.item.delete`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/conversation_item_deleted_event.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ConversationItemDeletedEvent"]
+
+
+class ConversationItemDeletedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item that was deleted."""
+
+ type: Literal["conversation.item.deleted"]
+ """The event type, must be `conversation.item.deleted`."""
src/openai/types/realtime/conversation_item_done.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ConversationItemDone"]
+
+
+class ConversationItemDone(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """A single item within a Realtime conversation."""
+
+ type: Literal["conversation.item.done"]
+ """The event type, must be `conversation.item.done`."""
+
+ previous_item_id: Optional[str] = None
+ """The ID of the item that precedes this one, if any.
+
+ This is used to maintain ordering when items are inserted.
+ """
src/openai/types/realtime/conversation_item_input_audio_transcription_completed_event.py
@@ -0,0 +1,76 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, TypeAlias
+
+from ..._models import BaseModel
+from .log_prob_properties import LogProbProperties
+
+__all__ = [
+ "ConversationItemInputAudioTranscriptionCompletedEvent",
+ "Usage",
+ "UsageTranscriptTextUsageTokens",
+ "UsageTranscriptTextUsageTokensInputTokenDetails",
+ "UsageTranscriptTextUsageDuration",
+]
+
+
+class UsageTranscriptTextUsageTokensInputTokenDetails(BaseModel):
+ audio_tokens: Optional[int] = None
+ """Number of audio tokens billed for this request."""
+
+ text_tokens: Optional[int] = None
+ """Number of text tokens billed for this request."""
+
+
+class UsageTranscriptTextUsageTokens(BaseModel):
+ input_tokens: int
+ """Number of input tokens billed for this request."""
+
+ output_tokens: int
+ """Number of output tokens generated."""
+
+ total_tokens: int
+ """Total number of tokens used (input + output)."""
+
+ type: Literal["tokens"]
+ """The type of the usage object. Always `tokens` for this variant."""
+
+ input_token_details: Optional[UsageTranscriptTextUsageTokensInputTokenDetails] = None
+ """Details about the input tokens billed for this request."""
+
+
+class UsageTranscriptTextUsageDuration(BaseModel):
+ seconds: float
+ """Duration of the input audio in seconds."""
+
+ type: Literal["duration"]
+ """The type of the usage object. Always `duration` for this variant."""
+
+
+Usage: TypeAlias = Union[UsageTranscriptTextUsageTokens, UsageTranscriptTextUsageDuration]
+
+
+class ConversationItemInputAudioTranscriptionCompletedEvent(BaseModel):
+ content_index: int
+ """The index of the content part containing the audio."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item containing the audio."""
+
+ transcript: str
+ """The transcribed text."""
+
+ type: Literal["conversation.item.input_audio_transcription.completed"]
+ """
+ The event type, must be `conversation.item.input_audio_transcription.completed`.
+ """
+
+ usage: Usage
+ """Usage statistics for the transcription."""
+
+ logprobs: Optional[List[LogProbProperties]] = None
+ """The log probabilities of the transcription."""
src/openai/types/realtime/conversation_item_input_audio_transcription_delta_event.py
@@ -0,0 +1,29 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .log_prob_properties import LogProbProperties
+
+__all__ = ["ConversationItemInputAudioTranscriptionDeltaEvent"]
+
+
+class ConversationItemInputAudioTranscriptionDeltaEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ type: Literal["conversation.item.input_audio_transcription.delta"]
+ """The event type, must be `conversation.item.input_audio_transcription.delta`."""
+
+ content_index: Optional[int] = None
+ """The index of the content part in the item's content array."""
+
+ delta: Optional[str] = None
+ """The text delta."""
+
+ logprobs: Optional[List[LogProbProperties]] = None
+ """The log probabilities of the transcription."""
src/openai/types/realtime/conversation_item_input_audio_transcription_failed_event.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ConversationItemInputAudioTranscriptionFailedEvent", "Error"]
+
+
+class Error(BaseModel):
+ code: Optional[str] = None
+ """Error code, if any."""
+
+ message: Optional[str] = None
+ """A human-readable error message."""
+
+ param: Optional[str] = None
+ """Parameter related to the error, if any."""
+
+ type: Optional[str] = None
+ """The type of error."""
+
+
+class ConversationItemInputAudioTranscriptionFailedEvent(BaseModel):
+ content_index: int
+ """The index of the content part containing the audio."""
+
+ error: Error
+ """Details of the transcription error."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item."""
+
+ type: Literal["conversation.item.input_audio_transcription.failed"]
+ """The event type, must be `conversation.item.input_audio_transcription.failed`."""
src/openai/types/realtime/conversation_item_input_audio_transcription_segment.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ConversationItemInputAudioTranscriptionSegment"]
+
+
+class ConversationItemInputAudioTranscriptionSegment(BaseModel):
+ id: str
+ """The segment identifier."""
+
+ content_index: int
+ """The index of the input audio content part within the item."""
+
+ end: float
+ """End time of the segment in seconds."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item containing the input audio content."""
+
+ speaker: str
+ """The detected speaker label for this segment."""
+
+ start: float
+ """Start time of the segment in seconds."""
+
+ text: str
+ """The text for this segment."""
+
+ type: Literal["conversation.item.input_audio_transcription.segment"]
+ """The event type, must be `conversation.item.input_audio_transcription.segment`."""
src/openai/types/realtime/conversation_item_param.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .realtime_mcp_tool_call_param import RealtimeMcpToolCallParam
+from .realtime_mcp_list_tools_param import RealtimeMcpListToolsParam
+from .realtime_mcp_approval_request_param import RealtimeMcpApprovalRequestParam
+from .realtime_mcp_approval_response_param import RealtimeMcpApprovalResponseParam
+from .realtime_conversation_item_user_message_param import RealtimeConversationItemUserMessageParam
+from .realtime_conversation_item_function_call_param import RealtimeConversationItemFunctionCallParam
+from .realtime_conversation_item_system_message_param import RealtimeConversationItemSystemMessageParam
+from .realtime_conversation_item_assistant_message_param import RealtimeConversationItemAssistantMessageParam
+from .realtime_conversation_item_function_call_output_param import RealtimeConversationItemFunctionCallOutputParam
+
+__all__ = ["ConversationItemParam"]
+
+ConversationItemParam: TypeAlias = Union[
+ RealtimeConversationItemSystemMessageParam,
+ RealtimeConversationItemUserMessageParam,
+ RealtimeConversationItemAssistantMessageParam,
+ RealtimeConversationItemFunctionCallParam,
+ RealtimeConversationItemFunctionCallOutputParam,
+ RealtimeMcpApprovalResponseParam,
+ RealtimeMcpListToolsParam,
+ RealtimeMcpToolCallParam,
+ RealtimeMcpApprovalRequestParam,
+]
src/openai/types/realtime/conversation_item_retrieve_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ConversationItemRetrieveEvent"]
+
+
+class ConversationItemRetrieveEvent(BaseModel):
+ item_id: str
+ """The ID of the item to retrieve."""
+
+ type: Literal["conversation.item.retrieve"]
+ """The event type, must be `conversation.item.retrieve`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/conversation_item_retrieve_event_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ConversationItemRetrieveEventParam"]
+
+
+class ConversationItemRetrieveEventParam(TypedDict, total=False):
+ item_id: Required[str]
+ """The ID of the item to retrieve."""
+
+ type: Required[Literal["conversation.item.retrieve"]]
+ """The event type, must be `conversation.item.retrieve`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/conversation_item_truncate_event.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ConversationItemTruncateEvent"]
+
+
+class ConversationItemTruncateEvent(BaseModel):
+ audio_end_ms: int
+ """Inclusive duration up to which audio is truncated, in milliseconds.
+
+ If the audio_end_ms is greater than the actual audio duration, the server will
+ respond with an error.
+ """
+
+ content_index: int
+ """The index of the content part to truncate. Set this to 0."""
+
+ item_id: str
+ """The ID of the assistant message item to truncate.
+
+ Only assistant message items can be truncated.
+ """
+
+ type: Literal["conversation.item.truncate"]
+ """The event type, must be `conversation.item.truncate`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/conversation_item_truncate_event_param.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ConversationItemTruncateEventParam"]
+
+
+class ConversationItemTruncateEventParam(TypedDict, total=False):
+ audio_end_ms: Required[int]
+ """Inclusive duration up to which audio is truncated, in milliseconds.
+
+ If the audio_end_ms is greater than the actual audio duration, the server will
+ respond with an error.
+ """
+
+ content_index: Required[int]
+ """The index of the content part to truncate. Set this to 0."""
+
+ item_id: Required[str]
+ """The ID of the assistant message item to truncate.
+
+ Only assistant message items can be truncated.
+ """
+
+ type: Required[Literal["conversation.item.truncate"]]
+ """The event type, must be `conversation.item.truncate`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/conversation_item_truncated_event.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ConversationItemTruncatedEvent"]
+
+
+class ConversationItemTruncatedEvent(BaseModel):
+ audio_end_ms: int
+ """The duration up to which the audio was truncated, in milliseconds."""
+
+ content_index: int
+ """The index of the content part that was truncated."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the assistant message item that was truncated."""
+
+ type: Literal["conversation.item.truncated"]
+ """The event type, must be `conversation.item.truncated`."""
src/openai/types/realtime/input_audio_buffer_append_event.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferAppendEvent"]
+
+
+class InputAudioBufferAppendEvent(BaseModel):
+ audio: str
+ """Base64-encoded audio bytes.
+
+ This must be in the format specified by the `input_audio_format` field in the
+ session configuration.
+ """
+
+ type: Literal["input_audio_buffer.append"]
+ """The event type, must be `input_audio_buffer.append`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/input_audio_buffer_append_event_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["InputAudioBufferAppendEventParam"]
+
+
+class InputAudioBufferAppendEventParam(TypedDict, total=False):
+ audio: Required[str]
+ """Base64-encoded audio bytes.
+
+ This must be in the format specified by the `input_audio_format` field in the
+ session configuration.
+ """
+
+ type: Required[Literal["input_audio_buffer.append"]]
+ """The event type, must be `input_audio_buffer.append`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/input_audio_buffer_clear_event.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferClearEvent"]
+
+
+class InputAudioBufferClearEvent(BaseModel):
+ type: Literal["input_audio_buffer.clear"]
+ """The event type, must be `input_audio_buffer.clear`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/input_audio_buffer_clear_event_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["InputAudioBufferClearEventParam"]
+
+
+class InputAudioBufferClearEventParam(TypedDict, total=False):
+ type: Required[Literal["input_audio_buffer.clear"]]
+ """The event type, must be `input_audio_buffer.clear`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/input_audio_buffer_cleared_event.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferClearedEvent"]
+
+
+class InputAudioBufferClearedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ type: Literal["input_audio_buffer.cleared"]
+ """The event type, must be `input_audio_buffer.cleared`."""
src/openai/types/realtime/input_audio_buffer_commit_event.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferCommitEvent"]
+
+
+class InputAudioBufferCommitEvent(BaseModel):
+ type: Literal["input_audio_buffer.commit"]
+ """The event type, must be `input_audio_buffer.commit`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/input_audio_buffer_commit_event_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["InputAudioBufferCommitEventParam"]
+
+
+class InputAudioBufferCommitEventParam(TypedDict, total=False):
+ type: Required[Literal["input_audio_buffer.commit"]]
+ """The event type, must be `input_audio_buffer.commit`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/input_audio_buffer_committed_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferCommittedEvent"]
+
+
+class InputAudioBufferCommittedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item that will be created."""
+
+ type: Literal["input_audio_buffer.committed"]
+ """The event type, must be `input_audio_buffer.committed`."""
+
+ previous_item_id: Optional[str] = None
+ """
+ The ID of the preceding item after which the new item will be inserted. Can be
+ `null` if the item has no predecessor.
+ """
src/openai/types/realtime/input_audio_buffer_speech_started_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferSpeechStartedEvent"]
+
+
+class InputAudioBufferSpeechStartedEvent(BaseModel):
+ audio_start_ms: int
+ """
+ Milliseconds from the start of all audio written to the buffer during the
+ session when speech was first detected. This will correspond to the beginning of
+ audio sent to the model, and thus includes the `prefix_padding_ms` configured in
+ the Session.
+ """
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item that will be created when speech stops."""
+
+ type: Literal["input_audio_buffer.speech_started"]
+ """The event type, must be `input_audio_buffer.speech_started`."""
src/openai/types/realtime/input_audio_buffer_speech_stopped_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferSpeechStoppedEvent"]
+
+
+class InputAudioBufferSpeechStoppedEvent(BaseModel):
+ audio_end_ms: int
+ """Milliseconds since the session started when speech stopped.
+
+ This will correspond to the end of audio sent to the model, and thus includes
+ the `min_silence_duration_ms` configured in the Session.
+ """
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item that will be created."""
+
+ type: Literal["input_audio_buffer.speech_stopped"]
+ """The event type, must be `input_audio_buffer.speech_stopped`."""
src/openai/types/realtime/input_audio_buffer_timeout_triggered.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferTimeoutTriggered"]
+
+
+class InputAudioBufferTimeoutTriggered(BaseModel):
+ audio_end_ms: int
+ """Millisecond offset where speech ended within the buffered audio."""
+
+ audio_start_ms: int
+ """Millisecond offset where speech started within the buffered audio."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item associated with this segment."""
+
+ type: Literal["input_audio_buffer.timeout_triggered"]
+ """The event type, must be `input_audio_buffer.timeout_triggered`."""
src/openai/types/realtime/log_prob_properties.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+
+from ..._models import BaseModel
+
+__all__ = ["LogProbProperties"]
+
+
+class LogProbProperties(BaseModel):
+ token: str
+ """The token that was used to generate the log probability."""
+
+ bytes: List[int]
+ """The bytes that were used to generate the log probability."""
+
+ logprob: float
+ """The log probability of the token."""
src/openai/types/realtime/mcp_list_tools_completed.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["McpListToolsCompleted"]
+
+
+class McpListToolsCompleted(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the MCP list tools item."""
+
+ type: Literal["mcp_list_tools.completed"]
+ """The event type, must be `mcp_list_tools.completed`."""
src/openai/types/realtime/mcp_list_tools_failed.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["McpListToolsFailed"]
+
+
+class McpListToolsFailed(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the MCP list tools item."""
+
+ type: Literal["mcp_list_tools.failed"]
+ """The event type, must be `mcp_list_tools.failed`."""
src/openai/types/realtime/mcp_list_tools_in_progress.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["McpListToolsInProgress"]
+
+
+class McpListToolsInProgress(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the MCP list tools item."""
+
+ type: Literal["mcp_list_tools.in_progress"]
+ """The event type, must be `mcp_list_tools.in_progress`."""
src/openai/types/realtime/output_audio_buffer_clear_event.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["OutputAudioBufferClearEvent"]
+
+
+class OutputAudioBufferClearEvent(BaseModel):
+ type: Literal["output_audio_buffer.clear"]
+ """The event type, must be `output_audio_buffer.clear`."""
+
+ event_id: Optional[str] = None
+ """The unique ID of the client event used for error handling."""
src/openai/types/realtime/output_audio_buffer_clear_event_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["OutputAudioBufferClearEventParam"]
+
+
+class OutputAudioBufferClearEventParam(TypedDict, total=False):
+ type: Required[Literal["output_audio_buffer.clear"]]
+ """The event type, must be `output_audio_buffer.clear`."""
+
+ event_id: str
+ """The unique ID of the client event used for error handling."""
src/openai/types/realtime/rate_limits_updated_event.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RateLimitsUpdatedEvent", "RateLimit"]
+
+
+class RateLimit(BaseModel):
+ limit: Optional[int] = None
+ """The maximum allowed value for the rate limit."""
+
+ name: Optional[Literal["requests", "tokens"]] = None
+ """The name of the rate limit (`requests`, `tokens`)."""
+
+ remaining: Optional[int] = None
+ """The remaining value before the limit is reached."""
+
+ reset_seconds: Optional[float] = None
+ """Seconds until the rate limit resets."""
+
+
+class RateLimitsUpdatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ rate_limits: List[RateLimit]
+ """List of rate limit information."""
+
+ type: Literal["rate_limits.updated"]
+ """The event type, must be `rate_limits.updated`."""
src/openai/types/realtime/realtime_audio_config.py
@@ -0,0 +1,184 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeAudioConfig", "Input", "InputNoiseReduction", "InputTranscription", "InputTurnDetection", "Output"]
+
+
+class InputNoiseReduction(BaseModel):
+ type: Optional[Literal["near_field", "far_field"]] = None
+ """Type of noise reduction.
+
+ `near_field` is for close-talking microphones such as headphones, `far_field` is
+ for far-field microphones such as laptop or conference room microphones.
+ """
+
+
+class InputTranscription(BaseModel):
+ language: Optional[str] = None
+ """The language of the input audio.
+
+ Supplying the input language in
+ [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+ format will improve accuracy and latency.
+ """
+
+ model: Optional[
+ Literal[
+ "whisper-1",
+ "gpt-4o-transcribe-latest",
+ "gpt-4o-mini-transcribe",
+ "gpt-4o-transcribe",
+ "gpt-4o-transcribe-diarize",
+ ]
+ ] = None
+ """The model to use for transcription.
+
+ Current options are `whisper-1`, `gpt-4o-transcribe-latest`,
+ `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`.
+ """
+
+ prompt: Optional[str] = None
+ """
+ An optional text to guide the model's style or continue a previous audio
+ segment. For `whisper-1`, the
+ [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+ For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+ "expect words related to technology".
+ """
+
+
+class InputTurnDetection(BaseModel):
+ create_response: Optional[bool] = None
+ """
+ Whether or not to automatically generate a response when a VAD stop event
+ occurs.
+ """
+
+ eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
+ """Used only for `semantic_vad` mode.
+
+ The eagerness of the model to respond. `low` will wait longer for the user to
+ continue speaking, `high` will respond more quickly. `auto` is the default and
+ is equivalent to `medium`.
+ """
+
+ idle_timeout_ms: Optional[int] = None
+ """
+ Optional idle timeout after which turn detection automatically times out when
+ no additional audio is received.
+ """
+
+ interrupt_response: Optional[bool] = None
+ """
+ Whether or not to automatically interrupt any ongoing response with output to
+ the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ occurs.
+ """
+
+ prefix_padding_ms: Optional[int] = None
+ """Used only for `server_vad` mode.
+
+ Amount of audio to include before the VAD detected speech (in milliseconds).
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Used only for `server_vad` mode.
+
+ Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
+ With shorter values the model will respond more quickly, but may jump in on
+ short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Used only for `server_vad` mode.
+
+ Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
+ threshold will require louder audio to activate the model, and thus might
+ perform better in noisy environments.
+ """
+
+ type: Optional[Literal["server_vad", "semantic_vad"]] = None
+ """Type of turn detection."""
+
+
+class Input(BaseModel):
+ format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of input audio.
+
+ Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+ be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+ byte order.
+ """
+
+ noise_reduction: Optional[InputNoiseReduction] = None
+ """Configuration for input audio noise reduction.
+
+ This can be set to `null` to turn off. Noise reduction filters audio added to
+ the input audio buffer before it is sent to VAD and the model. Filtering the
+ audio can improve VAD and turn detection accuracy (reducing false positives) and
+ model performance by improving perception of the input audio.
+ """
+
+ transcription: Optional[InputTranscription] = None
+ """
+ Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through
+ [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+ and should be treated as guidance of input audio content rather than precisely
+ what the model heard. The client can optionally set the language and prompt for
+ transcription, these offer additional guidance to the transcription service.
+ """
+
+ turn_detection: Optional[InputTurnDetection] = None
+ """Configuration for turn detection, ether Server VAD or Semantic VAD.
+
+ This can be set to `null` to turn off, in which case the client must manually
+ trigger model response. Server VAD means that the model will detect the start
+ and end of speech based on audio volume and respond at the end of user speech.
+ Semantic VAD is more advanced and uses a turn detection model (in conjunction
+ with VAD) to semantically estimate whether the user has finished speaking, then
+ dynamically sets a timeout based on this probability. For example, if user audio
+ trails off with "uhhm", the model will score a low probability of turn end and
+ wait longer for the user to continue speaking. This can be useful for more
+ natural conversations, but may have a higher latency.
+ """
+
+
+class Output(BaseModel):
+ format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio.
+
+ Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is
+ sampled at a rate of 24kHz.
+ """
+
+ speed: Optional[float] = None
+ """The speed of the model's spoken response.
+
+ 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
+ This value can only be changed in between model turns, not while a response is
+ in progress.
+ """
+
+ voice: Union[
+ str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"], None
+ ] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`.
+ """
+
+
+class RealtimeAudioConfig(BaseModel):
+ input: Optional[Input] = None
+
+ output: Optional[Output] = None
src/openai/types/realtime/realtime_audio_config_param.py
@@ -0,0 +1,187 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, TypedDict
+
+__all__ = [
+ "RealtimeAudioConfigParam",
+ "Input",
+ "InputNoiseReduction",
+ "InputTranscription",
+ "InputTurnDetection",
+ "Output",
+]
+
+
+class InputNoiseReduction(TypedDict, total=False):
+ type: Literal["near_field", "far_field"]
+ """Type of noise reduction.
+
+ `near_field` is for close-talking microphones such as headphones, `far_field` is
+ for far-field microphones such as laptop or conference room microphones.
+ """
+
+
+class InputTranscription(TypedDict, total=False):
+ language: str
+ """The language of the input audio.
+
+ Supplying the input language in
+ [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+ format will improve accuracy and latency.
+ """
+
+ model: Literal[
+ "whisper-1",
+ "gpt-4o-transcribe-latest",
+ "gpt-4o-mini-transcribe",
+ "gpt-4o-transcribe",
+ "gpt-4o-transcribe-diarize",
+ ]
+ """The model to use for transcription.
+
+ Current options are `whisper-1`, `gpt-4o-transcribe-latest`,
+ `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`.
+ """
+
+ prompt: str
+ """
+ An optional text to guide the model's style or continue a previous audio
+ segment. For `whisper-1`, the
+ [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+ For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+ "expect words related to technology".
+ """
+
+
+class InputTurnDetection(TypedDict, total=False):
+ create_response: bool
+ """
+ Whether or not to automatically generate a response when a VAD stop event
+ occurs.
+ """
+
+ eagerness: Literal["low", "medium", "high", "auto"]
+ """Used only for `semantic_vad` mode.
+
+ The eagerness of the model to respond. `low` will wait longer for the user to
+ continue speaking, `high` will respond more quickly. `auto` is the default and
+ is equivalent to `medium`.
+ """
+
+ idle_timeout_ms: Optional[int]
+ """
+ Optional idle timeout after which turn detection will auto-timeout when no
+ additional audio is received.
+ """
+
+ interrupt_response: bool
+ """
+ Whether or not to automatically interrupt any ongoing response with output to
+ the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ occurs.
+ """
+
+ prefix_padding_ms: int
+ """Used only for `server_vad` mode.
+
+ Amount of audio to include before the VAD detected speech (in milliseconds).
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: int
+ """Used only for `server_vad` mode.
+
+ Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
+ With shorter values the model will respond more quickly, but may jump in on
+ short pauses from the user.
+ """
+
+ threshold: float
+ """Used only for `server_vad` mode.
+
+ Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
+ threshold will require louder audio to activate the model, and thus might
+ perform better in noisy environments.
+ """
+
+ type: Literal["server_vad", "semantic_vad"]
+ """Type of turn detection."""
+
+
+class Input(TypedDict, total=False):
+ format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of input audio.
+
+ Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+ be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+ byte order.
+ """
+
+ noise_reduction: InputNoiseReduction
+ """Configuration for input audio noise reduction.
+
+ This can be set to `null` to turn off. Noise reduction filters audio added to
+ the input audio buffer before it is sent to VAD and the model. Filtering the
+ audio can improve VAD and turn detection accuracy (reducing false positives) and
+ model performance by improving perception of the input audio.
+ """
+
+ transcription: InputTranscription
+ """
+ Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through
+ [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+ and should be treated as guidance of input audio content rather than precisely
+ what the model heard. The client can optionally set the language and prompt for
+ transcription, these offer additional guidance to the transcription service.
+ """
+
+ turn_detection: InputTurnDetection
+ """Configuration for turn detection, ether Server VAD or Semantic VAD.
+
+ This can be set to `null` to turn off, in which case the client must manually
+ trigger model response. Server VAD means that the model will detect the start
+ and end of speech based on audio volume and respond at the end of user speech.
+ Semantic VAD is more advanced and uses a turn detection model (in conjunction
+ with VAD) to semantically estimate whether the user has finished speaking, then
+ dynamically sets a timeout based on this probability. For example, if user audio
+ trails off with "uhhm", the model will score a low probability of turn end and
+ wait longer for the user to continue speaking. This can be useful for more
+ natural conversations, but may have a higher latency.
+ """
+
+
+class Output(TypedDict, total=False):
+ format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of output audio.
+
+ Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is
+ sampled at a rate of 24kHz.
+ """
+
+ speed: float
+ """The speed of the model's spoken response.
+
+ 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
+ This value can only be changed in between model turns, not while a response is
+ in progress.
+ """
+
+ voice: Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]]
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`.
+ """
+
+
+class RealtimeAudioConfigParam(TypedDict, total=False):
+ input: Input
+
+ output: Output
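
A sketch of a payload matching RealtimeAudioConfigParam; the values are illustrative, and treating this as the `audio` block of a session update is an assumption based on the example apps rather than anything defined in this file:

from openai.types.realtime.realtime_audio_config_param import RealtimeAudioConfigParam

# Illustrative audio configuration: PCM16 in and out, near-field noise reduction,
# mini transcription model, and semantic VAD with default eagerness.
audio_config: RealtimeAudioConfigParam = {
    "input": {
        "format": "pcm16",
        "noise_reduction": {"type": "near_field"},
        "transcription": {"model": "gpt-4o-mini-transcribe", "language": "en"},
        "turn_detection": {"type": "semantic_vad", "eagerness": "auto"},
    },
    "output": {"format": "pcm16", "voice": "marin", "speed": 1.0},
}
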
src/openai/types/realtime/realtime_client_event.py
@@ -0,0 +1,38 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .session_update_event import SessionUpdateEvent
+from .response_cancel_event import ResponseCancelEvent
+from .response_create_event import ResponseCreateEvent
+from .transcription_session_update import TranscriptionSessionUpdate
+from .conversation_item_create_event import ConversationItemCreateEvent
+from .conversation_item_delete_event import ConversationItemDeleteEvent
+from .input_audio_buffer_clear_event import InputAudioBufferClearEvent
+from .input_audio_buffer_append_event import InputAudioBufferAppendEvent
+from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent
+from .output_audio_buffer_clear_event import OutputAudioBufferClearEvent
+from .conversation_item_retrieve_event import ConversationItemRetrieveEvent
+from .conversation_item_truncate_event import ConversationItemTruncateEvent
+
+__all__ = ["RealtimeClientEvent"]
+
+RealtimeClientEvent: TypeAlias = Annotated[
+ Union[
+ ConversationItemCreateEvent,
+ ConversationItemDeleteEvent,
+ ConversationItemRetrieveEvent,
+ ConversationItemTruncateEvent,
+ InputAudioBufferAppendEvent,
+ InputAudioBufferClearEvent,
+ OutputAudioBufferClearEvent,
+ InputAudioBufferCommitEvent,
+ ResponseCancelEvent,
+ ResponseCreateEvent,
+ SessionUpdateEvent,
+ TranscriptionSessionUpdate,
+ ],
+ PropertyInfo(discriminator="type"),
+]
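
Because every member of the union carries a literal `type` field, a handler can narrow RealtimeClientEvent by matching on that discriminator. A hedged sketch; the `"session.update"` and `"input_audio_buffer.append"` literals belong to event types defined elsewhere in this commit and are assumed here:

from openai.types.realtime.realtime_client_event import RealtimeClientEvent


def describe_client_event(event: RealtimeClientEvent) -> str:
    # Matching on the `type` literal narrows the union to one concrete event class.
    if event.type == "session.update":
        return "client requested a session configuration change"
    if event.type == "input_audio_buffer.append":
        return "client appended audio to the input buffer"
    return f"other client event: {event.type}"
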
src/openai/types/realtime/realtime_client_event_param.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .session_update_event_param import SessionUpdateEventParam
+from .response_cancel_event_param import ResponseCancelEventParam
+from .response_create_event_param import ResponseCreateEventParam
+from .transcription_session_update_param import TranscriptionSessionUpdateParam
+from .conversation_item_create_event_param import ConversationItemCreateEventParam
+from .conversation_item_delete_event_param import ConversationItemDeleteEventParam
+from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam
+from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam
+from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam
+from .output_audio_buffer_clear_event_param import OutputAudioBufferClearEventParam
+from .conversation_item_retrieve_event_param import ConversationItemRetrieveEventParam
+from .conversation_item_truncate_event_param import ConversationItemTruncateEventParam
+
+__all__ = ["RealtimeClientEventParam"]
+
+RealtimeClientEventParam: TypeAlias = Union[
+ ConversationItemCreateEventParam,
+ ConversationItemDeleteEventParam,
+ ConversationItemRetrieveEventParam,
+ ConversationItemTruncateEventParam,
+ InputAudioBufferAppendEventParam,
+ InputAudioBufferClearEventParam,
+ OutputAudioBufferClearEventParam,
+ InputAudioBufferCommitEventParam,
+ ResponseCancelEventParam,
+ ResponseCreateEventParam,
+ SessionUpdateEventParam,
+ TranscriptionSessionUpdateParam,
+]
src/openai/types/realtime/realtime_client_secret_config.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeClientSecretConfig", "ExpiresAfter"]
+
+
+class ExpiresAfter(BaseModel):
+ anchor: Literal["created_at"]
+ """The anchor point for the ephemeral token expiration.
+
+ Only `created_at` is currently supported.
+ """
+
+ seconds: Optional[int] = None
+ """The number of seconds from the anchor point to the expiration.
+
+ Select a value between `10` and `7200`.
+ """
+
+
+class RealtimeClientSecretConfig(BaseModel):
+ expires_after: Optional[ExpiresAfter] = None
+ """Configuration for the ephemeral token expiration."""
src/openai/types/realtime/realtime_client_secret_config_param.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeClientSecretConfigParam", "ExpiresAfter"]
+
+
+class ExpiresAfter(TypedDict, total=False):
+ anchor: Required[Literal["created_at"]]
+ """The anchor point for the ephemeral token expiration.
+
+ Only `created_at` is currently supported.
+ """
+
+ seconds: int
+ """The number of seconds from the anchor point to the expiration.
+
+ Select a value between `10` and `7200`.
+ """
+
+
+class RealtimeClientSecretConfigParam(TypedDict, total=False):
+ expires_after: ExpiresAfter
+ """Configuration for the ephemeral token expiration."""
src/openai/types/realtime/realtime_connect_params.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["RealtimeConnectParams"]
+
+
+class RealtimeConnectParams(TypedDict, total=False):
+ model: Required[str]
src/openai/types/realtime/realtime_conversation_item_assistant_message.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeConversationItemAssistantMessage", "Content"]
+
+
+class Content(BaseModel):
+ text: Optional[str] = None
+ """The text content."""
+
+ type: Optional[Literal["text"]] = None
+ """The content type. Always `text` for assistant messages."""
+
+
+class RealtimeConversationItemAssistantMessage(BaseModel):
+ content: List[Content]
+ """The content of the message."""
+
+ role: Literal["assistant"]
+ """The role of the message sender. Always `assistant`."""
+
+ type: Literal["message"]
+ """The type of the item. Always `message`."""
+
+ id: Optional[str] = None
+ """The unique ID of the item."""
+
+ object: Optional[Literal["realtime.item"]] = None
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Optional[Literal["completed", "incomplete", "in_progress"]] = None
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_conversation_item_assistant_message_param.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeConversationItemAssistantMessageParam", "Content"]
+
+
+class Content(TypedDict, total=False):
+ text: str
+ """The text content."""
+
+ type: Literal["text"]
+ """The content type. Always `text` for assistant messages."""
+
+
+class RealtimeConversationItemAssistantMessageParam(TypedDict, total=False):
+ content: Required[Iterable[Content]]
+ """The content of the message."""
+
+ role: Required[Literal["assistant"]]
+ """The role of the message sender. Always `assistant`."""
+
+ type: Required[Literal["message"]]
+ """The type of the item. Always `message`."""
+
+ id: str
+ """The unique ID of the item."""
+
+ object: Literal["realtime.item"]
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Literal["completed", "incomplete", "in_progress"]
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_conversation_item_function_call.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeConversationItemFunctionCall"]
+
+
+class RealtimeConversationItemFunctionCall(BaseModel):
+ arguments: str
+ """The arguments of the function call."""
+
+ name: str
+ """The name of the function being called."""
+
+ type: Literal["function_call"]
+ """The type of the item. Always `function_call`."""
+
+ id: Optional[str] = None
+ """The unique ID of the item."""
+
+ call_id: Optional[str] = None
+ """The ID of the function call."""
+
+ object: Optional[Literal["realtime.item"]] = None
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Optional[Literal["completed", "incomplete", "in_progress"]] = None
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_conversation_item_function_call_output.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeConversationItemFunctionCallOutput"]
+
+
+class RealtimeConversationItemFunctionCallOutput(BaseModel):
+ call_id: str
+ """The ID of the function call this output is for."""
+
+ output: str
+ """The output of the function call."""
+
+ type: Literal["function_call_output"]
+ """The type of the item. Always `function_call_output`."""
+
+ id: Optional[str] = None
+ """The unique ID of the item."""
+
+ object: Optional[Literal["realtime.item"]] = None
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Optional[Literal["completed", "incomplete", "in_progress"]] = None
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_conversation_item_function_call_output_param.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeConversationItemFunctionCallOutputParam"]
+
+
+class RealtimeConversationItemFunctionCallOutputParam(TypedDict, total=False):
+ call_id: Required[str]
+ """The ID of the function call this output is for."""
+
+ output: Required[str]
+ """The output of the function call."""
+
+ type: Required[Literal["function_call_output"]]
+ """The type of the item. Always `function_call_output`."""
+
+ id: str
+ """The unique ID of the item."""
+
+ object: Literal["realtime.item"]
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Literal["completed", "incomplete", "in_progress"]
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_conversation_item_function_call_param.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeConversationItemFunctionCallParam"]
+
+
+class RealtimeConversationItemFunctionCallParam(TypedDict, total=False):
+ arguments: Required[str]
+ """The arguments of the function call."""
+
+ name: Required[str]
+ """The name of the function being called."""
+
+ type: Required[Literal["function_call"]]
+ """The type of the item. Always `function_call`."""
+
+ id: str
+ """The unique ID of the item."""
+
+ call_id: str
+ """The ID of the function call."""
+
+ object: Literal["realtime.item"]
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Literal["completed", "incomplete", "in_progress"]
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_conversation_item_system_message.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeConversationItemSystemMessage", "Content"]
+
+
+class Content(BaseModel):
+ text: Optional[str] = None
+ """The text content."""
+
+ type: Optional[Literal["input_text"]] = None
+ """The content type. Always `input_text` for system messages."""
+
+
+class RealtimeConversationItemSystemMessage(BaseModel):
+ content: List[Content]
+ """The content of the message."""
+
+ role: Literal["system"]
+ """The role of the message sender. Always `system`."""
+
+ type: Literal["message"]
+ """The type of the item. Always `message`."""
+
+ id: Optional[str] = None
+ """The unique ID of the item."""
+
+ object: Optional[Literal["realtime.item"]] = None
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Optional[Literal["completed", "incomplete", "in_progress"]] = None
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_conversation_item_system_message_param.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeConversationItemSystemMessageParam", "Content"]
+
+
+class Content(TypedDict, total=False):
+ text: str
+ """The text content."""
+
+ type: Literal["input_text"]
+ """The content type. Always `input_text` for system messages."""
+
+
+class RealtimeConversationItemSystemMessageParam(TypedDict, total=False):
+ content: Required[Iterable[Content]]
+ """The content of the message."""
+
+ role: Required[Literal["system"]]
+ """The role of the message sender. Always `system`."""
+
+ type: Required[Literal["message"]]
+ """The type of the item. Always `message`."""
+
+ id: str
+ """The unique ID of the item."""
+
+ object: Literal["realtime.item"]
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Literal["completed", "incomplete", "in_progress"]
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_conversation_item_user_message.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeConversationItemUserMessage", "Content"]
+
+
+class Content(BaseModel):
+ audio: Optional[str] = None
+ """Base64-encoded audio bytes (for `input_audio`)."""
+
+ text: Optional[str] = None
+ """The text content (for `input_text`)."""
+
+ transcript: Optional[str] = None
+ """Transcript of the audio (for `input_audio`)."""
+
+ type: Optional[Literal["input_text", "input_audio"]] = None
+ """The content type (`input_text` or `input_audio`)."""
+
+
+class RealtimeConversationItemUserMessage(BaseModel):
+ content: List[Content]
+ """The content of the message."""
+
+ role: Literal["user"]
+ """The role of the message sender. Always `user`."""
+
+ type: Literal["message"]
+ """The type of the item. Always `message`."""
+
+ id: Optional[str] = None
+ """The unique ID of the item."""
+
+ object: Optional[Literal["realtime.item"]] = None
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Optional[Literal["completed", "incomplete", "in_progress"]] = None
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_conversation_item_user_message_param.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeConversationItemUserMessageParam", "Content"]
+
+
+class Content(TypedDict, total=False):
+ audio: str
+ """Base64-encoded audio bytes (for `input_audio`)."""
+
+ text: str
+ """The text content (for `input_text`)."""
+
+ transcript: str
+ """Transcript of the audio (for `input_audio`)."""
+
+ type: Literal["input_text", "input_audio"]
+ """The content type (`input_text` or `input_audio`)."""
+
+
+class RealtimeConversationItemUserMessageParam(TypedDict, total=False):
+ content: Required[Iterable[Content]]
+ """The content of the message."""
+
+ role: Required[Literal["user"]]
+ """The role of the message sender. Always `user`."""
+
+ type: Required[Literal["message"]]
+ """The type of the item. Always `message`."""
+
+ id: str
+ """The unique ID of the item."""
+
+ object: Literal["realtime.item"]
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ status: Literal["completed", "incomplete", "in_progress"]
+ """The status of the item. Has no effect on the conversation."""
src/openai/types/realtime/realtime_error.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeError"]
+
+
+class RealtimeError(BaseModel):
+ message: str
+ """A human-readable error message."""
+
+ type: str
+ """The type of error (e.g., "invalid_request_error", "server_error")."""
+
+ code: Optional[str] = None
+ """Error code, if any."""
+
+ event_id: Optional[str] = None
+ """The event_id of the client event that caused the error, if applicable."""
+
+ param: Optional[str] = None
+ """Parameter related to the error, if any."""
src/openai/types/realtime/realtime_error_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .realtime_error import RealtimeError
+
+__all__ = ["RealtimeErrorEvent"]
+
+
+class RealtimeErrorEvent(BaseModel):
+ error: RealtimeError
+ """Details of the error."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ type: Literal["error"]
+ """The event type, must be `error`."""
src/openai/types/realtime/realtime_mcp_approval_request.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeMcpApprovalRequest"]
+
+
+class RealtimeMcpApprovalRequest(BaseModel):
+ id: str
+ """The unique ID of the approval request."""
+
+ arguments: str
+ """A JSON string of arguments for the tool."""
+
+ name: str
+ """The name of the tool to run."""
+
+ server_label: str
+ """The label of the MCP server making the request."""
+
+ type: Literal["mcp_approval_request"]
+ """The type of the item. Always `mcp_approval_request`."""
src/openai/types/realtime/realtime_mcp_approval_request_param.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeMcpApprovalRequestParam"]
+
+
+class RealtimeMcpApprovalRequestParam(TypedDict, total=False):
+ id: Required[str]
+ """The unique ID of the approval request."""
+
+ arguments: Required[str]
+ """A JSON string of arguments for the tool."""
+
+ name: Required[str]
+ """The name of the tool to run."""
+
+ server_label: Required[str]
+ """The label of the MCP server making the request."""
+
+ type: Required[Literal["mcp_approval_request"]]
+ """The type of the item. Always `mcp_approval_request`."""
src/openai/types/realtime/realtime_mcp_approval_response.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeMcpApprovalResponse"]
+
+
+class RealtimeMcpApprovalResponse(BaseModel):
+ id: str
+ """The unique ID of the approval response."""
+
+ approval_request_id: str
+ """The ID of the approval request being answered."""
+
+ approve: bool
+ """Whether the request was approved."""
+
+ type: Literal["mcp_approval_response"]
+ """The type of the item. Always `mcp_approval_response`."""
+
+ reason: Optional[str] = None
+ """Optional reason for the decision."""
src/openai/types/realtime/realtime_mcp_approval_response_param.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeMcpApprovalResponseParam"]
+
+
+class RealtimeMcpApprovalResponseParam(TypedDict, total=False):
+ id: Required[str]
+ """The unique ID of the approval response."""
+
+ approval_request_id: Required[str]
+ """The ID of the approval request being answered."""
+
+ approve: Required[bool]
+ """Whether the request was approved."""
+
+ type: Required[Literal["mcp_approval_response"]]
+ """The type of the item. Always `mcp_approval_response`."""
+
+ reason: Optional[str]
+ """Optional reason for the decision."""
src/openai/types/realtime/realtime_mcp_list_tools.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeMcpListTools", "Tool"]
+
+
+class Tool(BaseModel):
+ input_schema: object
+ """The JSON schema describing the tool's input."""
+
+ name: str
+ """The name of the tool."""
+
+ annotations: Optional[object] = None
+ """Additional annotations about the tool."""
+
+ description: Optional[str] = None
+ """The description of the tool."""
+
+
+class RealtimeMcpListTools(BaseModel):
+ server_label: str
+ """The label of the MCP server."""
+
+ tools: List[Tool]
+ """The tools available on the server."""
+
+ type: Literal["mcp_list_tools"]
+ """The type of the item. Always `mcp_list_tools`."""
+
+ id: Optional[str] = None
+ """The unique ID of the list."""
src/openai/types/realtime/realtime_mcp_list_tools_param.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeMcpListToolsParam", "Tool"]
+
+
+class Tool(TypedDict, total=False):
+ input_schema: Required[object]
+ """The JSON schema describing the tool's input."""
+
+ name: Required[str]
+ """The name of the tool."""
+
+ annotations: Optional[object]
+ """Additional annotations about the tool."""
+
+ description: Optional[str]
+ """The description of the tool."""
+
+
+class RealtimeMcpListToolsParam(TypedDict, total=False):
+ server_label: Required[str]
+ """The label of the MCP server."""
+
+ tools: Required[Iterable[Tool]]
+ """The tools available on the server."""
+
+ type: Required[Literal["mcp_list_tools"]]
+ """The type of the item. Always `mcp_list_tools`."""
+
+ id: str
+ """The unique ID of the list."""
src/openai/types/realtime/realtime_mcp_protocol_error.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeMcpProtocolError"]
+
+
+class RealtimeMcpProtocolError(BaseModel):
+ code: int
+
+ message: str
+
+ type: Literal["protocol_error"]
src/openai/types/realtime/realtime_mcp_protocol_error_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeMcpProtocolErrorParam"]
+
+
+class RealtimeMcpProtocolErrorParam(TypedDict, total=False):
+ code: Required[int]
+
+ message: Required[str]
+
+ type: Required[Literal["protocol_error"]]
src/openai/types/realtime/realtime_mcp_tool_call.py
@@ -0,0 +1,43 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .realtime_mcphttp_error import RealtimeMcphttpError
+from .realtime_mcp_protocol_error import RealtimeMcpProtocolError
+from .realtime_mcp_tool_execution_error import RealtimeMcpToolExecutionError
+
+__all__ = ["RealtimeMcpToolCall", "Error"]
+
+Error: TypeAlias = Annotated[
+ Union[RealtimeMcpProtocolError, RealtimeMcpToolExecutionError, RealtimeMcphttpError, None],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class RealtimeMcpToolCall(BaseModel):
+ id: str
+ """The unique ID of the tool call."""
+
+ arguments: str
+ """A JSON string of the arguments passed to the tool."""
+
+ name: str
+ """The name of the tool that was run."""
+
+ server_label: str
+ """The label of the MCP server running the tool."""
+
+ type: Literal["mcp_tool_call"]
+ """The type of the item. Always `mcp_tool_call`."""
+
+ approval_request_id: Optional[str] = None
+ """The ID of an associated approval request, if any."""
+
+ error: Optional[Error] = None
+ """The error from the tool call, if any."""
+
+ output: Optional[str] = None
+ """The output from the tool call."""
src/openai/types/realtime/realtime_mcp_tool_call_param.py
@@ -0,0 +1,40 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .realtime_mcphttp_error_param import RealtimeMcphttpErrorParam
+from .realtime_mcp_protocol_error_param import RealtimeMcpProtocolErrorParam
+from .realtime_mcp_tool_execution_error_param import RealtimeMcpToolExecutionErrorParam
+
+__all__ = ["RealtimeMcpToolCallParam", "Error"]
+
+Error: TypeAlias = Union[RealtimeMcpProtocolErrorParam, RealtimeMcpToolExecutionErrorParam, RealtimeMcphttpErrorParam]
+
+
+class RealtimeMcpToolCallParam(TypedDict, total=False):
+ id: Required[str]
+ """The unique ID of the tool call."""
+
+ arguments: Required[str]
+ """A JSON string of the arguments passed to the tool."""
+
+ name: Required[str]
+ """The name of the tool that was run."""
+
+ server_label: Required[str]
+ """The label of the MCP server running the tool."""
+
+ type: Required[Literal["mcp_tool_call"]]
+ """The type of the item. Always `mcp_tool_call`."""
+
+ approval_request_id: Optional[str]
+ """The ID of an associated approval request, if any."""
+
+ error: Optional[Error]
+ """The error from the tool call, if any."""
+
+ output: Optional[str]
+ """The output from the tool call."""
src/openai/types/realtime/realtime_mcp_tool_execution_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeMcpToolExecutionError"]
+
+
+class RealtimeMcpToolExecutionError(BaseModel):
+ message: str
+
+ type: Literal["tool_execution_error"]
src/openai/types/realtime/realtime_mcp_tool_execution_error_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeMcpToolExecutionErrorParam"]
+
+
+class RealtimeMcpToolExecutionErrorParam(TypedDict, total=False):
+ message: Required[str]
+
+ type: Required[Literal["tool_execution_error"]]
src/openai/types/realtime/realtime_mcphttp_error.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeMcphttpError"]
+
+
+class RealtimeMcphttpError(BaseModel):
+ code: int
+
+ message: str
+
+ type: Literal["http_error"]
src/openai/types/realtime/realtime_mcphttp_error_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeMcphttpErrorParam"]
+
+
+class RealtimeMcphttpErrorParam(TypedDict, total=False):
+ code: Required[int]
+
+ message: Required[str]
+
+ type: Required[Literal["http_error"]]
src/openai/types/realtime/realtime_response.py
@@ -0,0 +1,89 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from ..shared.metadata import Metadata
+from .conversation_item import ConversationItem
+from .realtime_response_usage import RealtimeResponseUsage
+from .realtime_response_status import RealtimeResponseStatus
+
+__all__ = ["RealtimeResponse"]
+
+
+class RealtimeResponse(BaseModel):
+ id: Optional[str] = None
+ """The unique ID of the response."""
+
+ conversation_id: Optional[str] = None
+ """
+ Which conversation the response is added to, determined by the `conversation`
+ field in the `response.create` event. If `auto`, the response will be added to
+ the default conversation and the value of `conversation_id` will be an id like
+ `conv_1234`. If `none`, the response will not be added to any conversation and
+ the value of `conversation_id` will be `null`. If responses are being triggered
+ by server VAD, the response will be added to the default conversation, thus the
+ `conversation_id` will be an id like `conv_1234`.
+ """
+
+ max_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls, that was used in this response.
+ """
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model used to respond.
+
+ If there are multiple modalities, the model will pick one, for example if
+ `modalities` is `["text", "audio"]`, the model could be responding in either
+ text or audio.
+ """
+
+ object: Optional[Literal["realtime.response"]] = None
+ """The object type, must be `realtime.response`."""
+
+ output: Optional[List[ConversationItem]] = None
+ """The list of output items generated by the response."""
+
+ output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ status: Optional[Literal["completed", "cancelled", "failed", "incomplete", "in_progress"]] = None
+ """
+ The final status of the response (`completed`, `cancelled`, `failed`, or
+ `incomplete`, `in_progress`).
+ """
+
+ status_details: Optional[RealtimeResponseStatus] = None
+ """Additional details about the status."""
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ usage: Optional[RealtimeResponseUsage] = None
+ """Usage statistics for the Response, this will correspond to billing.
+
+ A Realtime API session will maintain a conversation context and append new Items
+ to the Conversation, thus output from previous turns (text and audio tokens)
+ will become the input for later turns.
+ """
+
+ voice: Union[
+ str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"], None
+ ] = None
+ """
+ The voice the model used to respond. Current voice options are `alloy`, `ash`,
+ `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`.
+ """
src/openai/types/realtime/realtime_response_status.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeResponseStatus", "Error"]
+
+
+class Error(BaseModel):
+ code: Optional[str] = None
+ """Error code, if any."""
+
+ type: Optional[str] = None
+ """The type of error."""
+
+
+class RealtimeResponseStatus(BaseModel):
+ error: Optional[Error] = None
+ """
+ A description of the error that caused the response to fail, populated when the
+ `status` is `failed`.
+ """
+
+ reason: Optional[Literal["turn_detected", "client_cancelled", "max_output_tokens", "content_filter"]] = None
+ """The reason the Response did not complete.
+
+ For a `cancelled` Response, one of `turn_detected` (the server VAD detected a
+ new start of speech) or `client_cancelled` (the client sent a cancel event). For
+ an `incomplete` Response, one of `max_output_tokens` or `content_filter` (the
+ server-side safety filter activated and cut off the response).
+ """
+
+ type: Optional[Literal["completed", "cancelled", "incomplete", "failed"]] = None
+ """
+ The type of error that caused the response to fail, corresponding with the
+ `status` field (`completed`, `cancelled`, `incomplete`, `failed`).
+ """
src/openai/types/realtime/realtime_response_usage.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ..._models import BaseModel
+from .realtime_response_usage_input_token_details import RealtimeResponseUsageInputTokenDetails
+from .realtime_response_usage_output_token_details import RealtimeResponseUsageOutputTokenDetails
+
+__all__ = ["RealtimeResponseUsage"]
+
+
+class RealtimeResponseUsage(BaseModel):
+ input_token_details: Optional[RealtimeResponseUsageInputTokenDetails] = None
+ """Details about the input tokens used in the Response."""
+
+ input_tokens: Optional[int] = None
+ """
+ The number of input tokens used in the Response, including text and audio
+ tokens.
+ """
+
+ output_token_details: Optional[RealtimeResponseUsageOutputTokenDetails] = None
+ """Details about the output tokens used in the Response."""
+
+ output_tokens: Optional[int] = None
+ """
+ The number of output tokens sent in the Response, including text and audio
+ tokens.
+ """
+
+ total_tokens: Optional[int] = None
+ """
+ The total number of tokens in the Response including input and output text and
+ audio tokens.
+ """
src/openai/types/realtime/realtime_response_usage_input_token_details.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeResponseUsageInputTokenDetails"]
+
+
+class RealtimeResponseUsageInputTokenDetails(BaseModel):
+ audio_tokens: Optional[int] = None
+ """The number of audio tokens used in the Response."""
+
+ cached_tokens: Optional[int] = None
+ """The number of cached tokens used in the Response."""
+
+ text_tokens: Optional[int] = None
+ """The number of text tokens used in the Response."""
src/openai/types/realtime/realtime_response_usage_output_token_details.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeResponseUsageOutputTokenDetails"]
+
+
+class RealtimeResponseUsageOutputTokenDetails(BaseModel):
+ audio_tokens: Optional[int] = None
+ """The number of audio tokens used in the Response."""
+
+ text_tokens: Optional[int] = None
+ """The number of text tokens used in the Response."""
src/openai/types/realtime/realtime_server_event.py
@@ -0,0 +1,159 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .conversation_item import ConversationItem
+from .response_done_event import ResponseDoneEvent
+from .realtime_error_event import RealtimeErrorEvent
+from .mcp_list_tools_failed import McpListToolsFailed
+from .session_created_event import SessionCreatedEvent
+from .session_updated_event import SessionUpdatedEvent
+from .conversation_item_done import ConversationItemDone
+from .response_created_event import ResponseCreatedEvent
+from .conversation_item_added import ConversationItemAdded
+from .mcp_list_tools_completed import McpListToolsCompleted
+from .response_mcp_call_failed import ResponseMcpCallFailed
+from .response_text_done_event import ResponseTextDoneEvent
+from .rate_limits_updated_event import RateLimitsUpdatedEvent
+from .response_audio_done_event import ResponseAudioDoneEvent
+from .response_text_delta_event import ResponseTextDeltaEvent
+from .conversation_created_event import ConversationCreatedEvent
+from .mcp_list_tools_in_progress import McpListToolsInProgress
+from .response_audio_delta_event import ResponseAudioDeltaEvent
+from .response_mcp_call_completed import ResponseMcpCallCompleted
+from .response_mcp_call_in_progress import ResponseMcpCallInProgress
+from .transcription_session_created import TranscriptionSessionCreated
+from .conversation_item_created_event import ConversationItemCreatedEvent
+from .conversation_item_deleted_event import ConversationItemDeletedEvent
+from .response_output_item_done_event import ResponseOutputItemDoneEvent
+from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent
+from .response_content_part_done_event import ResponseContentPartDoneEvent
+from .response_mcp_call_arguments_done import ResponseMcpCallArgumentsDone
+from .response_output_item_added_event import ResponseOutputItemAddedEvent
+from .conversation_item_truncated_event import ConversationItemTruncatedEvent
+from .response_content_part_added_event import ResponseContentPartAddedEvent
+from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta
+from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent
+from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent
+from .input_audio_buffer_timeout_triggered import InputAudioBufferTimeoutTriggered
+from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent
+from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent
+from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent
+from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent
+from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent
+from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent
+from .conversation_item_input_audio_transcription_segment import ConversationItemInputAudioTranscriptionSegment
+from .conversation_item_input_audio_transcription_delta_event import ConversationItemInputAudioTranscriptionDeltaEvent
+from .conversation_item_input_audio_transcription_failed_event import ConversationItemInputAudioTranscriptionFailedEvent
+from .conversation_item_input_audio_transcription_completed_event import (
+ ConversationItemInputAudioTranscriptionCompletedEvent,
+)
+
+__all__ = [
+ "RealtimeServerEvent",
+ "ConversationItemRetrieved",
+ "OutputAudioBufferStarted",
+ "OutputAudioBufferStopped",
+ "OutputAudioBufferCleared",
+]
+
+
+class ConversationItemRetrieved(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """A single item within a Realtime conversation."""
+
+ type: Literal["conversation.item.retrieved"]
+ """The event type, must be `conversation.item.retrieved`."""
+
+
+class OutputAudioBufferStarted(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response_id: str
+ """The unique ID of the response that produced the audio."""
+
+ type: Literal["output_audio_buffer.started"]
+ """The event type, must be `output_audio_buffer.started`."""
+
+
+class OutputAudioBufferStopped(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response_id: str
+ """The unique ID of the response that produced the audio."""
+
+ type: Literal["output_audio_buffer.stopped"]
+ """The event type, must be `output_audio_buffer.stopped`."""
+
+
+class OutputAudioBufferCleared(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response_id: str
+ """The unique ID of the response that produced the audio."""
+
+ type: Literal["output_audio_buffer.cleared"]
+ """The event type, must be `output_audio_buffer.cleared`."""
+
+
+RealtimeServerEvent: TypeAlias = Annotated[
+ Union[
+ ConversationCreatedEvent,
+ ConversationItemCreatedEvent,
+ ConversationItemDeletedEvent,
+ ConversationItemInputAudioTranscriptionCompletedEvent,
+ ConversationItemInputAudioTranscriptionDeltaEvent,
+ ConversationItemInputAudioTranscriptionFailedEvent,
+ ConversationItemRetrieved,
+ ConversationItemTruncatedEvent,
+ RealtimeErrorEvent,
+ InputAudioBufferClearedEvent,
+ InputAudioBufferCommittedEvent,
+ InputAudioBufferSpeechStartedEvent,
+ InputAudioBufferSpeechStoppedEvent,
+ RateLimitsUpdatedEvent,
+ ResponseAudioDeltaEvent,
+ ResponseAudioDoneEvent,
+ ResponseAudioTranscriptDeltaEvent,
+ ResponseAudioTranscriptDoneEvent,
+ ResponseContentPartAddedEvent,
+ ResponseContentPartDoneEvent,
+ ResponseCreatedEvent,
+ ResponseDoneEvent,
+ ResponseFunctionCallArgumentsDeltaEvent,
+ ResponseFunctionCallArgumentsDoneEvent,
+ ResponseOutputItemAddedEvent,
+ ResponseOutputItemDoneEvent,
+ ResponseTextDeltaEvent,
+ ResponseTextDoneEvent,
+ SessionCreatedEvent,
+ SessionUpdatedEvent,
+ TranscriptionSessionUpdatedEvent,
+ TranscriptionSessionCreated,
+ OutputAudioBufferStarted,
+ OutputAudioBufferStopped,
+ OutputAudioBufferCleared,
+ ConversationItemAdded,
+ ConversationItemDone,
+ InputAudioBufferTimeoutTriggered,
+ ConversationItemInputAudioTranscriptionSegment,
+ McpListToolsInProgress,
+ McpListToolsCompleted,
+ McpListToolsFailed,
+ ResponseMcpCallArgumentsDelta,
+ ResponseMcpCallArgumentsDone,
+ ResponseMcpCallInProgress,
+ ResponseMcpCallCompleted,
+ ResponseMcpCallFailed,
+ ],
+ PropertyInfo(discriminator="type"),
+]
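
The union above is what iterating a realtime connection yields, so handlers typically narrow on `event.type`. A hedged sketch covering a handful of variants; literal strings for event classes not shown in this section are assumed from the classes referenced in the union:

from openai.types.realtime.realtime_server_event import RealtimeServerEvent


def handle_server_event(event: RealtimeServerEvent) -> None:
    # Narrow on the `type` discriminator; only a few of the many variants are handled.
    if event.type == "response.output_text.delta":
        print(event.delta, end="", flush=True)
    elif event.type == "rate_limits.updated":
        print("rate limits:", [limit.name for limit in event.rate_limits])
    elif event.type == "error":
        raise RuntimeError(event.error.message)
    elif event.type == "response.done":
        print("[response complete]")
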
src/openai/types/realtime/realtime_session.py
@@ -0,0 +1,305 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, TypeAlias
+
+from ..._models import BaseModel
+from ..responses.response_prompt import ResponsePrompt
+
+__all__ = [
+ "RealtimeSession",
+ "InputAudioNoiseReduction",
+ "InputAudioTranscription",
+ "Tool",
+ "Tracing",
+ "TracingTracingConfiguration",
+ "TurnDetection",
+]
+
+
+class InputAudioNoiseReduction(BaseModel):
+ type: Optional[Literal["near_field", "far_field"]] = None
+ """Type of noise reduction.
+
+ `near_field` is for close-talking microphones such as headphones, `far_field` is
+ for far-field microphones such as laptop or conference room microphones.
+ """
+
+
+class InputAudioTranscription(BaseModel):
+ language: Optional[str] = None
+ """The language of the input audio.
+
+ Supplying the input language in
+ [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+ format will improve accuracy and latency.
+ """
+
+ model: Optional[str] = None
+ """
+ The model to use for transcription, current options are `gpt-4o-transcribe`,
+ `gpt-4o-mini-transcribe`, and `whisper-1`.
+ """
+
+ prompt: Optional[str] = None
+ """
+ An optional text to guide the model's style or continue a previous audio
+ segment. For `whisper-1`, the
+ [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+ For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+ "expect words related to technology".
+ """
+
+
+class Tool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class TracingTracingConfiguration(BaseModel):
+ group_id: Optional[str] = None
+ """
+ The group id to attach to this trace to enable filtering and grouping in the
+ traces dashboard.
+ """
+
+ metadata: Optional[object] = None
+ """
+ The arbitrary metadata to attach to this trace to enable filtering in the traces
+ dashboard.
+ """
+
+ workflow_name: Optional[str] = None
+ """The name of the workflow to attach to this trace.
+
+ This is used to name the trace in the traces dashboard.
+ """
+
+
+Tracing: TypeAlias = Union[Literal["auto"], TracingTracingConfiguration, None]
+
+
+class TurnDetection(BaseModel):
+ create_response: Optional[bool] = None
+ """
+ Whether or not to automatically generate a response when a VAD stop event
+ occurs.
+ """
+
+ eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
+ """Used only for `semantic_vad` mode.
+
+ The eagerness of the model to respond. `low` will wait longer for the user to
+ continue speaking, `high` will respond more quickly. `auto` is the default and
+ is equivalent to `medium`.
+ """
+
+ idle_timeout_ms: Optional[int] = None
+ """
+ Optional idle timeout after which turn detection will auto-timeout when no
+ additional audio is received.
+ """
+
+ interrupt_response: Optional[bool] = None
+ """
+ Whether or not to automatically interrupt any ongoing response with output to
+ the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ occurs.
+ """
+
+ prefix_padding_ms: Optional[int] = None
+ """Used only for `server_vad` mode.
+
+ Amount of audio to include before the VAD detected speech (in milliseconds).
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Used only for `server_vad` mode.
+
+ Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
+ With shorter values the model will respond more quickly, but may jump in on
+ short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Used only for `server_vad` mode.
+
+ Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
+ threshold will require louder audio to activate the model, and thus might
+ perform better in noisy environments.
+ """
+
+ type: Optional[Literal["server_vad", "semantic_vad"]] = None
+ """Type of turn detection."""
+
+
+class RealtimeSession(BaseModel):
+ id: Optional[str] = None
+ """Unique identifier for the session that looks like `sess_1234567890abcdef`."""
+
+ expires_at: Optional[int] = None
+ """Expiration timestamp for the session, in seconds since epoch."""
+
+ include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
+ """Additional fields to include in server outputs.
+
+ - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+ transcription.
+ """
+
+ input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of input audio.
+
+ Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+ be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+ byte order.
+ """
+
+ input_audio_noise_reduction: Optional[InputAudioNoiseReduction] = None
+ """Configuration for input audio noise reduction.
+
+ This can be set to `null` to turn off. Noise reduction filters audio added to
+ the input audio buffer before it is sent to VAD and the model. Filtering the
+ audio can improve VAD and turn detection accuracy (reducing false positives) and
+ model performance by improving perception of the input audio.
+ """
+
+ input_audio_transcription: Optional[InputAudioTranscription] = None
+ """
+ Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn off once on. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through
+ [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+ and should be treated as guidance of input audio content rather than precisely
+ what the model heard. The client can optionally set the language and prompt for
+ transcription, these offer additional guidance to the transcription service.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ model: Optional[
+ Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-realtime-preview-2025-06-03",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ ] = None
+ """The Realtime model used for this session."""
+
+ object: Optional[Literal["realtime.session"]] = None
+ """The object type. Always `realtime.session`."""
+
+ output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio.
+
+ Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is
+ sampled at a rate of 24kHz.
+ """
+
+ prompt: Optional[ResponsePrompt] = None
+ """Reference to a prompt template and its variables.
+
+ [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+ """
+
+ speed: Optional[float] = None
+ """The speed of the model's spoken response.
+
+ 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
+ This value can only be changed in between model turns, not while a response is
+ in progress.
+ """
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2].
+
+ For audio models a temperature of 0.8 is highly recommended for best
+ performance.
+ """
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[Tool]] = None
+ """Tools (functions) available to the model."""
+
+ tracing: Optional[Tracing] = None
+ """Configuration options for tracing.
+
+ Set to null to disable tracing. Once tracing is enabled for a session, the
+ configuration cannot be modified.
+
+ `auto` will create a trace for the session with default values for the workflow
+ name, group id, and metadata.
+ """
+
+ turn_detection: Optional[TurnDetection] = None
+ """Configuration for turn detection, ether Server VAD or Semantic VAD.
+
+ This can be set to `null` to turn off, in which case the client must manually
+ trigger model response. Server VAD means that the model will detect the start
+ and end of speech based on audio volume and respond at the end of user speech.
+ Semantic VAD is more advanced and uses a turn detection model (in conjunction
+ with VAD) to semantically estimate whether the user has finished speaking, then
+ dynamically sets a timeout based on this probability. For example, if user audio
+ trails off with "uhhm", the model will score a low probability of turn end and
+ wait longer for the user to continue speaking. This can be useful for more
+ natural conversations, but may have a higher latency.
+ """
+
+ voice: Union[
+ str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"], None
+ ] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`.
+ """
src/openai/types/realtime/realtime_session_create_request.py
@@ -0,0 +1,116 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .realtime_truncation import RealtimeTruncation
+from .realtime_audio_config import RealtimeAudioConfig
+from .realtime_tools_config import RealtimeToolsConfig
+from .realtime_tracing_config import RealtimeTracingConfig
+from ..responses.response_prompt import ResponsePrompt
+from .realtime_tool_choice_config import RealtimeToolChoiceConfig
+from .realtime_client_secret_config import RealtimeClientSecretConfig
+
+__all__ = ["RealtimeSessionCreateRequest"]
+
+
+class RealtimeSessionCreateRequest(BaseModel):
+ model: Union[
+ str,
+ Literal[
+ "gpt-4o-realtime",
+ "gpt-4o-mini-realtime",
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-realtime-preview-2025-06-03",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ],
+ ]
+ """The Realtime model used for this session."""
+
+ type: Literal["realtime"]
+ """The type of session to create. Always `realtime` for the Realtime API."""
+
+ audio: Optional[RealtimeAudioConfig] = None
+ """Configuration for input and output audio."""
+
+ client_secret: Optional[RealtimeClientSecretConfig] = None
+ """Configuration options for the generated client secret."""
+
+ include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
+ """Additional fields to include in server outputs.
+
+ - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+ transcription.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ output_modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ prompt: Optional[ResponsePrompt] = None
+ """Reference to a prompt template and its variables.
+
+ [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+ """
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2].
+
+ For audio models a temperature of 0.8 is highly recommended for best
+ performance.
+ """
+
+ tool_choice: Optional[RealtimeToolChoiceConfig] = None
+ """How the model chooses tools.
+
+ Provide one of the string modes or force a specific function/MCP tool.
+ """
+
+ tools: Optional[RealtimeToolsConfig] = None
+ """Tools available to the model."""
+
+ tracing: Optional[RealtimeTracingConfig] = None
+ """Configuration options for tracing.
+
+ Set to null to disable tracing. Once tracing is enabled for a session, the
+ configuration cannot be modified.
+
+ `auto` will create a trace for the session with default values for the workflow
+ name, group id, and metadata.
+ """
+
+ truncation: Optional[RealtimeTruncation] = None
+ """
+ Controls how the realtime conversation is truncated prior to model inference.
+ The default is `auto`. When set to `retention_ratio`, the server retains a
+ fraction of the conversation tokens prior to the instructions.
+ """
src/openai/types/realtime/realtime_session_create_request_param.py
@@ -0,0 +1,119 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .realtime_truncation_param import RealtimeTruncationParam
+from .realtime_audio_config_param import RealtimeAudioConfigParam
+from .realtime_tools_config_param import RealtimeToolsConfigParam
+from .realtime_tracing_config_param import RealtimeTracingConfigParam
+from ..responses.response_prompt_param import ResponsePromptParam
+from .realtime_tool_choice_config_param import RealtimeToolChoiceConfigParam
+from .realtime_client_secret_config_param import RealtimeClientSecretConfigParam
+
+__all__ = ["RealtimeSessionCreateRequestParam"]
+
+
+class RealtimeSessionCreateRequestParam(TypedDict, total=False):
+ model: Required[
+ Union[
+ str,
+ Literal[
+ "gpt-4o-realtime",
+ "gpt-4o-mini-realtime",
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-realtime-preview-2025-06-03",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ],
+ ]
+ ]
+ """The Realtime model used for this session."""
+
+ type: Required[Literal["realtime"]]
+ """The type of session to create. Always `realtime` for the Realtime API."""
+
+ audio: RealtimeAudioConfigParam
+ """Configuration for input and output audio."""
+
+ client_secret: RealtimeClientSecretConfigParam
+ """Configuration options for the generated client secret."""
+
+ include: List[Literal["item.input_audio_transcription.logprobs"]]
+ """Additional fields to include in server outputs.
+
+ - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+ transcription.
+ """
+
+ instructions: str
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_output_tokens: Union[int, Literal["inf"]]
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ output_modalities: List[Literal["text", "audio"]]
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ prompt: Optional[ResponsePromptParam]
+ """Reference to a prompt template and its variables.
+
+ [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+ """
+
+ temperature: float
+ """Sampling temperature for the model, limited to [0.6, 1.2].
+
+ For audio models a temperature of 0.8 is highly recommended for best
+ performance.
+ """
+
+ tool_choice: RealtimeToolChoiceConfigParam
+ """How the model chooses tools.
+
+ Provide one of the string modes or force a specific function/MCP tool.
+ """
+
+ tools: RealtimeToolsConfigParam
+ """Tools available to the model."""
+
+ tracing: Optional[RealtimeTracingConfigParam]
+ """Configuration options for tracing.
+
+ Set to null to disable tracing. Once tracing is enabled for a session, the
+ configuration cannot be modified.
+
+ `auto` will create a trace for the session with default values for the workflow
+ name, group id, and metadata.
+ """
+
+ truncation: RealtimeTruncationParam
+ """
+ Controls how the realtime conversation is truncated prior to model inference.
+ The default is `auto`. When set to `retention_ratio`, the server retains a
+ fraction of the conversation tokens prior to the instructions.
+ """
src/openai/types/realtime/realtime_session_create_response.py
@@ -0,0 +1,222 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, TypeAlias
+
+from ..._models import BaseModel
+
+__all__ = [
+ "RealtimeSessionCreateResponse",
+ "Audio",
+ "AudioInput",
+ "AudioInputNoiseReduction",
+ "AudioInputTranscription",
+ "AudioInputTurnDetection",
+ "AudioOutput",
+ "Tool",
+ "Tracing",
+ "TracingTracingConfiguration",
+ "TurnDetection",
+]
+
+
+class AudioInputNoiseReduction(BaseModel):
+ type: Optional[Literal["near_field", "far_field"]] = None
+
+
+class AudioInputTranscription(BaseModel):
+ language: Optional[str] = None
+ """The language of the input audio."""
+
+ model: Optional[str] = None
+ """The model to use for transcription."""
+
+ prompt: Optional[str] = None
+ """Optional text to guide the model's style or continue a previous audio segment."""
+
+
+class AudioInputTurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+
+ silence_duration_ms: Optional[int] = None
+
+ threshold: Optional[float] = None
+
+ type: Optional[str] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class AudioInput(BaseModel):
+ format: Optional[str] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ noise_reduction: Optional[AudioInputNoiseReduction] = None
+ """Configuration for input audio noise reduction."""
+
+ transcription: Optional[AudioInputTranscription] = None
+ """Configuration for input audio transcription."""
+
+ turn_detection: Optional[AudioInputTurnDetection] = None
+ """Configuration for turn detection."""
+
+
+class AudioOutput(BaseModel):
+ format: Optional[str] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ speed: Optional[float] = None
+
+ voice: Union[
+ str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"], None
+ ] = None
+
+
+class Audio(BaseModel):
+ input: Optional[AudioInput] = None
+
+ output: Optional[AudioOutput] = None
+
+
+class Tool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class TracingTracingConfiguration(BaseModel):
+ group_id: Optional[str] = None
+ """
+ The group id to attach to this trace to enable filtering and grouping in the
+ traces dashboard.
+ """
+
+ metadata: Optional[object] = None
+ """
+ The arbitrary metadata to attach to this trace to enable filtering in the traces
+ dashboard.
+ """
+
+ workflow_name: Optional[str] = None
+ """The name of the workflow to attach to this trace.
+
+ This is used to name the trace in the traces dashboard.
+ """
+
+
+Tracing: TypeAlias = Union[Literal["auto"], TracingTracingConfiguration]
+
+
+class TurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: Optional[str] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class RealtimeSessionCreateResponse(BaseModel):
+ id: Optional[str] = None
+ """Unique identifier for the session that looks like `sess_1234567890abcdef`."""
+
+ audio: Optional[Audio] = None
+ """Configuration for input and output audio for the session."""
+
+ expires_at: Optional[int] = None
+ """Expiration timestamp for the session, in seconds since epoch."""
+
+ include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
+ """Additional fields to include in server outputs.
+
+ - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+ transcription.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ model: Optional[str] = None
+ """The Realtime model used for this session."""
+
+ object: Optional[str] = None
+ """The object type. Always `realtime.session`."""
+
+ output_modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[Tool]] = None
+ """Tools (functions) available to the model."""
+
+ tracing: Optional[Tracing] = None
+ """Configuration options for tracing.
+
+ Set to null to disable tracing. Once tracing is enabled for a session, the
+ configuration cannot be modified.
+
+ `auto` will create a trace for the session with default values for the workflow
+ name, group id, and metadata.
+ """
+
+ turn_detection: Optional[TurnDetection] = None
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
src/openai/types/realtime/realtime_tool_choice_config.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from ..responses.tool_choice_mcp import ToolChoiceMcp
+from ..responses.tool_choice_options import ToolChoiceOptions
+from ..responses.tool_choice_function import ToolChoiceFunction
+
+__all__ = ["RealtimeToolChoiceConfig"]
+
+RealtimeToolChoiceConfig: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunction, ToolChoiceMcp]
src/openai/types/realtime/realtime_tool_choice_config_param.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from ..responses.tool_choice_options import ToolChoiceOptions
+from ..responses.tool_choice_mcp_param import ToolChoiceMcpParam
+from ..responses.tool_choice_function_param import ToolChoiceFunctionParam
+
+__all__ = ["RealtimeToolChoiceConfigParam"]
+
+RealtimeToolChoiceConfigParam: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunctionParam, ToolChoiceMcpParam]
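Editor's note: a hedged illustration, not in the commit, of two of the tool-choice shapes this alias accepts; the function form is assumed to follow the Responses API's ToolChoiceFunctionParam ({"type": "function", "name": ...}).

    from openai.types.realtime.realtime_tool_choice_config_param import RealtimeToolChoiceConfigParam

    choice_auto: RealtimeToolChoiceConfigParam = "auto"  # one of the string modes
    choice_fn: RealtimeToolChoiceConfigParam = {         # force a specific function tool
        "type": "function",
        "name": "get_weather",
    }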
src/openai/types/realtime/realtime_tools_config.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import TypeAlias
+
+from .realtime_tools_config_union import RealtimeToolsConfigUnion
+
+__all__ = ["RealtimeToolsConfig"]
+
+RealtimeToolsConfig: TypeAlias = List[RealtimeToolsConfigUnion]
src/openai/types/realtime/realtime_tools_config_param.py
@@ -0,0 +1,158 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = [
+ "RealtimeToolsConfigParam",
+ "RealtimeToolsConfigUnionParam",
+ "Function",
+ "Mcp",
+ "McpAllowedTools",
+ "McpAllowedToolsMcpToolFilter",
+ "McpRequireApproval",
+ "McpRequireApprovalMcpToolApprovalFilter",
+ "McpRequireApprovalMcpToolApprovalFilterAlways",
+ "McpRequireApprovalMcpToolApprovalFilterNever",
+]
+
+
+class Function(TypedDict, total=False):
+ description: str
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: str
+ """The name of the function."""
+
+ parameters: object
+ """Parameters of the function in JSON Schema."""
+
+ type: Literal["function"]
+ """The type of the tool, i.e. `function`."""
+
+
+class McpAllowedToolsMcpToolFilter(TypedDict, total=False):
+ read_only: bool
+ """Indicates whether or not a tool modifies data or is read-only.
+
+ If an MCP server is
+ [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+ it will match this filter.
+ """
+
+ tool_names: List[str]
+ """List of allowed tool names."""
+
+
+McpAllowedTools: TypeAlias = Union[List[str], McpAllowedToolsMcpToolFilter]
+
+
+class McpRequireApprovalMcpToolApprovalFilterAlways(TypedDict, total=False):
+ read_only: bool
+ """Indicates whether or not a tool modifies data or is read-only.
+
+ If an MCP server is
+ [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+ it will match this filter.
+ """
+
+ tool_names: List[str]
+ """List of allowed tool names."""
+
+
+class McpRequireApprovalMcpToolApprovalFilterNever(TypedDict, total=False):
+ read_only: bool
+ """Indicates whether or not a tool modifies data or is read-only.
+
+ If an MCP server is
+ [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+ it will match this filter.
+ """
+
+ tool_names: List[str]
+ """List of allowed tool names."""
+
+
+class McpRequireApprovalMcpToolApprovalFilter(TypedDict, total=False):
+ always: McpRequireApprovalMcpToolApprovalFilterAlways
+ """A filter object to specify which tools are allowed."""
+
+ never: McpRequireApprovalMcpToolApprovalFilterNever
+ """A filter object to specify which tools are allowed."""
+
+
+McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"]]
+
+
+class Mcp(TypedDict, total=False):
+ server_label: Required[str]
+ """A label for this MCP server, used to identify it in tool calls."""
+
+ type: Required[Literal["mcp"]]
+ """The type of the MCP tool. Always `mcp`."""
+
+ allowed_tools: Optional[McpAllowedTools]
+ """List of allowed tool names or a filter object."""
+
+ authorization: str
+ """
+ An OAuth access token that can be used with a remote MCP server, either with a
+ custom MCP server URL or a service connector. Your application must handle the
+ OAuth authorization flow and provide the token here.
+ """
+
+ connector_id: Literal[
+ "connector_dropbox",
+ "connector_gmail",
+ "connector_googlecalendar",
+ "connector_googledrive",
+ "connector_microsoftteams",
+ "connector_outlookcalendar",
+ "connector_outlookemail",
+ "connector_sharepoint",
+ ]
+ """Identifier for service connectors, like those available in ChatGPT.
+
+ One of `server_url` or `connector_id` must be provided. Learn more about service
+ connectors
+ [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
+
+ Currently supported `connector_id` values are:
+
+ - Dropbox: `connector_dropbox`
+ - Gmail: `connector_gmail`
+ - Google Calendar: `connector_googlecalendar`
+ - Google Drive: `connector_googledrive`
+ - Microsoft Teams: `connector_microsoftteams`
+ - Outlook Calendar: `connector_outlookcalendar`
+ - Outlook Email: `connector_outlookemail`
+ - SharePoint: `connector_sharepoint`
+ """
+
+ headers: Optional[Dict[str, str]]
+ """Optional HTTP headers to send to the MCP server.
+
+ Use for authentication or other purposes.
+ """
+
+ require_approval: Optional[McpRequireApproval]
+ """Specify which of the MCP server's tools require approval."""
+
+ server_description: str
+ """Optional description of the MCP server, used to provide more context."""
+
+ server_url: str
+ """The URL for the MCP server.
+
+ One of `server_url` or `connector_id` must be provided.
+ """
+
+
+RealtimeToolsConfigUnionParam: TypeAlias = Union[Function, Mcp]
+
+RealtimeToolsConfigParam: TypeAlias = List[RealtimeToolsConfigUnionParam]
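Editor's note: a sketch, not part of the commit, of a tools list mixing the two union members defined above; the function schema, server label, and URL are hypothetical.

    from openai.types.realtime.realtime_tools_config_param import RealtimeToolsConfigParam

    tools: RealtimeToolsConfigParam = [
        {
            "type": "function",
            "name": "get_weather",
            "description": "Look up the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
        {
            "type": "mcp",
            "server_label": "example_docs",
            "server_url": "https://example.com/mcp",
            "require_approval": "never",
        },
    ]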
src/openai/types/realtime/realtime_tools_config_union.py
@@ -0,0 +1,158 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+
+__all__ = [
+ "RealtimeToolsConfigUnion",
+ "Function",
+ "Mcp",
+ "McpAllowedTools",
+ "McpAllowedToolsMcpToolFilter",
+ "McpRequireApproval",
+ "McpRequireApprovalMcpToolApprovalFilter",
+ "McpRequireApprovalMcpToolApprovalFilterAlways",
+ "McpRequireApprovalMcpToolApprovalFilterNever",
+]
+
+
+class Function(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class McpAllowedToolsMcpToolFilter(BaseModel):
+ read_only: Optional[bool] = None
+ """Indicates whether or not a tool modifies data or is read-only.
+
+ If an MCP server is
+ [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+ it will match this filter.
+ """
+
+ tool_names: Optional[List[str]] = None
+ """List of allowed tool names."""
+
+
+McpAllowedTools: TypeAlias = Union[List[str], McpAllowedToolsMcpToolFilter, None]
+
+
+class McpRequireApprovalMcpToolApprovalFilterAlways(BaseModel):
+ read_only: Optional[bool] = None
+ """Indicates whether or not a tool modifies data or is read-only.
+
+ If an MCP server is
+ [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+ it will match this filter.
+ """
+
+ tool_names: Optional[List[str]] = None
+ """List of allowed tool names."""
+
+
+class McpRequireApprovalMcpToolApprovalFilterNever(BaseModel):
+ read_only: Optional[bool] = None
+ """Indicates whether or not a tool modifies data or is read-only.
+
+ If an MCP server is
+ [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+ it will match this filter.
+ """
+
+ tool_names: Optional[List[str]] = None
+ """List of allowed tool names."""
+
+
+class McpRequireApprovalMcpToolApprovalFilter(BaseModel):
+ always: Optional[McpRequireApprovalMcpToolApprovalFilterAlways] = None
+ """A filter object to specify which tools are allowed."""
+
+ never: Optional[McpRequireApprovalMcpToolApprovalFilterNever] = None
+ """A filter object to specify which tools are allowed."""
+
+
+McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"], None]
+
+
+class Mcp(BaseModel):
+ server_label: str
+ """A label for this MCP server, used to identify it in tool calls."""
+
+ type: Literal["mcp"]
+ """The type of the MCP tool. Always `mcp`."""
+
+ allowed_tools: Optional[McpAllowedTools] = None
+ """List of allowed tool names or a filter object."""
+
+ authorization: Optional[str] = None
+ """
+ An OAuth access token that can be used with a remote MCP server, either with a
+ custom MCP server URL or a service connector. Your application must handle the
+ OAuth authorization flow and provide the token here.
+ """
+
+ connector_id: Optional[
+ Literal[
+ "connector_dropbox",
+ "connector_gmail",
+ "connector_googlecalendar",
+ "connector_googledrive",
+ "connector_microsoftteams",
+ "connector_outlookcalendar",
+ "connector_outlookemail",
+ "connector_sharepoint",
+ ]
+ ] = None
+ """Identifier for service connectors, like those available in ChatGPT.
+
+ One of `server_url` or `connector_id` must be provided. Learn more about service
+ connectors
+ [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
+
+ Currently supported `connector_id` values are:
+
+ - Dropbox: `connector_dropbox`
+ - Gmail: `connector_gmail`
+ - Google Calendar: `connector_googlecalendar`
+ - Google Drive: `connector_googledrive`
+ - Microsoft Teams: `connector_microsoftteams`
+ - Outlook Calendar: `connector_outlookcalendar`
+ - Outlook Email: `connector_outlookemail`
+ - SharePoint: `connector_sharepoint`
+ """
+
+ headers: Optional[Dict[str, str]] = None
+ """Optional HTTP headers to send to the MCP server.
+
+ Use for authentication or other purposes.
+ """
+
+ require_approval: Optional[McpRequireApproval] = None
+ """Specify which of the MCP server's tools require approval."""
+
+ server_description: Optional[str] = None
+ """Optional description of the MCP server, used to provide more context."""
+
+ server_url: Optional[str] = None
+ """The URL for the MCP server.
+
+ One of `server_url` or `connector_id` must be provided.
+ """
+
+
+RealtimeToolsConfigUnion: TypeAlias = Annotated[Union[Function, Mcp], PropertyInfo(discriminator="type")]
src/openai/types/realtime/realtime_tools_config_union_param.py
@@ -0,0 +1,155 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = [
+ "RealtimeToolsConfigUnionParam",
+ "Function",
+ "Mcp",
+ "McpAllowedTools",
+ "McpAllowedToolsMcpToolFilter",
+ "McpRequireApproval",
+ "McpRequireApprovalMcpToolApprovalFilter",
+ "McpRequireApprovalMcpToolApprovalFilterAlways",
+ "McpRequireApprovalMcpToolApprovalFilterNever",
+]
+
+
+class Function(TypedDict, total=False):
+ description: str
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: str
+ """The name of the function."""
+
+ parameters: object
+ """Parameters of the function in JSON Schema."""
+
+ type: Literal["function"]
+ """The type of the tool, i.e. `function`."""
+
+
+class McpAllowedToolsMcpToolFilter(TypedDict, total=False):
+ read_only: bool
+ """Indicates whether or not a tool modifies data or is read-only.
+
+ If an MCP server is
+ [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+ it will match this filter.
+ """
+
+ tool_names: List[str]
+ """List of allowed tool names."""
+
+
+McpAllowedTools: TypeAlias = Union[List[str], McpAllowedToolsMcpToolFilter]
+
+
+class McpRequireApprovalMcpToolApprovalFilterAlways(TypedDict, total=False):
+ read_only: bool
+ """Indicates whether or not a tool modifies data or is read-only.
+
+ If an MCP server is
+ [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+ it will match this filter.
+ """
+
+ tool_names: List[str]
+ """List of allowed tool names."""
+
+
+class McpRequireApprovalMcpToolApprovalFilterNever(TypedDict, total=False):
+ read_only: bool
+ """Indicates whether or not a tool modifies data or is read-only.
+
+ If an MCP server is
+ [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+ it will match this filter.
+ """
+
+ tool_names: List[str]
+ """List of allowed tool names."""
+
+
+class McpRequireApprovalMcpToolApprovalFilter(TypedDict, total=False):
+ always: McpRequireApprovalMcpToolApprovalFilterAlways
+ """A filter object to specify which tools are allowed."""
+
+ never: McpRequireApprovalMcpToolApprovalFilterNever
+ """A filter object to specify which tools are allowed."""
+
+
+McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"]]
+
+
+class Mcp(TypedDict, total=False):
+ server_label: Required[str]
+ """A label for this MCP server, used to identify it in tool calls."""
+
+ type: Required[Literal["mcp"]]
+ """The type of the MCP tool. Always `mcp`."""
+
+ allowed_tools: Optional[McpAllowedTools]
+ """List of allowed tool names or a filter object."""
+
+ authorization: str
+ """
+ An OAuth access token that can be used with a remote MCP server, either with a
+ custom MCP server URL or a service connector. Your application must handle the
+ OAuth authorization flow and provide the token here.
+ """
+
+ connector_id: Literal[
+ "connector_dropbox",
+ "connector_gmail",
+ "connector_googlecalendar",
+ "connector_googledrive",
+ "connector_microsoftteams",
+ "connector_outlookcalendar",
+ "connector_outlookemail",
+ "connector_sharepoint",
+ ]
+ """Identifier for service connectors, like those available in ChatGPT.
+
+ One of `server_url` or `connector_id` must be provided. Learn more about service
+ connectors
+ [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
+
+ Currently supported `connector_id` values are:
+
+ - Dropbox: `connector_dropbox`
+ - Gmail: `connector_gmail`
+ - Google Calendar: `connector_googlecalendar`
+ - Google Drive: `connector_googledrive`
+ - Microsoft Teams: `connector_microsoftteams`
+ - Outlook Calendar: `connector_outlookcalendar`
+ - Outlook Email: `connector_outlookemail`
+ - SharePoint: `connector_sharepoint`
+ """
+
+ headers: Optional[Dict[str, str]]
+ """Optional HTTP headers to send to the MCP server.
+
+ Use for authentication or other purposes.
+ """
+
+ require_approval: Optional[McpRequireApproval]
+ """Specify which of the MCP server's tools require approval."""
+
+ server_description: str
+ """Optional description of the MCP server, used to provide more context."""
+
+ server_url: str
+ """The URL for the MCP server.
+
+ One of `server_url` or `connector_id` must be provided.
+ """
+
+
+RealtimeToolsConfigUnionParam: TypeAlias = Union[Function, Mcp]
src/openai/types/realtime/realtime_tracing_config.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal, TypeAlias
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeTracingConfig", "TracingConfiguration"]
+
+
+class TracingConfiguration(BaseModel):
+ group_id: Optional[str] = None
+ """
+ The group id to attach to this trace to enable filtering and grouping in the
+ traces dashboard.
+ """
+
+ metadata: Optional[object] = None
+ """
+ The arbitrary metadata to attach to this trace to enable filtering in the traces
+ dashboard.
+ """
+
+ workflow_name: Optional[str] = None
+ """The name of the workflow to attach to this trace.
+
+ This is used to name the trace in the traces dashboard.
+ """
+
+
+RealtimeTracingConfig: TypeAlias = Union[Literal["auto"], TracingConfiguration, None]
src/openai/types/realtime/realtime_tracing_config_param.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias, TypedDict
+
+__all__ = ["RealtimeTracingConfigParam", "TracingConfiguration"]
+
+
+class TracingConfiguration(TypedDict, total=False):
+ group_id: str
+ """
+ The group id to attach to this trace to enable filtering and grouping in the
+ traces dashboard.
+ """
+
+ metadata: object
+ """
+ The arbitrary metadata to attach to this trace to enable filtering in the traces
+ dashboard.
+ """
+
+ workflow_name: str
+ """The name of the workflow to attach to this trace.
+
+ This is used to name the trace in the traces dashboard.
+ """
+
+
+RealtimeTracingConfigParam: TypeAlias = Union[Literal["auto"], TracingConfiguration]
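Editor's note: both accepted tracing shapes, sketched with hypothetical values (not in the commit).

    from openai.types.realtime.realtime_tracing_config_param import RealtimeTracingConfigParam

    tracing_default: RealtimeTracingConfigParam = "auto"
    tracing_custom: RealtimeTracingConfigParam = {
        "workflow_name": "voice-agent",
        "group_id": "org-123",
        "metadata": {"env": "staging"},
    }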
src/openai/types/realtime/realtime_transcription_session_create_request.py
@@ -0,0 +1,128 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = [
+ "RealtimeTranscriptionSessionCreateRequest",
+ "InputAudioNoiseReduction",
+ "InputAudioTranscription",
+ "TurnDetection",
+]
+
+
+class InputAudioNoiseReduction(BaseModel):
+ type: Optional[Literal["near_field", "far_field"]] = None
+ """Type of noise reduction.
+
+ `near_field` is for close-talking microphones such as headphones, `far_field` is
+ for far-field microphones such as laptop or conference room microphones.
+ """
+
+
+class InputAudioTranscription(BaseModel):
+ language: Optional[str] = None
+ """The language of the input audio.
+
+ Supplying the input language in
+ [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+ format will improve accuracy and latency.
+ """
+
+ model: Optional[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"]] = None
+ """
+ The model to use for transcription, current options are `gpt-4o-transcribe`,
+ `gpt-4o-mini-transcribe`, and `whisper-1`.
+ """
+
+ prompt: Optional[str] = None
+ """
+ An optional text to guide the model's style or continue a previous audio
+ segment. For `whisper-1`, the
+ [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+ For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+ "expect words related to technology".
+ """
+
+
+class TurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: Optional[Literal["server_vad"]] = None
+ """Type of turn detection.
+
+ Only `server_vad` is currently supported for transcription sessions.
+ """
+
+
+class RealtimeTranscriptionSessionCreateRequest(BaseModel):
+ model: Union[str, Literal["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]]
+ """ID of the model to use.
+
+ The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, and `whisper-1`
+ (which is powered by our open source Whisper V2 model).
+ """
+
+ type: Literal["transcription"]
+ """The type of session to create.
+
+ Always `transcription` for transcription sessions.
+ """
+
+ include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
+ """The set of items to include in the transcription. Current available items are:
+
+ - `item.input_audio_transcription.logprobs`
+ """
+
+ input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of input audio.
+
+ Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+ be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+ byte order.
+ """
+
+ input_audio_noise_reduction: Optional[InputAudioNoiseReduction] = None
+ """Configuration for input audio noise reduction.
+
+ This can be set to `null` to turn off. Noise reduction filters audio added to
+ the input audio buffer before it is sent to VAD and the model. Filtering the
+ audio can improve VAD and turn detection accuracy (reducing false positives) and
+ model performance by improving perception of the input audio.
+ """
+
+ input_audio_transcription: Optional[InputAudioTranscription] = None
+ """Configuration for input audio transcription.
+
+ The client can optionally set the language and prompt for transcription, these
+ offer additional guidance to the transcription service.
+ """
+
+ turn_detection: Optional[TurnDetection] = None
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
src/openai/types/realtime/realtime_transcription_session_create_request_param.py
@@ -0,0 +1,128 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = [
+ "RealtimeTranscriptionSessionCreateRequestParam",
+ "InputAudioNoiseReduction",
+ "InputAudioTranscription",
+ "TurnDetection",
+]
+
+
+class InputAudioNoiseReduction(TypedDict, total=False):
+ type: Literal["near_field", "far_field"]
+ """Type of noise reduction.
+
+ `near_field` is for close-talking microphones such as headphones, `far_field` is
+ for far-field microphones such as laptop or conference room microphones.
+ """
+
+
+class InputAudioTranscription(TypedDict, total=False):
+ language: str
+ """The language of the input audio.
+
+ Supplying the input language in
+ [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+ format will improve accuracy and latency.
+ """
+
+ model: Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"]
+ """
+ The model to use for transcription, current options are `gpt-4o-transcribe`,
+ `gpt-4o-mini-transcribe`, and `whisper-1`.
+ """
+
+ prompt: str
+ """
+ An optional text to guide the model's style or continue a previous audio
+ segment. For `whisper-1`, the
+ [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+ For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+ "expect words related to technology".
+ """
+
+
+class TurnDetection(TypedDict, total=False):
+ prefix_padding_ms: int
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: int
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: float
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: Literal["server_vad"]
+ """Type of turn detection.
+
+ Only `server_vad` is currently supported for transcription sessions.
+ """
+
+
+class RealtimeTranscriptionSessionCreateRequestParam(TypedDict, total=False):
+ model: Required[Union[str, Literal["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]]]
+ """ID of the model to use.
+
+ The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, and `whisper-1`
+ (which is powered by our open source Whisper V2 model).
+ """
+
+ type: Required[Literal["transcription"]]
+ """The type of session to create.
+
+ Always `transcription` for transcription sessions.
+ """
+
+ include: List[Literal["item.input_audio_transcription.logprobs"]]
+ """The set of items to include in the transcription. Current available items are:
+
+ - `item.input_audio_transcription.logprobs`
+ """
+
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of input audio.
+
+ Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+ be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+ byte order.
+ """
+
+ input_audio_noise_reduction: InputAudioNoiseReduction
+ """Configuration for input audio noise reduction.
+
+ This can be set to `null` to turn off. Noise reduction filters audio added to
+ the input audio buffer before it is sent to VAD and the model. Filtering the
+ audio can improve VAD and turn detection accuracy (reducing false positives) and
+ model performance by improving perception of the input audio.
+ """
+
+ input_audio_transcription: InputAudioTranscription
+ """Configuration for input audio transcription.
+
+ The client can optionally set the language and prompt for transcription, these
+ offer additional guidance to the transcription service.
+ """
+
+ turn_detection: TurnDetection
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
src/openai/types/realtime/realtime_truncation.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal, TypeAlias
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeTruncation", "RetentionRatioTruncation"]
+
+
+class RetentionRatioTruncation(BaseModel):
+ retention_ratio: float
+ """Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0)."""
+
+ type: Literal["retention_ratio"]
+ """Use retention ratio truncation."""
+
+ post_instructions_token_limit: Optional[int] = None
+ """Optional cap on tokens allowed after the instructions."""
+
+
+RealtimeTruncation: TypeAlias = Union[Literal["auto", "disabled"], RetentionRatioTruncation]
src/openai/types/realtime/realtime_truncation_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = ["RealtimeTruncationParam", "RetentionRatioTruncation"]
+
+
+class RetentionRatioTruncation(TypedDict, total=False):
+ retention_ratio: Required[float]
+ """Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0)."""
+
+ type: Required[Literal["retention_ratio"]]
+ """Use retention ratio truncation."""
+
+ post_instructions_token_limit: Optional[int]
+ """Optional cap on tokens allowed after the instructions."""
+
+
+RealtimeTruncationParam: TypeAlias = Union[Literal["auto", "disabled"], RetentionRatioTruncation]
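Editor's note: the two truncation shapes, sketched with hypothetical numbers (not in the commit).

    from openai.types.realtime.realtime_truncation_param import RealtimeTruncationParam

    truncation_default: RealtimeTruncationParam = "auto"
    truncation_ratio: RealtimeTruncationParam = {
        "type": "retention_ratio",
        "retention_ratio": 0.5,                 # keep half of the pre-instruction tokens
        "post_instructions_token_limit": 2000,  # optional cap after the instructions
    }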
src/openai/types/realtime/response_audio_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseAudioDeltaEvent"]
+
+
+class ResponseAudioDeltaEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ delta: str
+ """Base64-encoded audio data delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.output_audio.delta"]
+ """The event type, must be `response.output_audio.delta`."""
src/openai/types/realtime/response_audio_done_event.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseAudioDoneEvent"]
+
+
+class ResponseAudioDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.output_audio.done"]
+ """The event type, must be `response.output_audio.done`."""
src/openai/types/realtime/response_audio_transcript_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseAudioTranscriptDeltaEvent"]
+
+
+class ResponseAudioTranscriptDeltaEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ delta: str
+ """The transcript delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.output_audio_transcript.delta"]
+ """The event type, must be `response.output_audio_transcript.delta`."""
src/openai/types/realtime/response_audio_transcript_done_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseAudioTranscriptDoneEvent"]
+
+
+class ResponseAudioTranscriptDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ transcript: str
+ """The final transcript of the audio."""
+
+ type: Literal["response.output_audio_transcript.done"]
+ """The event type, must be `response.output_audio_transcript.done`."""
src/openai/types/realtime/response_cancel_event.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCancelEvent"]
+
+
+class ResponseCancelEvent(BaseModel):
+ type: Literal["response.cancel"]
+ """The event type, must be `response.cancel`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
+
+ response_id: Optional[str] = None
+ """
+ A specific response ID to cancel - if not provided, will cancel an in-progress
+ response in the default conversation.
+ """
src/openai/types/realtime/response_cancel_event_param.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ResponseCancelEventParam"]
+
+
+class ResponseCancelEventParam(TypedDict, total=False):
+ type: Required[Literal["response.cancel"]]
+ """The event type, must be `response.cancel`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
+
+ response_id: str
+ """
+ A specific response ID to cancel - if not provided, will cancel an in-progress
+ response in the default conversation.
+ """
src/openai/types/realtime/response_content_part_added_event.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseContentPartAddedEvent", "Part"]
+
+
+class Part(BaseModel):
+ audio: Optional[str] = None
+ """Base64-encoded audio data (if type is "audio")."""
+
+ text: Optional[str] = None
+ """The text content (if type is "text")."""
+
+ transcript: Optional[str] = None
+ """The transcript of the audio (if type is "audio")."""
+
+ type: Optional[Literal["text", "audio"]] = None
+ """The content type ("text", "audio")."""
+
+
+class ResponseContentPartAddedEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item to which the content part was added."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ part: Part
+ """The content part that was added."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.content_part.added"]
+ """The event type, must be `response.content_part.added`."""
src/openai/types/realtime/response_content_part_done_event.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseContentPartDoneEvent", "Part"]
+
+
+class Part(BaseModel):
+ audio: Optional[str] = None
+ """Base64-encoded audio data (if type is "audio")."""
+
+ text: Optional[str] = None
+ """The text content (if type is "text")."""
+
+ transcript: Optional[str] = None
+ """The transcript of the audio (if type is "audio")."""
+
+ type: Optional[Literal["text", "audio"]] = None
+ """The content type ("text", "audio")."""
+
+
+class ResponseContentPartDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ part: Part
+ """The content part that is done."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.content_part.done"]
+ """The event type, must be `response.content_part.done`."""
src/openai/types/realtime/response_create_event.py
@@ -0,0 +1,134 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, TypeAlias
+
+from ..._models import BaseModel
+from ..shared.metadata import Metadata
+from .conversation_item import ConversationItem
+from ..responses.response_prompt import ResponsePrompt
+from ..responses.tool_choice_mcp import ToolChoiceMcp
+from ..responses.tool_choice_options import ToolChoiceOptions
+from ..responses.tool_choice_function import ToolChoiceFunction
+
+__all__ = ["ResponseCreateEvent", "Response", "ResponseToolChoice", "ResponseTool"]
+
+ResponseToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunction, ToolChoiceMcp]
+
+
+class ResponseTool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class Response(BaseModel):
+ conversation: Union[str, Literal["auto", "none"], None] = None
+ """Controls which conversation the response is added to.
+
+ Currently supports `auto` and `none`, with `auto` as the default value. The
+ `auto` value means that the contents of the response will be added to the
+    default conversation. Set this to `none` to create an out-of-band response that
+    will not add items to the default conversation.
+ """
+
+ input: Optional[List[ConversationItem]] = None
+ """Input items to include in the prompt for the model.
+
+ Using this field creates a new context for this Response instead of using the
+ default conversation. An empty array `[]` will clear the context for this
+ Response. Note that this can include references to items from the default
+ conversation.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+    and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ prompt: Optional[ResponsePrompt] = None
+ """Reference to a prompt template and its variables.
+
+ [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+ """
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: Optional[ResponseToolChoice] = None
+ """How the model chooses tools.
+
+ Provide one of the string modes or force a specific function/MCP tool.
+ """
+
+ tools: Optional[List[ResponseTool]] = None
+ """Tools (functions) available to the model."""
+
+ voice: Union[
+ str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"], None
+ ] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+    audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+    `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`.
+ """
+
+
+class ResponseCreateEvent(BaseModel):
+ type: Literal["response.create"]
+ """The event type, must be `response.create`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
+
+ response: Optional[Response] = None
+ """Create a new Realtime response with these parameters"""
src/openai/types/realtime/response_create_event_param.py
@@ -0,0 +1,133 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..shared_params.metadata import Metadata
+from .conversation_item_param import ConversationItemParam
+from ..responses.tool_choice_options import ToolChoiceOptions
+from ..responses.response_prompt_param import ResponsePromptParam
+from ..responses.tool_choice_mcp_param import ToolChoiceMcpParam
+from ..responses.tool_choice_function_param import ToolChoiceFunctionParam
+
+__all__ = ["ResponseCreateEventParam", "Response", "ResponseToolChoice", "ResponseTool"]
+
+ResponseToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunctionParam, ToolChoiceMcpParam]
+
+
+class ResponseTool(TypedDict, total=False):
+ description: str
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: str
+ """The name of the function."""
+
+ parameters: object
+ """Parameters of the function in JSON Schema."""
+
+ type: Literal["function"]
+ """The type of the tool, i.e. `function`."""
+
+
+class Response(TypedDict, total=False):
+ conversation: Union[str, Literal["auto", "none"]]
+ """Controls which conversation the response is added to.
+
+ Currently supports `auto` and `none`, with `auto` as the default value. The
+ `auto` value means that the contents of the response will be added to the
+    default conversation. Set this to `none` to create an out-of-band response that
+    will not add items to the default conversation.
+ """
+
+ input: Iterable[ConversationItemParam]
+ """Input items to include in the prompt for the model.
+
+ Using this field creates a new context for this Response instead of using the
+ default conversation. An empty array `[]` will clear the context for this
+ Response. Note that this can include references to items from the default
+ conversation.
+ """
+
+ instructions: str
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+    and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_output_tokens: Union[int, Literal["inf"]]
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ metadata: Optional[Metadata]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ modalities: List[Literal["text", "audio"]]
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ prompt: Optional[ResponsePromptParam]
+ """Reference to a prompt template and its variables.
+
+ [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+ """
+
+ temperature: float
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: ResponseToolChoice
+ """How the model chooses tools.
+
+ Provide one of the string modes or force a specific function/MCP tool.
+ """
+
+ tools: Iterable[ResponseTool]
+ """Tools (functions) available to the model."""
+
+ voice: Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]]
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+    audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+    `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`.
+ """
+
+
+class ResponseCreateEventParam(TypedDict, total=False):
+ type: Required[Literal["response.create"]]
+ """The event type, must be `response.create`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
+
+ response: Response
+ """Create a new Realtime response with these parameters"""
src/openai/types/realtime/response_created_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .realtime_response import RealtimeResponse
+
+__all__ = ["ResponseCreatedEvent"]
+
+
+class ResponseCreatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response: RealtimeResponse
+ """The response resource."""
+
+ type: Literal["response.created"]
+ """The event type, must be `response.created`."""
src/openai/types/realtime/response_done_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .realtime_response import RealtimeResponse
+
+__all__ = ["ResponseDoneEvent"]
+
+
+class ResponseDoneEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response: RealtimeResponse
+ """The response resource."""
+
+ type: Literal["response.done"]
+ """The event type, must be `response.done`."""
src/openai/types/realtime/response_function_call_arguments_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"]
+
+
+class ResponseFunctionCallArgumentsDeltaEvent(BaseModel):
+ call_id: str
+ """The ID of the function call."""
+
+ delta: str
+ """The arguments delta as a JSON string."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the function call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.function_call_arguments.delta"]
+ """The event type, must be `response.function_call_arguments.delta`."""
src/openai/types/realtime/response_function_call_arguments_done_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseFunctionCallArgumentsDoneEvent"]
+
+
+class ResponseFunctionCallArgumentsDoneEvent(BaseModel):
+ arguments: str
+ """The final arguments as a JSON string."""
+
+ call_id: str
+ """The ID of the function call."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the function call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.function_call_arguments.done"]
+ """The event type, must be `response.function_call_arguments.done`."""
src/openai/types/realtime/response_mcp_call_arguments_delta.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseMcpCallArgumentsDelta"]
+
+
+class ResponseMcpCallArgumentsDelta(BaseModel):
+ delta: str
+ """The JSON-encoded arguments delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the MCP tool call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.mcp_call_arguments.delta"]
+ """The event type, must be `response.mcp_call_arguments.delta`."""
+
+ obfuscation: Optional[str] = None
+ """If present, indicates the delta text was obfuscated."""
src/openai/types/realtime/response_mcp_call_arguments_done.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseMcpCallArgumentsDone"]
+
+
+class ResponseMcpCallArgumentsDone(BaseModel):
+ arguments: str
+ """The final JSON-encoded arguments string."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the MCP tool call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.mcp_call_arguments.done"]
+ """The event type, must be `response.mcp_call_arguments.done`."""
src/openai/types/realtime/response_mcp_call_completed.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseMcpCallCompleted"]
+
+
+class ResponseMcpCallCompleted(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the MCP tool call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ type: Literal["response.mcp_call.completed"]
+ """The event type, must be `response.mcp_call.completed`."""
src/openai/types/realtime/response_mcp_call_failed.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseMcpCallFailed"]
+
+
+class ResponseMcpCallFailed(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the MCP tool call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ type: Literal["response.mcp_call.failed"]
+ """The event type, must be `response.mcp_call.failed`."""
src/openai/types/realtime/response_mcp_call_in_progress.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseMcpCallInProgress"]
+
+
+class ResponseMcpCallInProgress(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the MCP tool call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ type: Literal["response.mcp_call.in_progress"]
+ """The event type, must be `response.mcp_call.in_progress`."""
src/openai/types/realtime/response_output_item_added_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ResponseOutputItemAddedEvent"]
+
+
+class ResponseOutputItemAddedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """A single item within a Realtime conversation."""
+
+ output_index: int
+ """The index of the output item in the Response."""
+
+ response_id: str
+ """The ID of the Response to which the item belongs."""
+
+ type: Literal["response.output_item.added"]
+ """The event type, must be `response.output_item.added`."""
src/openai/types/realtime/response_output_item_done_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ResponseOutputItemDoneEvent"]
+
+
+class ResponseOutputItemDoneEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """A single item within a Realtime conversation."""
+
+ output_index: int
+ """The index of the output item in the Response."""
+
+ response_id: str
+ """The ID of the Response to which the item belongs."""
+
+ type: Literal["response.output_item.done"]
+ """The event type, must be `response.output_item.done`."""
src/openai/types/realtime/response_text_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseTextDeltaEvent"]
+
+
+class ResponseTextDeltaEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ delta: str
+ """The text delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.output_text.delta"]
+ """The event type, must be `response.output_text.delta`."""
src/openai/types/realtime/response_text_done_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseTextDoneEvent"]
+
+
+class ResponseTextDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ text: str
+ """The final text content."""
+
+ type: Literal["response.output_text.done"]
+ """The event type, must be `response.output_text.done`."""
src/openai/types/realtime/session_created_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .realtime_session import RealtimeSession
+
+__all__ = ["SessionCreatedEvent"]
+
+
+class SessionCreatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ session: RealtimeSession
+ """Realtime session object."""
+
+ type: Literal["session.created"]
+ """The event type, must be `session.created`."""
src/openai/types/realtime/session_update_event.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .realtime_session_create_request import RealtimeSessionCreateRequest
+
+__all__ = ["SessionUpdateEvent"]
+
+
+class SessionUpdateEvent(BaseModel):
+ session: RealtimeSessionCreateRequest
+ """Realtime session object configuration."""
+
+ type: Literal["session.update"]
+ """The event type, must be `session.update`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/session_update_event_param.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .realtime_session_create_request_param import RealtimeSessionCreateRequestParam
+
+__all__ = ["SessionUpdateEventParam"]
+
+
+class SessionUpdateEventParam(TypedDict, total=False):
+ session: Required[RealtimeSessionCreateRequestParam]
+ """Realtime session object configuration."""
+
+ type: Required[Literal["session.update"]]
+ """The event type, must be `session.update`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/session_updated_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .realtime_session import RealtimeSession
+
+__all__ = ["SessionUpdatedEvent"]
+
+
+class SessionUpdatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ session: RealtimeSession
+ """Realtime session object."""
+
+ type: Literal["session.updated"]
+ """The event type, must be `session.updated`."""
src/openai/types/realtime/transcription_session_created.py
@@ -0,0 +1,105 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = [
+ "TranscriptionSessionCreated",
+ "Session",
+ "SessionAudio",
+ "SessionAudioInput",
+ "SessionAudioInputNoiseReduction",
+ "SessionAudioInputTranscription",
+ "SessionAudioInputTurnDetection",
+]
+
+
+class SessionAudioInputNoiseReduction(BaseModel):
+ type: Optional[Literal["near_field", "far_field"]] = None
+
+
+class SessionAudioInputTranscription(BaseModel):
+ language: Optional[str] = None
+ """The language of the input audio.
+
+ Supplying the input language in
+ [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+ format will improve accuracy and latency.
+ """
+
+ model: Optional[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"]] = None
+ """The model to use for transcription.
+
+ Can be `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, or `whisper-1`.
+ """
+
+ prompt: Optional[str] = None
+ """An optional text to guide the model's style or continue a previous audio
+ segment.
+
+ The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+ should match the audio language.
+ """
+
+
+class SessionAudioInputTurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+
+ silence_duration_ms: Optional[int] = None
+
+ threshold: Optional[float] = None
+
+ type: Optional[str] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class SessionAudioInput(BaseModel):
+ format: Optional[str] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ noise_reduction: Optional[SessionAudioInputNoiseReduction] = None
+ """Configuration for input audio noise reduction."""
+
+ transcription: Optional[SessionAudioInputTranscription] = None
+ """Configuration of the transcription model."""
+
+ turn_detection: Optional[SessionAudioInputTurnDetection] = None
+ """Configuration for turn detection."""
+
+
+class SessionAudio(BaseModel):
+ input: Optional[SessionAudioInput] = None
+
+
+class Session(BaseModel):
+ id: Optional[str] = None
+ """Unique identifier for the session that looks like `sess_1234567890abcdef`."""
+
+ audio: Optional[SessionAudio] = None
+ """Configuration for input audio for the session."""
+
+ expires_at: Optional[int] = None
+ """Expiration timestamp for the session, in seconds since epoch."""
+
+ include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
+ """Additional fields to include in server outputs.
+
+ - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+ transcription.
+ """
+
+ object: Optional[str] = None
+ """The object type. Always `realtime.transcription_session`."""
+
+
+class TranscriptionSessionCreated(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ session: Session
+ """A Realtime transcription session configuration object."""
+
+ type: Literal["transcription_session.created"]
+ """The event type, must be `transcription_session.created`."""
src/openai/types/realtime/transcription_session_update.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .realtime_transcription_session_create_request import RealtimeTranscriptionSessionCreateRequest
+
+__all__ = ["TranscriptionSessionUpdate"]
+
+
+class TranscriptionSessionUpdate(BaseModel):
+ session: RealtimeTranscriptionSessionCreateRequest
+ """Realtime transcription session object configuration."""
+
+ type: Literal["transcription_session.update"]
+ """The event type, must be `transcription_session.update`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/transcription_session_update_param.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .realtime_transcription_session_create_request_param import RealtimeTranscriptionSessionCreateRequestParam
+
+__all__ = ["TranscriptionSessionUpdateParam"]
+
+
+class TranscriptionSessionUpdateParam(TypedDict, total=False):
+ session: Required[RealtimeTranscriptionSessionCreateRequestParam]
+ """Realtime transcription session object configuration."""
+
+ type: Required[Literal["transcription_session.update"]]
+ """The event type, must be `transcription_session.update`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/transcription_session_updated_event.py
@@ -0,0 +1,105 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = [
+ "TranscriptionSessionUpdatedEvent",
+ "Session",
+ "SessionAudio",
+ "SessionAudioInput",
+ "SessionAudioInputNoiseReduction",
+ "SessionAudioInputTranscription",
+ "SessionAudioInputTurnDetection",
+]
+
+
+class SessionAudioInputNoiseReduction(BaseModel):
+ type: Optional[Literal["near_field", "far_field"]] = None
+
+
+class SessionAudioInputTranscription(BaseModel):
+ language: Optional[str] = None
+ """The language of the input audio.
+
+ Supplying the input language in
+ [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+ format will improve accuracy and latency.
+ """
+
+ model: Optional[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"]] = None
+ """The model to use for transcription.
+
+ Can be `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, or `whisper-1`.
+ """
+
+ prompt: Optional[str] = None
+ """An optional text to guide the model's style or continue a previous audio
+ segment.
+
+ The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+ should match the audio language.
+ """
+
+
+class SessionAudioInputTurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+
+ silence_duration_ms: Optional[int] = None
+
+ threshold: Optional[float] = None
+
+ type: Optional[str] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class SessionAudioInput(BaseModel):
+ format: Optional[str] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ noise_reduction: Optional[SessionAudioInputNoiseReduction] = None
+ """Configuration for input audio noise reduction."""
+
+ transcription: Optional[SessionAudioInputTranscription] = None
+ """Configuration of the transcription model."""
+
+ turn_detection: Optional[SessionAudioInputTurnDetection] = None
+ """Configuration for turn detection."""
+
+
+class SessionAudio(BaseModel):
+ input: Optional[SessionAudioInput] = None
+
+
+class Session(BaseModel):
+ id: Optional[str] = None
+ """Unique identifier for the session that looks like `sess_1234567890abcdef`."""
+
+ audio: Optional[SessionAudio] = None
+ """Configuration for input audio for the session."""
+
+ expires_at: Optional[int] = None
+ """Expiration timestamp for the session, in seconds since epoch."""
+
+ include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
+ """Additional fields to include in server outputs.
+
+ - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+ transcription.
+ """
+
+ object: Optional[str] = None
+ """The object type. Always `realtime.transcription_session`."""
+
+
+class TranscriptionSessionUpdatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ session: Session
+ """A Realtime transcription session configuration object."""
+
+ type: Literal["transcription_session.updated"]
+ """The event type, must be `transcription_session.updated`."""
src/openai/types/responses/__init__.py
@@ -59,6 +59,7 @@ from .response_output_message import ResponseOutputMessage as ResponseOutputMess
from .response_output_refusal import ResponseOutputRefusal as ResponseOutputRefusal
from .response_reasoning_item import ResponseReasoningItem as ResponseReasoningItem
from .tool_choice_types_param import ToolChoiceTypesParam as ToolChoiceTypesParam
+from .web_search_preview_tool import WebSearchPreviewTool as WebSearchPreviewTool
from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam
from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent
from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams
@@ -90,6 +91,7 @@ from .response_refusal_delta_event import ResponseRefusalDeltaEvent as ResponseR
from .response_output_message_param import ResponseOutputMessageParam as ResponseOutputMessageParam
from .response_output_refusal_param import ResponseOutputRefusalParam as ResponseOutputRefusalParam
from .response_reasoning_item_param import ResponseReasoningItemParam as ResponseReasoningItemParam
+from .web_search_preview_tool_param import WebSearchPreviewToolParam as WebSearchPreviewToolParam
from .response_file_search_tool_call import ResponseFileSearchToolCall as ResponseFileSearchToolCall
from .response_mcp_call_failed_event import ResponseMcpCallFailedEvent as ResponseMcpCallFailedEvent
from .response_custom_tool_call_param import ResponseCustomToolCallParam as ResponseCustomToolCallParam
src/openai/types/responses/response.py
@@ -116,7 +116,7 @@ class Response(BaseModel):
You can specify which tool to use by setting the `tool_choice` parameter.
- The two categories of tools you can provide the model are:
+ We support the following categories of tools:
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
capabilities, like
@@ -124,6 +124,9 @@ class Response(BaseModel):
[file search](https://platform.openai.com/docs/guides/tools-file-search).
Learn more about
[built-in tools](https://platform.openai.com/docs/guides/tools).
+ - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
+ predefined connectors such as Google Drive and Notion. Learn more about
+ [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
- **Function calls (custom tools)**: Functions that are defined by you, enabling
the model to call your own code with strongly typed arguments and outputs.
Learn more about
src/openai/types/responses/response_create_params.py
@@ -216,7 +216,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
You can specify which tool to use by setting the `tool_choice` parameter.
- The two categories of tools you can provide the model are:
+ We support the following categories of tools:
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
capabilities, like
@@ -224,6 +224,9 @@ class ResponseCreateParamsBase(TypedDict, total=False):
[file search](https://platform.openai.com/docs/guides/tools-file-search).
Learn more about
[built-in tools](https://platform.openai.com/docs/guides/tools).
+ - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
+ predefined connectors such as Google Drive and Notion. Learn more about
+ [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
- **Function calls (custom tools)**: Functions that are defined by you, enabling
the model to call your own code with strongly typed arguments and outputs.
Learn more about
src/openai/types/responses/tool.py
@@ -3,19 +3,18 @@
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
-from . import web_search_tool
from ..._utils import PropertyInfo
from ..._models import BaseModel
from .custom_tool import CustomTool
from .computer_tool import ComputerTool
from .function_tool import FunctionTool
+from .web_search_tool import WebSearchTool
from .file_search_tool import FileSearchTool
+from .web_search_preview_tool import WebSearchPreviewTool
__all__ = [
"Tool",
"WebSearchTool",
- "WebSearchToolFilters",
- "WebSearchToolUserLocation",
"Mcp",
"McpAllowedTools",
"McpAllowedToolsMcpToolFilter",
@@ -32,61 +31,6 @@ __all__ = [
]
-class WebSearchToolFilters(BaseModel):
- allowed_domains: Optional[List[str]] = None
- """Allowed domains for the search.
-
- If not provided, all domains are allowed. Subdomains of the provided domains are
- allowed as well.
-
- Example: `["pubmed.ncbi.nlm.nih.gov"]`
- """
-
-
-class WebSearchToolUserLocation(BaseModel):
- city: Optional[str] = None
- """Free text input for the city of the user, e.g. `San Francisco`."""
-
- country: Optional[str] = None
- """
- The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of
- the user, e.g. `US`.
- """
-
- region: Optional[str] = None
- """Free text input for the region of the user, e.g. `California`."""
-
- timezone: Optional[str] = None
- """
- The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the
- user, e.g. `America/Los_Angeles`.
- """
-
- type: Optional[Literal["approximate"]] = None
- """The type of location approximation. Always `approximate`."""
-
-
-class WebSearchTool(BaseModel):
- type: Literal["web_search", "web_search_2025_08_26"]
- """The type of the web search tool.
-
- One of `web_search` or `web_search_2025_08_26`.
- """
-
- filters: Optional[WebSearchToolFilters] = None
- """Filters for the search."""
-
- search_context_size: Optional[Literal["low", "medium", "high"]] = None
- """High level guidance for the amount of context window space to use for the
- search.
-
- One of `low`, `medium`, or `high`. `medium` is the default.
- """
-
- user_location: Optional[WebSearchToolUserLocation] = None
- """The approximate location of the user."""
-
-
class McpAllowedToolsMcpToolFilter(BaseModel):
read_only: Optional[bool] = None
"""Indicates whether or not a tool modifies data or is read-only.
@@ -310,7 +254,7 @@ Tool: TypeAlias = Annotated[
ImageGeneration,
LocalShell,
CustomTool,
- web_search_tool.WebSearchTool,
+ WebSearchPreviewTool,
],
PropertyInfo(discriminator="type"),
]
src/openai/types/responses/tool_param.py
@@ -11,12 +11,10 @@ from .computer_tool_param import ComputerToolParam
from .function_tool_param import FunctionToolParam
from .web_search_tool_param import WebSearchToolParam
from .file_search_tool_param import FileSearchToolParam
+from .web_search_preview_tool_param import WebSearchPreviewToolParam
__all__ = [
"ToolParam",
- "WebSearchTool",
- "WebSearchToolFilters",
- "WebSearchToolUserLocation",
"Mcp",
"McpAllowedTools",
"McpAllowedToolsMcpToolFilter",
@@ -33,61 +31,6 @@ __all__ = [
]
-class WebSearchToolFilters(TypedDict, total=False):
- allowed_domains: Optional[List[str]]
- """Allowed domains for the search.
-
- If not provided, all domains are allowed. Subdomains of the provided domains are
- allowed as well.
-
- Example: `["pubmed.ncbi.nlm.nih.gov"]`
- """
-
-
-class WebSearchToolUserLocation(TypedDict, total=False):
- city: Optional[str]
- """Free text input for the city of the user, e.g. `San Francisco`."""
-
- country: Optional[str]
- """
- The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of
- the user, e.g. `US`.
- """
-
- region: Optional[str]
- """Free text input for the region of the user, e.g. `California`."""
-
- timezone: Optional[str]
- """
- The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the
- user, e.g. `America/Los_Angeles`.
- """
-
- type: Literal["approximate"]
- """The type of location approximation. Always `approximate`."""
-
-
-class WebSearchTool(TypedDict, total=False):
- type: Required[Literal["web_search", "web_search_2025_08_26"]]
- """The type of the web search tool.
-
- One of `web_search` or `web_search_2025_08_26`.
- """
-
- filters: Optional[WebSearchToolFilters]
- """Filters for the search."""
-
- search_context_size: Literal["low", "medium", "high"]
- """High level guidance for the amount of context window space to use for the
- search.
-
- One of `low`, `medium`, or `high`. `medium` is the default.
- """
-
- user_location: Optional[WebSearchToolUserLocation]
- """The approximate location of the user."""
-
-
class McpAllowedToolsMcpToolFilter(TypedDict, total=False):
read_only: bool
"""Indicates whether or not a tool modifies data or is read-only.
@@ -302,13 +245,13 @@ ToolParam: TypeAlias = Union[
FunctionToolParam,
FileSearchToolParam,
ComputerToolParam,
- WebSearchTool,
+ WebSearchToolParam,
Mcp,
CodeInterpreter,
ImageGeneration,
LocalShell,
CustomToolParam,
- WebSearchToolParam,
+ WebSearchPreviewToolParam,
]
src/openai/types/responses/web_search_preview_tool.py
@@ -0,0 +1,49 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["WebSearchPreviewTool", "UserLocation"]
+
+
+class UserLocation(BaseModel):
+ type: Literal["approximate"]
+ """The type of location approximation. Always `approximate`."""
+
+ city: Optional[str] = None
+ """Free text input for the city of the user, e.g. `San Francisco`."""
+
+ country: Optional[str] = None
+ """
+ The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of
+ the user, e.g. `US`.
+ """
+
+ region: Optional[str] = None
+ """Free text input for the region of the user, e.g. `California`."""
+
+ timezone: Optional[str] = None
+ """
+ The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the
+ user, e.g. `America/Los_Angeles`.
+ """
+
+
+class WebSearchPreviewTool(BaseModel):
+ type: Literal["web_search_preview", "web_search_preview_2025_03_11"]
+ """The type of the web search tool.
+
+ One of `web_search_preview` or `web_search_preview_2025_03_11`.
+ """
+
+ search_context_size: Optional[Literal["low", "medium", "high"]] = None
+ """High level guidance for the amount of context window space to use for the
+ search.
+
+ One of `low`, `medium`, or `high`. `medium` is the default.
+ """
+
+ user_location: Optional[UserLocation] = None
+ """The user's location."""
src/openai/types/responses/web_search_preview_tool_param.py
@@ -0,0 +1,49 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["WebSearchPreviewToolParam", "UserLocation"]
+
+
+class UserLocation(TypedDict, total=False):
+ type: Required[Literal["approximate"]]
+ """The type of location approximation. Always `approximate`."""
+
+ city: Optional[str]
+ """Free text input for the city of the user, e.g. `San Francisco`."""
+
+ country: Optional[str]
+ """
+ The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of
+ the user, e.g. `US`.
+ """
+
+ region: Optional[str]
+ """Free text input for the region of the user, e.g. `California`."""
+
+ timezone: Optional[str]
+ """
+ The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the
+ user, e.g. `America/Los_Angeles`.
+ """
+
+
+class WebSearchPreviewToolParam(TypedDict, total=False):
+ type: Required[Literal["web_search_preview", "web_search_preview_2025_03_11"]]
+ """The type of the web search tool.
+
+ One of `web_search_preview` or `web_search_preview_2025_03_11`.
+ """
+
+ search_context_size: Literal["low", "medium", "high"]
+ """High level guidance for the amount of context window space to use for the
+ search.
+
+ One of `low`, `medium`, or `high`. `medium` is the default.
+ """
+
+ user_location: Optional[UserLocation]
+ """The user's location."""
src/openai/types/responses/web_search_tool.py
@@ -1,17 +1,25 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Optional
+from typing import List, Optional
from typing_extensions import Literal
from ..._models import BaseModel
-__all__ = ["WebSearchTool", "UserLocation"]
+__all__ = ["WebSearchTool", "Filters", "UserLocation"]
-class UserLocation(BaseModel):
- type: Literal["approximate"]
- """The type of location approximation. Always `approximate`."""
+class Filters(BaseModel):
+ allowed_domains: Optional[List[str]] = None
+ """Allowed domains for the search.
+
+ If not provided, all domains are allowed. Subdomains of the provided domains are
+ allowed as well.
+
+ Example: `["pubmed.ncbi.nlm.nih.gov"]`
+ """
+
+class UserLocation(BaseModel):
city: Optional[str] = None
"""Free text input for the city of the user, e.g. `San Francisco`."""
@@ -30,14 +38,20 @@ class UserLocation(BaseModel):
user, e.g. `America/Los_Angeles`.
"""
+ type: Optional[Literal["approximate"]] = None
+ """The type of location approximation. Always `approximate`."""
+
class WebSearchTool(BaseModel):
- type: Literal["web_search_preview", "web_search_preview_2025_03_11"]
+ type: Literal["web_search", "web_search_2025_08_26"]
"""The type of the web search tool.
- One of `web_search_preview` or `web_search_preview_2025_03_11`.
+ One of `web_search` or `web_search_2025_08_26`.
"""
+ filters: Optional[Filters] = None
+ """Filters for the search."""
+
search_context_size: Optional[Literal["low", "medium", "high"]] = None
"""High level guidance for the amount of context window space to use for the
search.
@@ -46,4 +60,4 @@ class WebSearchTool(BaseModel):
"""
user_location: Optional[UserLocation] = None
- """The user's location."""
+ """The approximate location of the user."""
src/openai/types/responses/web_search_tool_param.py
@@ -2,16 +2,24 @@
from __future__ import annotations
-from typing import Optional
+from typing import List, Optional
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["WebSearchToolParam", "UserLocation"]
+__all__ = ["WebSearchToolParam", "Filters", "UserLocation"]
-class UserLocation(TypedDict, total=False):
- type: Required[Literal["approximate"]]
- """The type of location approximation. Always `approximate`."""
+class Filters(TypedDict, total=False):
+ allowed_domains: Optional[List[str]]
+ """Allowed domains for the search.
+
+ If not provided, all domains are allowed. Subdomains of the provided domains are
+ allowed as well.
+
+ Example: `["pubmed.ncbi.nlm.nih.gov"]`
+ """
+
+class UserLocation(TypedDict, total=False):
city: Optional[str]
"""Free text input for the city of the user, e.g. `San Francisco`."""
@@ -30,14 +38,20 @@ class UserLocation(TypedDict, total=False):
user, e.g. `America/Los_Angeles`.
"""
+ type: Literal["approximate"]
+ """The type of location approximation. Always `approximate`."""
+
class WebSearchToolParam(TypedDict, total=False):
- type: Required[Literal["web_search_preview", "web_search_preview_2025_03_11"]]
+ type: Required[Literal["web_search", "web_search_2025_08_26"]]
"""The type of the web search tool.
- One of `web_search_preview` or `web_search_preview_2025_03_11`.
+ One of `web_search` or `web_search_2025_08_26`.
"""
+ filters: Optional[Filters]
+ """Filters for the search."""
+
search_context_size: Literal["low", "medium", "high"]
"""High level guidance for the amount of context window space to use for the
search.
@@ -46,4 +60,4 @@ class WebSearchToolParam(TypedDict, total=False):
"""
user_location: Optional[UserLocation]
- """The user's location."""
+ """The approximate location of the user."""
src/openai/types/webhooks/__init__.py
@@ -15,6 +15,7 @@ from .response_cancelled_webhook_event import ResponseCancelledWebhookEvent as R
from .response_completed_webhook_event import ResponseCompletedWebhookEvent as ResponseCompletedWebhookEvent
from .response_incomplete_webhook_event import ResponseIncompleteWebhookEvent as ResponseIncompleteWebhookEvent
from .fine_tuning_job_failed_webhook_event import FineTuningJobFailedWebhookEvent as FineTuningJobFailedWebhookEvent
+from .realtime_call_incoming_webhook_event import RealtimeCallIncomingWebhookEvent as RealtimeCallIncomingWebhookEvent
from .fine_tuning_job_cancelled_webhook_event import (
FineTuningJobCancelledWebhookEvent as FineTuningJobCancelledWebhookEvent,
)
src/openai/types/webhooks/realtime_call_incoming_webhook_event.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeCallIncomingWebhookEvent", "Data", "DataSipHeader"]
+
+
+class DataSipHeader(BaseModel):
+ name: str
+ """Name of the SIP Header."""
+
+ value: str
+ """Value of the SIP Header."""
+
+
+class Data(BaseModel):
+ call_id: str
+ """The unique ID of this call."""
+
+ sip_headers: List[DataSipHeader]
+ """Headers from the SIP Invite."""
+
+
+class RealtimeCallIncomingWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the model response was completed."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["realtime.call.incoming"]
+ """The type of the event. Always `realtime.call.incoming`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
src/openai/types/webhooks/unwrap_webhook_event.py
@@ -16,6 +16,7 @@ from .response_cancelled_webhook_event import ResponseCancelledWebhookEvent
from .response_completed_webhook_event import ResponseCompletedWebhookEvent
from .response_incomplete_webhook_event import ResponseIncompleteWebhookEvent
from .fine_tuning_job_failed_webhook_event import FineTuningJobFailedWebhookEvent
+from .realtime_call_incoming_webhook_event import RealtimeCallIncomingWebhookEvent
from .fine_tuning_job_cancelled_webhook_event import FineTuningJobCancelledWebhookEvent
from .fine_tuning_job_succeeded_webhook_event import FineTuningJobSucceededWebhookEvent
@@ -33,6 +34,7 @@ UnwrapWebhookEvent: TypeAlias = Annotated[
FineTuningJobCancelledWebhookEvent,
FineTuningJobFailedWebhookEvent,
FineTuningJobSucceededWebhookEvent,
+ RealtimeCallIncomingWebhookEvent,
ResponseCancelledWebhookEvent,
ResponseCompletedWebhookEvent,
ResponseFailedWebhookEvent,
src/openai/__init__.py
@@ -379,6 +379,7 @@ from ._module_client import (
models as models,
batches as batches,
uploads as uploads,
+ realtime as realtime,
webhooks as webhooks,
responses as responses,
containers as containers,
src/openai/_client.py
@@ -45,6 +45,7 @@ if TYPE_CHECKING:
models,
batches,
uploads,
+ realtime,
responses,
containers,
embeddings,
@@ -67,6 +68,7 @@ if TYPE_CHECKING:
from .resources.evals.evals import Evals, AsyncEvals
from .resources.moderations import Moderations, AsyncModerations
from .resources.uploads.uploads import Uploads, AsyncUploads
+ from .resources.realtime.realtime import Realtime, AsyncRealtime
from .resources.responses.responses import Responses, AsyncResponses
from .resources.containers.containers import Containers, AsyncContainers
from .resources.fine_tuning.fine_tuning import FineTuning, AsyncFineTuning
@@ -256,6 +258,12 @@ class OpenAI(SyncAPIClient):
return Responses(self)
+ @cached_property
+ def realtime(self) -> Realtime:
+ from .resources.realtime import Realtime
+
+ return Realtime(self)
+
@cached_property
def conversations(self) -> Conversations:
from .resources.conversations import Conversations
@@ -581,6 +589,12 @@ class AsyncOpenAI(AsyncAPIClient):
return AsyncResponses(self)
+ @cached_property
+ def realtime(self) -> AsyncRealtime:
+ from .resources.realtime import AsyncRealtime
+
+ return AsyncRealtime(self)
+
@cached_property
def conversations(self) -> AsyncConversations:
from .resources.conversations import AsyncConversations
@@ -816,6 +830,12 @@ class OpenAIWithRawResponse:
return ResponsesWithRawResponse(self._client.responses)
+ @cached_property
+ def realtime(self) -> realtime.RealtimeWithRawResponse:
+ from .resources.realtime import RealtimeWithRawResponse
+
+ return RealtimeWithRawResponse(self._client.realtime)
+
@cached_property
def conversations(self) -> conversations.ConversationsWithRawResponse:
from .resources.conversations import ConversationsWithRawResponse
@@ -925,6 +945,12 @@ class AsyncOpenAIWithRawResponse:
return AsyncResponsesWithRawResponse(self._client.responses)
+ @cached_property
+ def realtime(self) -> realtime.AsyncRealtimeWithRawResponse:
+ from .resources.realtime import AsyncRealtimeWithRawResponse
+
+ return AsyncRealtimeWithRawResponse(self._client.realtime)
+
@cached_property
def conversations(self) -> conversations.AsyncConversationsWithRawResponse:
from .resources.conversations import AsyncConversationsWithRawResponse
@@ -1034,6 +1060,12 @@ class OpenAIWithStreamedResponse:
return ResponsesWithStreamingResponse(self._client.responses)
+ @cached_property
+ def realtime(self) -> realtime.RealtimeWithStreamingResponse:
+ from .resources.realtime import RealtimeWithStreamingResponse
+
+ return RealtimeWithStreamingResponse(self._client.realtime)
+
@cached_property
def conversations(self) -> conversations.ConversationsWithStreamingResponse:
from .resources.conversations import ConversationsWithStreamingResponse
@@ -1143,6 +1175,12 @@ class AsyncOpenAIWithStreamedResponse:
return AsyncResponsesWithStreamingResponse(self._client.responses)
+ @cached_property
+ def realtime(self) -> realtime.AsyncRealtimeWithStreamingResponse:
+ from .resources.realtime import AsyncRealtimeWithStreamingResponse
+
+ return AsyncRealtimeWithStreamingResponse(self._client.realtime)
+
@cached_property
def conversations(self) -> conversations.AsyncConversationsWithStreamingResponse:
from .resources.conversations import AsyncConversationsWithStreamingResponse
src/openai/_module_client.py
@@ -19,6 +19,7 @@ if TYPE_CHECKING:
from .resources.evals.evals import Evals
from .resources.moderations import Moderations
from .resources.uploads.uploads import Uploads
+ from .resources.realtime.realtime import Realtime
from .resources.responses.responses import Responses
from .resources.containers.containers import Containers
from .resources.fine_tuning.fine_tuning import FineTuning
@@ -89,6 +90,12 @@ class WebhooksProxy(LazyProxy["Webhooks"]):
return _load_client().webhooks
+class RealtimeProxy(LazyProxy["Realtime"]):
+ @override
+ def __load__(self) -> Realtime:
+ return _load_client().realtime
+
+
class ResponsesProxy(LazyProxy["Responses"]):
@override
def __load__(self) -> Responses:
@@ -147,6 +154,7 @@ models: Models = ModelsProxy().__as_proxied__()
batches: Batches = BatchesProxy().__as_proxied__()
uploads: Uploads = UploadsProxy().__as_proxied__()
webhooks: Webhooks = WebhooksProxy().__as_proxied__()
+realtime: Realtime = RealtimeProxy().__as_proxied__()
responses: Responses = ResponsesProxy().__as_proxied__()
embeddings: Embeddings = EmbeddingsProxy().__as_proxied__()
containers: Containers = ContainersProxy().__as_proxied__()
tests/api_resources/beta/realtime/test_sessions.py
@@ -1,166 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from openai import OpenAI, AsyncOpenAI
-from tests.utils import assert_matches_type
-from openai.types.beta.realtime import SessionCreateResponse
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestSessions:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_create(self, client: OpenAI) -> None:
- session = client.beta.realtime.sessions.create()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- def test_method_create_with_all_params(self, client: OpenAI) -> None:
- session = client.beta.realtime.sessions.create(
- client_secret={
- "expires_after": {
- "anchor": "created_at",
- "seconds": 0,
- }
- },
- input_audio_format="pcm16",
- input_audio_noise_reduction={"type": "near_field"},
- input_audio_transcription={
- "language": "language",
- "model": "model",
- "prompt": "prompt",
- },
- instructions="instructions",
- max_response_output_tokens=0,
- modalities=["text"],
- model="gpt-4o-realtime-preview",
- output_audio_format="pcm16",
- speed=0.25,
- temperature=0,
- tool_choice="tool_choice",
- tools=[
- {
- "description": "description",
- "name": "name",
- "parameters": {},
- "type": "function",
- }
- ],
- tracing="auto",
- turn_detection={
- "create_response": True,
- "eagerness": "low",
- "interrupt_response": True,
- "prefix_padding_ms": 0,
- "silence_duration_ms": 0,
- "threshold": 0,
- "type": "server_vad",
- },
- voice="ash",
- )
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- def test_raw_response_create(self, client: OpenAI) -> None:
- response = client.beta.realtime.sessions.with_raw_response.create()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = response.parse()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- def test_streaming_response_create(self, client: OpenAI) -> None:
- with client.beta.realtime.sessions.with_streaming_response.create() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = response.parse()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncSessions:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_create(self, async_client: AsyncOpenAI) -> None:
- session = await async_client.beta.realtime.sessions.create()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
- session = await async_client.beta.realtime.sessions.create(
- client_secret={
- "expires_after": {
- "anchor": "created_at",
- "seconds": 0,
- }
- },
- input_audio_format="pcm16",
- input_audio_noise_reduction={"type": "near_field"},
- input_audio_transcription={
- "language": "language",
- "model": "model",
- "prompt": "prompt",
- },
- instructions="instructions",
- max_response_output_tokens=0,
- modalities=["text"],
- model="gpt-4o-realtime-preview",
- output_audio_format="pcm16",
- speed=0.25,
- temperature=0,
- tool_choice="tool_choice",
- tools=[
- {
- "description": "description",
- "name": "name",
- "parameters": {},
- "type": "function",
- }
- ],
- tracing="auto",
- turn_detection={
- "create_response": True,
- "eagerness": "low",
- "interrupt_response": True,
- "prefix_padding_ms": 0,
- "silence_duration_ms": 0,
- "threshold": 0,
- "type": "server_vad",
- },
- voice="ash",
- )
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
- response = await async_client.beta.realtime.sessions.with_raw_response.create()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = response.parse()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
- async with async_client.beta.realtime.sessions.with_streaming_response.create() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = await response.parse()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
tests/api_resources/beta/realtime/test_transcription_sessions.py
@@ -1,134 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from openai import OpenAI, AsyncOpenAI
-from tests.utils import assert_matches_type
-from openai.types.beta.realtime import TranscriptionSession
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestTranscriptionSessions:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_create(self, client: OpenAI) -> None:
- transcription_session = client.beta.realtime.transcription_sessions.create()
- assert_matches_type(TranscriptionSession, transcription_session, path=["response"])
-
- @parametrize
- def test_method_create_with_all_params(self, client: OpenAI) -> None:
- transcription_session = client.beta.realtime.transcription_sessions.create(
- client_secret={
- "expires_at": {
- "anchor": "created_at",
- "seconds": 0,
- }
- },
- include=["string"],
- input_audio_format="pcm16",
- input_audio_noise_reduction={"type": "near_field"},
- input_audio_transcription={
- "language": "language",
- "model": "gpt-4o-transcribe",
- "prompt": "prompt",
- },
- modalities=["text"],
- turn_detection={
- "create_response": True,
- "eagerness": "low",
- "interrupt_response": True,
- "prefix_padding_ms": 0,
- "silence_duration_ms": 0,
- "threshold": 0,
- "type": "server_vad",
- },
- )
- assert_matches_type(TranscriptionSession, transcription_session, path=["response"])
-
- @parametrize
- def test_raw_response_create(self, client: OpenAI) -> None:
- response = client.beta.realtime.transcription_sessions.with_raw_response.create()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- transcription_session = response.parse()
- assert_matches_type(TranscriptionSession, transcription_session, path=["response"])
-
- @parametrize
- def test_streaming_response_create(self, client: OpenAI) -> None:
- with client.beta.realtime.transcription_sessions.with_streaming_response.create() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- transcription_session = response.parse()
- assert_matches_type(TranscriptionSession, transcription_session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncTranscriptionSessions:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_create(self, async_client: AsyncOpenAI) -> None:
- transcription_session = await async_client.beta.realtime.transcription_sessions.create()
- assert_matches_type(TranscriptionSession, transcription_session, path=["response"])
-
- @parametrize
- async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
- transcription_session = await async_client.beta.realtime.transcription_sessions.create(
- client_secret={
- "expires_at": {
- "anchor": "created_at",
- "seconds": 0,
- }
- },
- include=["string"],
- input_audio_format="pcm16",
- input_audio_noise_reduction={"type": "near_field"},
- input_audio_transcription={
- "language": "language",
- "model": "gpt-4o-transcribe",
- "prompt": "prompt",
- },
- modalities=["text"],
- turn_detection={
- "create_response": True,
- "eagerness": "low",
- "interrupt_response": True,
- "prefix_padding_ms": 0,
- "silence_duration_ms": 0,
- "threshold": 0,
- "type": "server_vad",
- },
- )
- assert_matches_type(TranscriptionSession, transcription_session, path=["response"])
-
- @parametrize
- async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
- response = await async_client.beta.realtime.transcription_sessions.with_raw_response.create()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- transcription_session = response.parse()
- assert_matches_type(TranscriptionSession, transcription_session, path=["response"])
-
- @parametrize
- async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
- async with async_client.beta.realtime.transcription_sessions.with_streaming_response.create() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- transcription_session = await response.parse()
- assert_matches_type(TranscriptionSession, transcription_session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
tests/api_resources/beta/test_realtime.py
@@ -6,6 +6,8 @@ import os
import pytest
+# pyright: reportDeprecated=false
+
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
tests/api_resources/beta/realtime/__init__.py → tests/api_resources/realtime/__init__.py
File renamed without changes
tests/api_resources/realtime/test_client_secrets.py
@@ -0,0 +1,208 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types.realtime import ClientSecretCreateResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestClientSecrets:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ client_secret = client.realtime.client_secrets.create()
+ assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: OpenAI) -> None:
+ client_secret = client.realtime.client_secrets.create(
+ expires_after={
+ "anchor": "created_at",
+ "seconds": 10,
+ },
+ session={
+ "model": "string",
+ "type": "realtime",
+ "audio": {
+ "input": {
+ "format": "pcm16",
+ "noise_reduction": {"type": "near_field"},
+ "transcription": {
+ "language": "language",
+ "model": "whisper-1",
+ "prompt": "prompt",
+ },
+ "turn_detection": {
+ "create_response": True,
+ "eagerness": "low",
+ "idle_timeout_ms": 0,
+ "interrupt_response": True,
+ "prefix_padding_ms": 0,
+ "silence_duration_ms": 0,
+ "threshold": 0,
+ "type": "server_vad",
+ },
+ },
+ "output": {
+ "format": "pcm16",
+ "speed": 0.25,
+ "voice": "ash",
+ },
+ },
+ "client_secret": {
+ "expires_after": {
+ "anchor": "created_at",
+ "seconds": 0,
+ }
+ },
+ "include": ["item.input_audio_transcription.logprobs"],
+ "instructions": "instructions",
+ "max_output_tokens": 0,
+ "output_modalities": ["text"],
+ "prompt": {
+ "id": "id",
+ "variables": {"foo": "string"},
+ "version": "version",
+ },
+ "temperature": 0,
+ "tool_choice": "none",
+ "tools": [
+ {
+ "description": "description",
+ "name": "name",
+ "parameters": {},
+ "type": "function",
+ }
+ ],
+ "tracing": "auto",
+ "truncation": "auto",
+ },
+ )
+ assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.realtime.client_secrets.with_raw_response.create()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ client_secret = response.parse()
+ assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.realtime.client_secrets.with_streaming_response.create() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ client_secret = response.parse()
+ assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncClientSecrets:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ client_secret = await async_client.realtime.client_secrets.create()
+ assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ client_secret = await async_client.realtime.client_secrets.create(
+ expires_after={
+ "anchor": "created_at",
+ "seconds": 10,
+ },
+ session={
+ "model": "string",
+ "type": "realtime",
+ "audio": {
+ "input": {
+ "format": "pcm16",
+ "noise_reduction": {"type": "near_field"},
+ "transcription": {
+ "language": "language",
+ "model": "whisper-1",
+ "prompt": "prompt",
+ },
+ "turn_detection": {
+ "create_response": True,
+ "eagerness": "low",
+ "idle_timeout_ms": 0,
+ "interrupt_response": True,
+ "prefix_padding_ms": 0,
+ "silence_duration_ms": 0,
+ "threshold": 0,
+ "type": "server_vad",
+ },
+ },
+ "output": {
+ "format": "pcm16",
+ "speed": 0.25,
+ "voice": "ash",
+ },
+ },
+ "client_secret": {
+ "expires_after": {
+ "anchor": "created_at",
+ "seconds": 0,
+ }
+ },
+ "include": ["item.input_audio_transcription.logprobs"],
+ "instructions": "instructions",
+ "max_output_tokens": 0,
+ "output_modalities": ["text"],
+ "prompt": {
+ "id": "id",
+ "variables": {"foo": "string"},
+ "version": "version",
+ },
+ "temperature": 0,
+ "tool_choice": "none",
+ "tools": [
+ {
+ "description": "description",
+ "name": "name",
+ "parameters": {},
+ "type": "function",
+ }
+ ],
+ "tracing": "auto",
+ "truncation": "auto",
+ },
+ )
+ assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.realtime.client_secrets.with_raw_response.create()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ client_secret = response.parse()
+ assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.realtime.client_secrets.with_streaming_response.create() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ client_secret = await response.parse()
+ assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
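The new tests above exercise the nested session shape that replaces the flat beta parameters: audio input/output settings now live under `audio`, and the session carries an explicit `type` and `model`. A hedged sketch of minting an ephemeral client secret with that shape (parameter values are illustrative, drawn from the test payload):

```python
from openai import OpenAI

client = OpenAI()

# Mint a short-lived secret that a browser or WebRTC client can use to connect.
client_secret = client.realtime.client_secrets.create(
    expires_after={"anchor": "created_at", "seconds": 600},
    session={
        "type": "realtime",
        "model": "gpt-realtime",
        "output_modalities": ["audio"],
        "audio": {
            "input": {"format": "pcm16", "turn_detection": {"type": "server_vad"}},
            "output": {"format": "pcm16", "voice": "ash"},
        },
    },
)
print(client_secret)
```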
tests/api_resources/test_realtime.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+
+import pytest
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestRealtime:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+
+class TestAsyncRealtime:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 119
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-8517ffa1004e31ca2523d617629e64be6fe4f13403ddfd9db5b3be002656cbde.yml
-openapi_spec_hash: b64dd8c8b23082a7aa2a3e5c5fffd8bd
-config_hash: fe0ea26680ac2075a6cd66416aefe7db
+configured_endpoints: 118
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-356b4364203ff36d7724074cd04f6e684253bfcc3c9d969122d730aa7bc51b46.yml
+openapi_spec_hash: 4ab8e96f52699bc3d2b0c4432aa92af8
+config_hash: b854932c0ea24b400bdd64e4376936bd
api.md
@@ -431,6 +431,7 @@ from openai.types.webhooks import (
FineTuningJobCancelledWebhookEvent,
FineTuningJobFailedWebhookEvent,
FineTuningJobSucceededWebhookEvent,
+ RealtimeCallIncomingWebhookEvent,
ResponseCancelledWebhookEvent,
ResponseCompletedWebhookEvent,
ResponseFailedWebhookEvent,
@@ -832,6 +833,7 @@ from openai.types.responses import (
ToolChoiceMcp,
ToolChoiceOptions,
ToolChoiceTypes,
+ WebSearchPreviewTool,
WebSearchTool,
)
```
@@ -855,6 +857,115 @@ Methods:
- <code title="get /responses/{response_id}/input_items">client.responses.input_items.<a href="./src/openai/resources/responses/input_items.py">list</a>(response_id, \*\*<a href="src/openai/types/responses/input_item_list_params.py">params</a>) -> <a href="./src/openai/types/responses/response_item.py">SyncCursorPage[ResponseItem]</a></code>
+# Realtime
+
+Types:
+
+```python
+from openai.types.realtime import (
+ ConversationCreatedEvent,
+ ConversationItem,
+ ConversationItemAdded,
+ ConversationItemCreateEvent,
+ ConversationItemCreatedEvent,
+ ConversationItemDeleteEvent,
+ ConversationItemDeletedEvent,
+ ConversationItemDone,
+ ConversationItemInputAudioTranscriptionCompletedEvent,
+ ConversationItemInputAudioTranscriptionDeltaEvent,
+ ConversationItemInputAudioTranscriptionFailedEvent,
+ ConversationItemInputAudioTranscriptionSegment,
+ ConversationItemRetrieveEvent,
+ ConversationItemTruncateEvent,
+ ConversationItemTruncatedEvent,
+ ConversationItemWithReference,
+ InputAudioBufferAppendEvent,
+ InputAudioBufferClearEvent,
+ InputAudioBufferClearedEvent,
+ InputAudioBufferCommitEvent,
+ InputAudioBufferCommittedEvent,
+ InputAudioBufferSpeechStartedEvent,
+ InputAudioBufferSpeechStoppedEvent,
+ InputAudioBufferTimeoutTriggered,
+ LogProbProperties,
+ McpListToolsCompleted,
+ McpListToolsFailed,
+ McpListToolsInProgress,
+ OutputAudioBufferClearEvent,
+ RateLimitsUpdatedEvent,
+ RealtimeAudioConfig,
+ RealtimeClientEvent,
+ RealtimeClientSecretConfig,
+ RealtimeConversationItemAssistantMessage,
+ RealtimeConversationItemFunctionCall,
+ RealtimeConversationItemFunctionCallOutput,
+ RealtimeConversationItemSystemMessage,
+ RealtimeConversationItemUserMessage,
+ RealtimeError,
+ RealtimeErrorEvent,
+ RealtimeMcpApprovalRequest,
+ RealtimeMcpApprovalResponse,
+ RealtimeMcpListTools,
+ RealtimeMcpProtocolError,
+ RealtimeMcpToolCall,
+ RealtimeMcpToolExecutionError,
+ RealtimeMcphttpError,
+ RealtimeResponse,
+ RealtimeResponseStatus,
+ RealtimeResponseUsage,
+ RealtimeResponseUsageInputTokenDetails,
+ RealtimeResponseUsageOutputTokenDetails,
+ RealtimeServerEvent,
+ RealtimeSession,
+ RealtimeSessionCreateRequest,
+ RealtimeToolChoiceConfig,
+ RealtimeToolsConfig,
+ RealtimeToolsConfigUnion,
+ RealtimeTracingConfig,
+ RealtimeTranscriptionSessionCreateRequest,
+ RealtimeTruncation,
+ ResponseAudioDeltaEvent,
+ ResponseAudioDoneEvent,
+ ResponseAudioTranscriptDeltaEvent,
+ ResponseAudioTranscriptDoneEvent,
+ ResponseCancelEvent,
+ ResponseContentPartAddedEvent,
+ ResponseContentPartDoneEvent,
+ ResponseCreateEvent,
+ ResponseCreatedEvent,
+ ResponseDoneEvent,
+ ResponseFunctionCallArgumentsDeltaEvent,
+ ResponseFunctionCallArgumentsDoneEvent,
+ ResponseMcpCallArgumentsDelta,
+ ResponseMcpCallArgumentsDone,
+ ResponseMcpCallCompleted,
+ ResponseMcpCallFailed,
+ ResponseMcpCallInProgress,
+ ResponseOutputItemAddedEvent,
+ ResponseOutputItemDoneEvent,
+ ResponseTextDeltaEvent,
+ ResponseTextDoneEvent,
+ SessionCreatedEvent,
+ SessionUpdateEvent,
+ SessionUpdatedEvent,
+ TranscriptionSessionCreated,
+ TranscriptionSessionUpdate,
+ TranscriptionSessionUpdatedEvent,
+)
+```
+
+## ClientSecrets
+
+Types:
+
+```python
+from openai.types.realtime import RealtimeSessionCreateResponse, ClientSecretCreateResponse
+```
+
+Methods:
+
+- <code title="post /realtime/client_secrets">client.realtime.client_secrets.<a href="./src/openai/resources/realtime/client_secrets.py">create</a>(\*\*<a href="src/openai/types/realtime/client_secret_create_params.py">params</a>) -> <a href="./src/openai/types/realtime/client_secret_create_response.py">ClientSecretCreateResponse</a></code>
+
# Conversations
Types:
README.md
@@ -226,7 +226,7 @@ async def main():
asyncio.run(main())
```
-## Realtime API beta
+## Realtime API
The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as [function calling](https://platform.openai.com/docs/guides/function-calling) through a WebSocket connection.
@@ -243,7 +243,7 @@ from openai import AsyncOpenAI
async def main():
client = AsyncOpenAI()
- async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
+ async with client.realtime.connect(model="gpt-realtime") as connection:
await connection.session.update(session={'modalities': ['text']})
await connection.conversation.item.create(
@@ -277,7 +277,7 @@ Whenever an error occurs, the Realtime API will send an [`error` event](https://
```py
client = AsyncOpenAI()
-async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
+async with client.realtime.connect(model="gpt-realtime") as connection:
...
async for event in connection:
if event.type == 'error':
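The README hunk above iterates the connection looking for `error` events; a hedged sketch of what handling one might look like (the `event.error` field names are assumed from the `RealtimeErrorEvent` model and are not part of this diff):

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    async with client.realtime.connect(model="gpt-realtime") as connection:
        async for event in connection:
            if event.type == "error":
                # Assumed fields on the error payload; check the model for the full set.
                print(event.error.type)
                print(event.error.code)
                print(event.error.event_id)
                print(event.error.message)


asyncio.run(main())
```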