Commit 5fdba486
Changed files (70)
src/openai/lib
src/openai/resources/beta/realtime
src/openai/types/beta/realtime
tests/api_resources/beta
src/openai/lib/azure.py
@@ -76,6 +76,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -94,6 +95,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -112,6 +114,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -131,6 +134,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
base_url: str | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -214,6 +218,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
default_headers=default_headers,
default_query=default_query,
http_client=http_client,
+ websocket_base_url=websocket_base_url,
_strict_response_validation=_strict_response_validation,
)
self._api_version = api_version
@@ -227,6 +232,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
api_version: str | None = None,
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
@@ -247,6 +253,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
api_key=api_key,
organization=organization,
project=project,
+ websocket_base_url=websocket_base_url,
base_url=base_url,
timeout=timeout,
http_client=http_client,
@@ -314,6 +321,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -333,6 +341,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -352,6 +361,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -372,6 +382,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
organization: str | None = None,
project: str | None = None,
base_url: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -454,6 +465,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
default_headers=default_headers,
default_query=default_query,
http_client=http_client,
+ websocket_base_url=websocket_base_url,
_strict_response_validation=_strict_response_validation,
)
self._api_version = api_version
@@ -467,6 +479,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
api_version: str | None = None,
azure_ad_token: str | None = None,
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
@@ -487,6 +500,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
api_key=api_key,
organization=organization,
project=project,
+ websocket_base_url=websocket_base_url,
base_url=base_url,
timeout=timeout,
http_client=http_client,
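
The new `websocket_base_url` parameter threaded through both clients lets Realtime websocket traffic be routed separately from regular HTTP requests. A minimal sketch of passing it (the endpoint, key, and API version below are placeholder values):

```py
from openai import AzureOpenAI

# Placeholder endpoint and credentials, shown only to illustrate the new parameter.
client = AzureOpenAI(
    azure_endpoint="https://my-resource.openai.azure.com",
    api_key="...",
    api_version="2024-10-01-preview",
    # Realtime connections use this base instead of one derived from the HTTP base URL:
    websocket_base_url="wss://my-resource.openai.azure.com/openai",
)
```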
src/openai/resources/beta/realtime/realtime.py
@@ -2,6 +2,15 @@
from __future__ import annotations
+import json
+import logging
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Iterator, cast
+from typing_extensions import AsyncIterator
+
+import httpx
+from pydantic import BaseModel
+
from .sessions import (
Sessions,
AsyncSessions,
@@ -10,11 +19,34 @@ from .sessions import (
SessionsWithStreamingResponse,
AsyncSessionsWithStreamingResponse,
)
+from ...._types import NOT_GIVEN, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ strip_not_given,
+ async_maybe_transform,
+)
from ...._compat import cached_property
+from ...._models import construct_type_unchecked
from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._exceptions import OpenAIError
+from ...._base_client import _merge_mappings
+from ....types.beta.realtime import session_update_event_param, response_create_event_param
+from ....types.websocket_connection_options import WebsocketConnectionOptions
+from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent
+from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent
+from ....types.beta.realtime.conversation_item_param import ConversationItemParam
+from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam
+
+if TYPE_CHECKING:
+ from websockets.sync.client import ClientConnection as WebsocketConnection
+ from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection
+
+ from ...._client import OpenAI, AsyncOpenAI
__all__ = ["Realtime", "AsyncRealtime"]
+log: logging.Logger = logging.getLogger(__name__)
+
class Realtime(SyncAPIResource):
@cached_property
@@ -40,6 +72,33 @@ class Realtime(SyncAPIResource):
"""
return RealtimeWithStreamingResponse(self)
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> RealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+ """
+ return RealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
class AsyncRealtime(AsyncAPIResource):
@cached_property
@@ -65,6 +124,33 @@ class AsyncRealtime(AsyncAPIResource):
"""
return AsyncRealtimeWithStreamingResponse(self)
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> AsyncRealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+ """
+ return AsyncRealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
class RealtimeWithRawResponse:
def __init__(self, realtime: Realtime) -> None:
@@ -100,3 +186,769 @@ class AsyncRealtimeWithStreamingResponse:
@cached_property
def sessions(self) -> AsyncSessionsWithStreamingResponse:
return AsyncSessionsWithStreamingResponse(self._realtime.sessions)
+
+
+class AsyncRealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: AsyncRealtimeSessionResource
+ response: AsyncRealtimeResponseResource
+ conversation: AsyncRealtimeConversationResource
+ input_audio_buffer: AsyncRealtimeInputAudioBufferResource
+
+ _connection: AsyncWebsocketConnection
+
+ def __init__(self, connection: AsyncWebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = AsyncRealtimeSessionResource(self)
+ self.response = AsyncRealtimeResponseResource(self)
+ self.conversation = AsyncRealtimeConversationResource(self)
+ self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
+
+ async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
+ """
+        An infinite iterator that will continue to yield events until
+ the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield await self.recv()
+ except ConnectionClosedOK:
+ return
+
+ async def recv(self) -> RealtimeServerEvent:
+ """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+ Canceling this method is safe. There's no risk of losing data.
+ """
+ return self.parse_event(await self.recv_bytes())
+
+ async def recv_bytes(self) -> bytes:
+ """Receive the next message from the connection as raw bytes.
+
+ Canceling this method is safe. There's no risk of losing data.
+
+ If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+ then you can call `.parse_event(data)`.
+ """
+ message = await self._connection.recv(decode=False)
+ log.debug(f"Received websocket message: %s", message)
+ if not isinstance(message, bytes):
+ # passing `decode=False` should always result in us getting `bytes` back
+ raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+ return message
+
+ async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
+ )
+ await self._connection.send(data)
+
+ async def close(self, *, code: int = 1000, reason: str = "") -> None:
+ await self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class AsyncRealtimeConnectionManager:
+ """
+    Context manager over an `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()`.
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+    Note that if your application doesn't work well with the context manager approach, you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.beta.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: AsyncOpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: AsyncRealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ async def __aenter__(self) -> AsyncRealtimeConnection:
+ """
+        👋 If your application doesn't work well with the context manager approach, you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.beta.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+ try:
+ from websockets.asyncio.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **self.__extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = AsyncRealtimeConnection(
+ await connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **self.__client.auth_headers,
+ "OpenAI-Beta": "realtime=v1",
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __aenter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ async def __aexit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ await self.__connection.close()
+
+
+class RealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: RealtimeSessionResource
+ response: RealtimeResponseResource
+ conversation: RealtimeConversationResource
+ input_audio_buffer: RealtimeInputAudioBufferResource
+
+ _connection: WebsocketConnection
+
+ def __init__(self, connection: WebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = RealtimeSessionResource(self)
+ self.response = RealtimeResponseResource(self)
+ self.conversation = RealtimeConversationResource(self)
+ self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
+
+ def __iter__(self) -> Iterator[RealtimeServerEvent]:
+ """
+        An infinite iterator that will continue to yield events until
+ the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield self.recv()
+ except ConnectionClosedOK:
+ return
+
+    def recv(self) -> RealtimeServerEvent:
+        """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+        """
+ return self.parse_event(self.recv_bytes())
+
+    def recv_bytes(self) -> bytes:
+        """Receive the next message from the connection as raw bytes.
+
+        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+        then you can call `.parse_event(data)`.
+        """
+ message = self._connection.recv(decode=False)
+ log.debug(f"Received websocket message: %s", message)
+ if not isinstance(message, bytes):
+ # passing `decode=False` should always result in us getting `bytes` back
+ raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+ return message
+
+ def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(maybe_transform(event, RealtimeClientEventParam))
+ )
+ self._connection.send(data)
+
+ def close(self, *, code: int = 1000, reason: str = "") -> None:
+ self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class RealtimeConnectionManager:
+ """
+    Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()`.
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+    Note that if your application doesn't work well with the context manager approach, you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.beta.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: OpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: RealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ def __enter__(self) -> RealtimeConnection:
+ """
+        👋 If your application doesn't work well with the context manager approach, you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.beta.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+ try:
+ from websockets.sync.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **self.__extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = RealtimeConnection(
+ connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **self.__client.auth_headers,
+ "OpenAI-Beta": "realtime=v1",
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __enter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ def __exit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ self.__connection.close()
+
+
+class BaseRealtimeConnectionResource:
+ def __init__(self, connection: RealtimeConnection) -> None:
+ self._connection = connection
+
+
+class RealtimeSessionResource(BaseRealtimeConnectionResource):
+ def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to update the session’s default configuration.
+
+ The client may
+ send this event at any time to update the session configuration, and any
+ field may be updated at any time, except for "voice". The server will respond
+ with a `session.updated` event that shows the full effective configuration.
+        Only fields that are present are updated; thus, the correct way to clear a
+ field like "instructions" is to pass an empty string.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeResponseResource(BaseRealtimeConnectionResource):
+ def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.cancelled` event or an error if there is no response to
+ cancel.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+ def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+ configuration for this Response only.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+
+class RealtimeConversationResource(BaseRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> RealtimeConversationItemResource:
+ return RealtimeConversationItemResource(self._connection)
+
+
+class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
+ def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+
+class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
+ def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+        not need to send this event; the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+        The client may choose how much audio to place in each event, up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
+
+
+class BaseAsyncRealtimeConnectionResource:
+ def __init__(self, connection: AsyncRealtimeConnection) -> None:
+ self._connection = connection
+
+
+class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
+ async def update(
+ self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to update the session’s default configuration.
+
+ The client may
+ send this event at any time to update the session configuration, and any
+ field may be updated at any time, except for "voice". The server will respond
+ with a `session.updated` event that shows the full effective configuration.
+        Only fields that are present are updated; thus, the correct way to clear a
+ field like "instructions" is to pass an empty string.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
+ async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.cancelled` event or an error if there is no response to
+ cancel.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+ async def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+ configuration for this Response only.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+
+class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> AsyncRealtimeConversationItemResource:
+ return AsyncRealtimeConversationItemResource(self._connection)
+
+
+class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
+ async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ async def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ async def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+
+class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
+ async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+        not need to send this event; the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+        The client may choose how much audio to place in each event, up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
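
Taken together, these classes support a flow like the following sketch. It assumes the `openai[realtime]` extra is installed; the model id is illustrative, and any Realtime-capable model works the same way:

```py
from openai import OpenAI

client = OpenAI()

with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
    # The typed resources wrap the raw client events defined above.
    connection.session.update(session={"modalities": ["text"]})
    connection.conversation.item.create(
        item={
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "Say hello!"}],
        }
    )
    connection.response.create()

    # The connection is an iterator of parsed RealtimeServerEvent objects.
    for event in connection:
        if event.type == "response.text.delta":
            print(event.delta, end="")
        elif event.type == "response.done":
            break
```

The async client mirrors this with `async with client.beta.realtime.connect(...)` and `async for event in connection`.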
src/openai/types/beta/realtime/__init__.py
@@ -2,5 +2,79 @@
from __future__ import annotations
+from .session import Session as Session
+from .error_event import ErrorEvent as ErrorEvent
+from .conversation_item import ConversationItem as ConversationItem
+from .realtime_response import RealtimeResponse as RealtimeResponse
+from .response_done_event import ResponseDoneEvent as ResponseDoneEvent
+from .session_update_event import SessionUpdateEvent as SessionUpdateEvent
+from .realtime_client_event import RealtimeClientEvent as RealtimeClientEvent
+from .realtime_server_event import RealtimeServerEvent as RealtimeServerEvent
+from .response_cancel_event import ResponseCancelEvent as ResponseCancelEvent
+from .response_create_event import ResponseCreateEvent as ResponseCreateEvent
from .session_create_params import SessionCreateParams as SessionCreateParams
+from .session_created_event import SessionCreatedEvent as SessionCreatedEvent
+from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent
+from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent
+from .conversation_item_param import ConversationItemParam as ConversationItemParam
+from .realtime_connect_params import RealtimeConnectParams as RealtimeConnectParams
+from .realtime_response_usage import RealtimeResponseUsage as RealtimeResponseUsage
from .session_create_response import SessionCreateResponse as SessionCreateResponse
+from .realtime_response_status import RealtimeResponseStatus as RealtimeResponseStatus
+from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent
+from .conversation_item_content import ConversationItemContent as ConversationItemContent
+from .rate_limits_updated_event import RateLimitsUpdatedEvent as RateLimitsUpdatedEvent
+from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent
+from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent
+from .conversation_created_event import ConversationCreatedEvent as ConversationCreatedEvent
+from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent
+from .session_update_event_param import SessionUpdateEventParam as SessionUpdateEventParam
+from .realtime_client_event_param import RealtimeClientEventParam as RealtimeClientEventParam
+from .response_cancel_event_param import ResponseCancelEventParam as ResponseCancelEventParam
+from .response_create_event_param import ResponseCreateEventParam as ResponseCreateEventParam
+from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent
+from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent
+from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent
+from .conversation_item_content_param import ConversationItemContentParam as ConversationItemContentParam
+from .conversation_item_created_event import ConversationItemCreatedEvent as ConversationItemCreatedEvent
+from .conversation_item_deleted_event import ConversationItemDeletedEvent as ConversationItemDeletedEvent
+from .input_audio_buffer_append_event import InputAudioBufferAppendEvent as InputAudioBufferAppendEvent
+from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent as InputAudioBufferCommitEvent
+from .response_output_item_done_event import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent
+from .conversation_item_truncate_event import ConversationItemTruncateEvent as ConversationItemTruncateEvent
+from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent as InputAudioBufferClearedEvent
+from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent
+from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent
+from .conversation_item_truncated_event import ConversationItemTruncatedEvent as ConversationItemTruncatedEvent
+from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent
+from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent
+from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam
+from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam
+from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam
+from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent
+from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam as InputAudioBufferAppendEventParam
+from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam as InputAudioBufferCommitEventParam
+from .response_audio_transcript_delta_event import (
+ ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent,
+)
+from .conversation_item_truncate_event_param import (
+ ConversationItemTruncateEventParam as ConversationItemTruncateEventParam,
+)
+from .input_audio_buffer_speech_started_event import (
+ InputAudioBufferSpeechStartedEvent as InputAudioBufferSpeechStartedEvent,
+)
+from .input_audio_buffer_speech_stopped_event import (
+ InputAudioBufferSpeechStoppedEvent as InputAudioBufferSpeechStoppedEvent,
+)
+from .response_function_call_arguments_done_event import (
+ ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent,
+)
+from .response_function_call_arguments_delta_event import (
+ ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent,
+)
+from .conversation_item_input_audio_transcription_failed_event import (
+ ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent,
+)
+from .conversation_item_input_audio_transcription_completed_event import (
+ ConversationItemInputAudioTranscriptionCompletedEvent as ConversationItemInputAudioTranscriptionCompletedEvent,
+)
src/openai/types/beta/realtime/conversation_created_event.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationCreatedEvent", "Conversation"]
+
+
+class Conversation(BaseModel):
+ id: Optional[str] = None
+ """The unique ID of the conversation."""
+
+ object: Optional[Literal["realtime.conversation"]] = None
+ """The object type, must be `realtime.conversation`."""
+
+
+class ConversationCreatedEvent(BaseModel):
+ conversation: Conversation
+ """The conversation resource."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ type: Literal["conversation.created"]
+ """The event type, must be `conversation.created`."""
src/openai/types/beta/realtime/conversation_item.py
@@ -0,0 +1,61 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item_content import ConversationItemContent
+
+__all__ = ["ConversationItem"]
+
+
+class ConversationItem(BaseModel):
+ id: Optional[str] = None
+ """
+    The unique ID of the item. This can be generated by the client to help manage
+    server-side context, but is not required; the server will generate one if
+ not provided.
+ """
+
+ arguments: Optional[str] = None
+ """The arguments of the function call (for `function_call` items)."""
+
+ call_id: Optional[str] = None
+ """
+ The ID of the function call (for `function_call` and `function_call_output`
+ items). If passed on a `function_call_output` item, the server will check that a
+ `function_call` item with the same ID exists in the conversation history.
+ """
+
+ content: Optional[List[ConversationItemContent]] = None
+ """The content of the message, applicable for `message` items.
+
+ - Message items of role `system` support only `input_text` content
+ - Message items of role `user` support `input_text` and `input_audio` content
+ - Message items of role `assistant` support `text` content.
+ """
+
+ name: Optional[str] = None
+ """The name of the function being called (for `function_call` items)."""
+
+ object: Optional[Literal["realtime.item"]] = None
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ output: Optional[str] = None
+ """The output of the function call (for `function_call_output` items)."""
+
+ role: Optional[Literal["user", "assistant", "system"]] = None
+ """
+ The role of the message sender (`user`, `assistant`, `system`), only applicable
+ for `message` items.
+ """
+
+ status: Optional[Literal["completed", "incomplete"]] = None
+ """The status of the item (`completed`, `incomplete`).
+
+ These have no effect on the conversation, but are accepted for consistency with
+ the `conversation.item.created` event.
+ """
+
+ type: Optional[Literal["message", "function_call", "function_call_output"]] = None
+ """The type of the item (`message`, `function_call`, `function_call_output`)."""
src/openai/types/beta/realtime/conversation_item_content.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemContent"]
+
+
+class ConversationItemContent(BaseModel):
+ id: Optional[str] = None
+ """
+ ID of a previous conversation item (like a model response), used for
+ `item_reference` content types.
+ """
+
+ audio: Optional[str] = None
+ """Base64-encoded audio bytes, used for `input_audio` content type."""
+
+ text: Optional[str] = None
+ """The text content, used for `input_text` and `text` content types."""
+
+ transcript: Optional[str] = None
+ """The transcript of the audio, used for `input_audio` content type."""
+
+ type: Optional[Literal["input_text", "input_audio", "item_reference", "text"]] = None
+ """The content type (`input_text`, `input_audio`, `item_reference`, `text`)."""
src/openai/types/beta/realtime/conversation_item_content_param.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["ConversationItemContentParam"]
+
+
+class ConversationItemContentParam(TypedDict, total=False):
+ id: str
+ """
+ ID of a previous conversation item (like a model response), used for
+ `item_reference` content types.
+ """
+
+ audio: str
+ """Base64-encoded audio bytes, used for `input_audio` content type."""
+
+ text: str
+ """The text content, used for `input_text` and `text` content types."""
+
+ transcript: str
+ """The transcript of the audio, used for `input_audio` content type."""
+
+ type: Literal["input_text", "input_audio", "item_reference", "text"]
+ """The content type (`input_text`, `input_audio`, `item_reference`, `text`)."""
src/openai/types/beta/realtime/conversation_item_create_event.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ConversationItemCreateEvent"]
+
+
+class ConversationItemCreateEvent(BaseModel):
+ item: ConversationItem
+ """The item to add to the conversation."""
+
+ type: Literal["conversation.item.create"]
+ """The event type, must be `conversation.item.create`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
+
+ previous_item_id: Optional[str] = None
+ """The ID of the preceding item after which the new item will be inserted.
+
+ If not set, the new item will be appended to the end of the conversation. If
+ set, it allows an item to be inserted mid-conversation. If the ID cannot be
+ found, an error will be returned and the item will not be added.
+ """
src/openai/types/beta/realtime/conversation_item_create_event_param.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .conversation_item_param import ConversationItemParam
+
+__all__ = ["ConversationItemCreateEventParam"]
+
+
+class ConversationItemCreateEventParam(TypedDict, total=False):
+ item: Required[ConversationItemParam]
+ """The item to add to the conversation."""
+
+ type: Required[Literal["conversation.item.create"]]
+ """The event type, must be `conversation.item.create`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
+
+ previous_item_id: str
+ """The ID of the preceding item after which the new item will be inserted.
+
+ If not set, the new item will be appended to the end of the conversation. If
+ set, it allows an item to be inserted mid-conversation. If the ID cannot be
+ found, an error will be returned and the item will not be added.
+ """
src/openai/types/beta/realtime/conversation_item_created_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ConversationItemCreatedEvent"]
+
+
+class ConversationItemCreatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """The item to add to the conversation."""
+
+ previous_item_id: str
+ """
+ The ID of the preceding item in the Conversation context, allows the client to
+ understand the order of the conversation.
+ """
+
+ type: Literal["conversation.item.created"]
+ """The event type, must be `conversation.item.created`."""
src/openai/types/beta/realtime/conversation_item_delete_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemDeleteEvent"]
+
+
+class ConversationItemDeleteEvent(BaseModel):
+ item_id: str
+ """The ID of the item to delete."""
+
+ type: Literal["conversation.item.delete"]
+ """The event type, must be `conversation.item.delete`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/conversation_item_delete_event_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ConversationItemDeleteEventParam"]
+
+
+class ConversationItemDeleteEventParam(TypedDict, total=False):
+ item_id: Required[str]
+ """The ID of the item to delete."""
+
+ type: Required[Literal["conversation.item.delete"]]
+ """The event type, must be `conversation.item.delete`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/conversation_item_deleted_event.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemDeletedEvent"]
+
+
+class ConversationItemDeletedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item that was deleted."""
+
+ type: Literal["conversation.item.deleted"]
+ """The event type, must be `conversation.item.deleted`."""
src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemInputAudioTranscriptionCompletedEvent"]
+
+
+class ConversationItemInputAudioTranscriptionCompletedEvent(BaseModel):
+ content_index: int
+ """The index of the content part containing the audio."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item containing the audio."""
+
+ transcript: str
+ """The transcribed text."""
+
+ type: Literal["conversation.item.input_audio_transcription.completed"]
+ """
+ The event type, must be `conversation.item.input_audio_transcription.completed`.
+ """
src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemInputAudioTranscriptionFailedEvent", "Error"]
+
+
+class Error(BaseModel):
+ code: Optional[str] = None
+ """Error code, if any."""
+
+ message: Optional[str] = None
+ """A human-readable error message."""
+
+ param: Optional[str] = None
+ """Parameter related to the error, if any."""
+
+ type: Optional[str] = None
+ """The type of error."""
+
+
+class ConversationItemInputAudioTranscriptionFailedEvent(BaseModel):
+ content_index: int
+ """The index of the content part containing the audio."""
+
+ error: Error
+ """Details of the transcription error."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item."""
+
+ type: Literal["conversation.item.input_audio_transcription.failed"]
+ """The event type, must be `conversation.item.input_audio_transcription.failed`."""
src/openai/types/beta/realtime/conversation_item_param.py
@@ -0,0 +1,62 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Literal, TypedDict
+
+from .conversation_item_content_param import ConversationItemContentParam
+
+__all__ = ["ConversationItemParam"]
+
+
+class ConversationItemParam(TypedDict, total=False):
+ id: str
+ """
+    The unique ID of the item. This can be generated by the client to help manage
+    server-side context, but is not required; the server will generate one if
+ not provided.
+ """
+
+ arguments: str
+ """The arguments of the function call (for `function_call` items)."""
+
+ call_id: str
+ """
+ The ID of the function call (for `function_call` and `function_call_output`
+ items). If passed on a `function_call_output` item, the server will check that a
+ `function_call` item with the same ID exists in the conversation history.
+ """
+
+ content: Iterable[ConversationItemContentParam]
+ """The content of the message, applicable for `message` items.
+
+ - Message items of role `system` support only `input_text` content
+ - Message items of role `user` support `input_text` and `input_audio` content
+ - Message items of role `assistant` support `text` content.
+ """
+
+ name: str
+ """The name of the function being called (for `function_call` items)."""
+
+ object: Literal["realtime.item"]
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ output: str
+ """The output of the function call (for `function_call_output` items)."""
+
+ role: Literal["user", "assistant", "system"]
+ """
+ The role of the message sender (`user`, `assistant`, `system`), only applicable
+ for `message` items.
+ """
+
+ status: Literal["completed", "incomplete"]
+ """The status of the item (`completed`, `incomplete`).
+
+ These have no effect on the conversation, but are accepted for consistency with
+ the `conversation.item.created` event.
+ """
+
+ type: Literal["message", "function_call", "function_call_output"]
+ """The type of the item (`message`, `function_call`, `function_call_output`)."""
src/openai/types/beta/realtime/conversation_item_truncate_event.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemTruncateEvent"]
+
+
+class ConversationItemTruncateEvent(BaseModel):
+ audio_end_ms: int
+ """Inclusive duration up to which audio is truncated, in milliseconds.
+
+ If the audio_end_ms is greater than the actual audio duration, the server will
+ respond with an error.
+ """
+
+ content_index: int
+ """The index of the content part to truncate. Set this to 0."""
+
+ item_id: str
+ """The ID of the assistant message item to truncate.
+
+ Only assistant message items can be truncated.
+ """
+
+ type: Literal["conversation.item.truncate"]
+ """The event type, must be `conversation.item.truncate`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/conversation_item_truncate_event_param.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ConversationItemTruncateEventParam"]
+
+
+class ConversationItemTruncateEventParam(TypedDict, total=False):
+ audio_end_ms: Required[int]
+ """Inclusive duration up to which audio is truncated, in milliseconds.
+
+ If `audio_end_ms` is greater than the actual audio duration, the server will
+ respond with an error.
+ """
+
+ content_index: Required[int]
+ """The index of the content part to truncate. Set this to 0."""
+
+ item_id: Required[str]
+ """The ID of the assistant message item to truncate.
+
+ Only assistant message items can be truncated.
+ """
+
+ type: Required[Literal["conversation.item.truncate"]]
+ """The event type, must be `conversation.item.truncate`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/conversation_item_truncated_event.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemTruncatedEvent"]
+
+
+class ConversationItemTruncatedEvent(BaseModel):
+ audio_end_ms: int
+ """The duration up to which the audio was truncated, in milliseconds."""
+
+ content_index: int
+ """The index of the content part that was truncated."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the assistant message item that was truncated."""
+
+ type: Literal["conversation.item.truncated"]
+ """The event type, must be `conversation.item.truncated`."""
src/openai/types/beta/realtime/error_event.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ErrorEvent", "Error"]
+
+
+class Error(BaseModel):
+ message: str
+ """A human-readable error message."""
+
+ type: str
+ """The type of error (e.g., "invalid_request_error", "server_error")."""
+
+ code: Optional[str] = None
+ """Error code, if any."""
+
+ event_id: Optional[str] = None
+ """The event_id of the client event that caused the error, if applicable."""
+
+ param: Optional[str] = None
+ """Parameter related to the error, if any."""
+
+
+class ErrorEvent(BaseModel):
+ error: Error
+ """Details of the error."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ type: Literal["error"]
+ """The event type, must be `error`."""
src/openai/types/beta/realtime/input_audio_buffer_append_event.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferAppendEvent"]
+
+
+class InputAudioBufferAppendEvent(BaseModel):
+ audio: str
+ """Base64-encoded audio bytes.
+
+ This must be in the format specified by the `input_audio_format` field in the
+ session configuration.
+ """
+
+ type: Literal["input_audio_buffer.append"]
+ """The event type, must be `input_audio_buffer.append`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["InputAudioBufferAppendEventParam"]
+
+
+class InputAudioBufferAppendEventParam(TypedDict, total=False):
+ audio: Required[str]
+ """Base64-encoded audio bytes.
+
+ This must be in the format specified by the `input_audio_format` field in the
+ session configuration.
+ """
+
+ type: Required[Literal["input_audio_buffer.append"]]
+ """The event type, must be `input_audio_buffer.append`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_clear_event.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferClearEvent"]
+
+
+class InputAudioBufferClearEvent(BaseModel):
+ type: Literal["input_audio_buffer.clear"]
+ """The event type, must be `input_audio_buffer.clear`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["InputAudioBufferClearEventParam"]
+
+
+class InputAudioBufferClearEventParam(TypedDict, total=False):
+ type: Required[Literal["input_audio_buffer.clear"]]
+ """The event type, must be `input_audio_buffer.clear`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferClearedEvent"]
+
+
+class InputAudioBufferClearedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ type: Literal["input_audio_buffer.cleared"]
+ """The event type, must be `input_audio_buffer.cleared`."""
src/openai/types/beta/realtime/input_audio_buffer_commit_event.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferCommitEvent"]
+
+
+class InputAudioBufferCommitEvent(BaseModel):
+ type: Literal["input_audio_buffer.commit"]
+ """The event type, must be `input_audio_buffer.commit`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["InputAudioBufferCommitEventParam"]
+
+
+class InputAudioBufferCommitEventParam(TypedDict, total=False):
+ type: Required[Literal["input_audio_buffer.commit"]]
+ """The event type, must be `input_audio_buffer.commit`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_committed_event.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferCommittedEvent"]
+
+
+class InputAudioBufferCommittedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item that will be created."""
+
+ previous_item_id: str
+ """The ID of the preceding item after which the new item will be inserted."""
+
+ type: Literal["input_audio_buffer.committed"]
+ """The event type, must be `input_audio_buffer.committed`."""
src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferSpeechStartedEvent"]
+
+
+class InputAudioBufferSpeechStartedEvent(BaseModel):
+ audio_start_ms: int
+ """
+ Milliseconds from the start of all audio written to the buffer during the
+ session when speech was first detected. This will correspond to the beginning of
+ audio sent to the model, and thus includes the `prefix_padding_ms` configured in
+ the Session.
+ """
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item that will be created when speech stops."""
+
+ type: Literal["input_audio_buffer.speech_started"]
+ """The event type, must be `input_audio_buffer.speech_started`."""
src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferSpeechStoppedEvent"]
+
+
+class InputAudioBufferSpeechStoppedEvent(BaseModel):
+ audio_end_ms: int
+ """Milliseconds since the session started when speech stopped.
+
+ This will correspond to the end of audio sent to the model, and thus includes
+ the `min_silence_duration_ms` configured in the Session.
+ """
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item that will be created."""
+
+ type: Literal["input_audio_buffer.speech_stopped"]
+ """The event type, must be `input_audio_buffer.speech_stopped`."""
src/openai/types/beta/realtime/rate_limits_updated_event.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["RateLimitsUpdatedEvent", "RateLimit"]
+
+
+class RateLimit(BaseModel):
+ limit: Optional[int] = None
+ """The maximum allowed value for the rate limit."""
+
+ name: Optional[Literal["requests", "tokens"]] = None
+ """The name of the rate limit (`requests`, `tokens`)."""
+
+ remaining: Optional[int] = None
+ """The remaining value before the limit is reached."""
+
+ reset_seconds: Optional[float] = None
+ """Seconds until the rate limit resets."""
+
+
+class RateLimitsUpdatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ rate_limits: List[RateLimit]
+ """List of rate limit information."""
+
+ type: Literal["rate_limits.updated"]
+ """The event type, must be `rate_limits.updated`."""
src/openai/types/beta/realtime/realtime_client_event.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ...._utils import PropertyInfo
+from .session_update_event import SessionUpdateEvent
+from .response_cancel_event import ResponseCancelEvent
+from .response_create_event import ResponseCreateEvent
+from .conversation_item_create_event import ConversationItemCreateEvent
+from .conversation_item_delete_event import ConversationItemDeleteEvent
+from .input_audio_buffer_clear_event import InputAudioBufferClearEvent
+from .input_audio_buffer_append_event import InputAudioBufferAppendEvent
+from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent
+from .conversation_item_truncate_event import ConversationItemTruncateEvent
+
+__all__ = ["RealtimeClientEvent"]
+
+RealtimeClientEvent: TypeAlias = Annotated[
+ Union[
+ SessionUpdateEvent,
+ InputAudioBufferAppendEvent,
+ InputAudioBufferCommitEvent,
+ InputAudioBufferClearEvent,
+ ConversationItemCreateEvent,
+ ConversationItemTruncateEvent,
+ ConversationItemDeleteEvent,
+ ResponseCreateEvent,
+ ResponseCancelEvent,
+ ],
+ PropertyInfo(discriminator="type"),
+]
src/openai/types/beta/realtime/realtime_client_event_param.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .session_update_event_param import SessionUpdateEventParam
+from .response_cancel_event_param import ResponseCancelEventParam
+from .response_create_event_param import ResponseCreateEventParam
+from .conversation_item_create_event_param import ConversationItemCreateEventParam
+from .conversation_item_delete_event_param import ConversationItemDeleteEventParam
+from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam
+from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam
+from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam
+from .conversation_item_truncate_event_param import ConversationItemTruncateEventParam
+
+__all__ = ["RealtimeClientEventParam"]
+
+RealtimeClientEventParam: TypeAlias = Union[
+ SessionUpdateEventParam,
+ InputAudioBufferAppendEventParam,
+ InputAudioBufferCommitEventParam,
+ InputAudioBufferClearEventParam,
+ ConversationItemCreateEventParam,
+ ConversationItemTruncateEventParam,
+ ConversationItemDeleteEventParam,
+ ResponseCreateEventParam,
+ ResponseCancelEventParam,
+]
src/openai/types/beta/realtime/realtime_connect_params.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["RealtimeConnectParams"]
+
+
+class RealtimeConnectParams(TypedDict, total=False):
+ model: Required[str]
src/openai/types/beta/realtime/realtime_response.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+from .realtime_response_usage import RealtimeResponseUsage
+from .realtime_response_status import RealtimeResponseStatus
+
+__all__ = ["RealtimeResponse"]
+
+
+class RealtimeResponse(BaseModel):
+ id: Optional[str] = None
+ """The unique ID of the response."""
+
+ metadata: Optional[object] = None
+ """Developer-provided string key-value pairs associated with this response."""
+
+ object: Optional[Literal["realtime.response"]] = None
+ """The object type, must be `realtime.response`."""
+
+ output: Optional[List[ConversationItem]] = None
+ """The list of output items generated by the response."""
+
+ status: Optional[Literal["completed", "cancelled", "failed", "incomplete"]] = None
+ """
+ The final status of the response (`completed`, `cancelled`, `failed`, or
+ `incomplete`).
+ """
+
+ status_details: Optional[RealtimeResponseStatus] = None
+ """Additional details about the status."""
+
+ usage: Optional[RealtimeResponseUsage] = None
+ """Usage statistics for the Response, this will correspond to billing.
+
+ A Realtime API session will maintain a conversation context and append new Items
+ to the Conversation, thus output from previous turns (text and audio tokens)
+ will become the input for later turns.
+ """
src/openai/types/beta/realtime/realtime_response_status.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["RealtimeResponseStatus", "Error"]
+
+
+class Error(BaseModel):
+ code: Optional[str] = None
+ """Error code, if any."""
+
+ type: Optional[str] = None
+ """The type of error."""
+
+
+class RealtimeResponseStatus(BaseModel):
+ error: Optional[Error] = None
+ """
+ A description of the error that caused the response to fail, populated when the
+ `status` is `failed`.
+ """
+
+ reason: Optional[Literal["turn_detected", "client_cancelled", "max_output_tokens", "content_filter"]] = None
+ """The reason the Response did not complete.
+
+ For a `cancelled` Response, one of `turn_detected` (the server VAD detected a
+ new start of speech) or `client_cancelled` (the client sent a cancel event). For
+ an `incomplete` Response, one of `max_output_tokens` or `content_filter` (the
+ server-side safety filter activated and cut off the response).
+ """
+
+ type: Optional[Literal["completed", "cancelled", "incomplete", "failed"]] = None
+ """
+ The type of error that caused the response to fail, corresponding to the
+ `status` field (`completed`, `cancelled`, `incomplete`, `failed`).
+ """
src/openai/types/beta/realtime/realtime_response_usage.py
@@ -0,0 +1,52 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ...._models import BaseModel
+
+__all__ = ["RealtimeResponseUsage", "InputTokenDetails", "OutputTokenDetails"]
+
+
+class InputTokenDetails(BaseModel):
+ audio_tokens: Optional[int] = None
+ """The number of audio tokens used in the Response."""
+
+ cached_tokens: Optional[int] = None
+ """The number of cached tokens used in the Response."""
+
+ text_tokens: Optional[int] = None
+ """The number of text tokens used in the Response."""
+
+
+class OutputTokenDetails(BaseModel):
+ audio_tokens: Optional[int] = None
+ """The number of audio tokens used in the Response."""
+
+ text_tokens: Optional[int] = None
+ """The number of text tokens used in the Response."""
+
+
+class RealtimeResponseUsage(BaseModel):
+ input_token_details: Optional[InputTokenDetails] = None
+ """Details about the input tokens used in the Response."""
+
+ input_tokens: Optional[int] = None
+ """
+ The number of input tokens used in the Response, including text and audio
+ tokens.
+ """
+
+ output_token_details: Optional[OutputTokenDetails] = None
+ """Details about the output tokens used in the Response."""
+
+ output_tokens: Optional[int] = None
+ """
+ The number of output tokens sent in the Response, including text and audio
+ tokens.
+ """
+
+ total_tokens: Optional[int] = None
+ """
+ The total number of tokens in the Response including input and output text and
+ audio tokens.
+ """
src/openai/types/beta/realtime/realtime_server_event.py
@@ -0,0 +1,72 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ...._utils import PropertyInfo
+from .error_event import ErrorEvent
+from .response_done_event import ResponseDoneEvent
+from .session_created_event import SessionCreatedEvent
+from .session_updated_event import SessionUpdatedEvent
+from .response_created_event import ResponseCreatedEvent
+from .response_text_done_event import ResponseTextDoneEvent
+from .rate_limits_updated_event import RateLimitsUpdatedEvent
+from .response_audio_done_event import ResponseAudioDoneEvent
+from .response_text_delta_event import ResponseTextDeltaEvent
+from .conversation_created_event import ConversationCreatedEvent
+from .response_audio_delta_event import ResponseAudioDeltaEvent
+from .conversation_item_created_event import ConversationItemCreatedEvent
+from .conversation_item_deleted_event import ConversationItemDeletedEvent
+from .response_output_item_done_event import ResponseOutputItemDoneEvent
+from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent
+from .response_content_part_done_event import ResponseContentPartDoneEvent
+from .response_output_item_added_event import ResponseOutputItemAddedEvent
+from .conversation_item_truncated_event import ConversationItemTruncatedEvent
+from .response_content_part_added_event import ResponseContentPartAddedEvent
+from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent
+from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent
+from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent
+from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent
+from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent
+from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent
+from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent
+from .conversation_item_input_audio_transcription_failed_event import ConversationItemInputAudioTranscriptionFailedEvent
+from .conversation_item_input_audio_transcription_completed_event import (
+ ConversationItemInputAudioTranscriptionCompletedEvent,
+)
+
+__all__ = ["RealtimeServerEvent"]
+
+RealtimeServerEvent: TypeAlias = Annotated[
+ Union[
+ ErrorEvent,
+ SessionCreatedEvent,
+ SessionUpdatedEvent,
+ ConversationCreatedEvent,
+ InputAudioBufferCommittedEvent,
+ InputAudioBufferClearedEvent,
+ InputAudioBufferSpeechStartedEvent,
+ InputAudioBufferSpeechStoppedEvent,
+ ConversationItemCreatedEvent,
+ ConversationItemInputAudioTranscriptionCompletedEvent,
+ ConversationItemInputAudioTranscriptionFailedEvent,
+ ConversationItemTruncatedEvent,
+ ConversationItemDeletedEvent,
+ ResponseCreatedEvent,
+ ResponseDoneEvent,
+ ResponseOutputItemAddedEvent,
+ ResponseOutputItemDoneEvent,
+ ResponseContentPartAddedEvent,
+ ResponseContentPartDoneEvent,
+ ResponseTextDeltaEvent,
+ ResponseTextDoneEvent,
+ ResponseAudioTranscriptDeltaEvent,
+ ResponseAudioTranscriptDoneEvent,
+ ResponseAudioDeltaEvent,
+ ResponseAudioDoneEvent,
+ ResponseFunctionCallArgumentsDeltaEvent,
+ ResponseFunctionCallArgumentsDoneEvent,
+ RateLimitsUpdatedEvent,
+ ],
+ PropertyInfo(discriminator="type"),
+]
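
Because each variant fixes `type` to a distinct Literal, the union behaves as a tagged union: comparing `event.type` narrows it for type checkers. A sketch of a dispatcher over a few of the variants:

from openai.types.beta.realtime.realtime_server_event import RealtimeServerEvent

def handle(event: RealtimeServerEvent) -> None:
    if event.type == "response.text.delta":
        print(event.delta, end="", flush=True)  # narrowed to ResponseTextDeltaEvent
    elif event.type == "response.done":
        print()  # event.response carries the final status and usage
    elif event.type == "error":
        raise RuntimeError(f"{event.error.type}: {event.error.message}")
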
src/openai/types/beta/realtime/response_audio_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseAudioDeltaEvent"]
+
+
+class ResponseAudioDeltaEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ delta: str
+ """Base64-encoded audio data delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.audio.delta"]
+ """The event type, must be `response.audio.delta`."""
src/openai/types/beta/realtime/response_audio_done_event.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseAudioDoneEvent"]
+
+
+class ResponseAudioDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.audio.done"]
+ """The event type, must be `response.audio.done`."""
src/openai/types/beta/realtime/response_audio_transcript_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseAudioTranscriptDeltaEvent"]
+
+
+class ResponseAudioTranscriptDeltaEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ delta: str
+ """The transcript delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.audio_transcript.delta"]
+ """The event type, must be `response.audio_transcript.delta`."""
src/openai/types/beta/realtime/response_audio_transcript_done_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseAudioTranscriptDoneEvent"]
+
+
+class ResponseAudioTranscriptDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ transcript: str
+ """The final transcript of the audio."""
+
+ type: Literal["response.audio_transcript.done"]
+ """The event type, must be `response.audio_transcript.done`."""
src/openai/types/beta/realtime/response_cancel_event.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseCancelEvent"]
+
+
+class ResponseCancelEvent(BaseModel):
+ type: Literal["response.cancel"]
+ """The event type, must be `response.cancel`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
+
+ response_id: Optional[str] = None
+ """
+ A specific response ID to cancel; if not provided, the server will cancel an
+ in-progress response in the default conversation.
+ """
src/openai/types/beta/realtime/response_cancel_event_param.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ResponseCancelEventParam"]
+
+
+class ResponseCancelEventParam(TypedDict, total=False):
+ type: Required[Literal["response.cancel"]]
+ """The event type, must be `response.cancel`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
+
+ response_id: str
+ """
+ A specific response ID to cancel; if not provided, the server will cancel an
+ in-progress response in the default conversation.
+ """
src/openai/types/beta/realtime/response_content_part_added_event.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseContentPartAddedEvent", "Part"]
+
+
+class Part(BaseModel):
+ audio: Optional[str] = None
+ """Base64-encoded audio data (if type is "audio")."""
+
+ text: Optional[str] = None
+ """The text content (if type is "text")."""
+
+ transcript: Optional[str] = None
+ """The transcript of the audio (if type is "audio")."""
+
+ type: Optional[Literal["text", "audio"]] = None
+ """The content type ("text", "audio")."""
+
+
+class ResponseContentPartAddedEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item to which the content part was added."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ part: Part
+ """The content part that was added."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.content_part.added"]
+ """The event type, must be `response.content_part.added`."""
src/openai/types/beta/realtime/response_content_part_done_event.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseContentPartDoneEvent", "Part"]
+
+
+class Part(BaseModel):
+ audio: Optional[str] = None
+ """Base64-encoded audio data (if type is "audio")."""
+
+ text: Optional[str] = None
+ """The text content (if type is "text")."""
+
+ transcript: Optional[str] = None
+ """The transcript of the audio (if type is "audio")."""
+
+ type: Optional[Literal["text", "audio"]] = None
+ """The content type ("text", "audio")."""
+
+
+class ResponseContentPartDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ part: Part
+ """The content part that is done."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.content_part.done"]
+ """The event type, must be `response.content_part.done`."""
src/openai/types/beta/realtime/response_create_event.py
@@ -0,0 +1,115 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ResponseCreateEvent", "Response", "ResponseTool"]
+
+
+class ResponseTool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class Response(BaseModel):
+ conversation: Union[str, Literal["auto", "none"], None] = None
+ """Controls which conversation the response is added to.
+
+ Currently supports `auto` and `none`, with `auto` as the default value. The
+ `auto` value means that the contents of the response will be added to the
+ default conversation. Set this to `none` to create an out-of-band response that
+ will not add items to the default conversation.
+ """
+
+ input: Optional[List[ConversationItem]] = None
+ """Input items to include in the prompt for the model.
+
+ Creates a new context for this response, without including the default
+ conversation. Can include references to items from the default conversation.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ metadata: Optional[object] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format. Keys can be a maximum of 64 characters long and values can be
+ a maximum of 512 characters long.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[ResponseTool]] = None
+ """Tools (functions) available to the model."""
+
+ voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
+
+
+class ResponseCreateEvent(BaseModel):
+ type: Literal["response.create"]
+ """The event type, must be `response.create`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
+
+ response: Optional[Response] = None
+ """Create a new Realtime response with these parameters"""
src/openai/types/beta/realtime/response_create_event_param.py
@@ -0,0 +1,116 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .conversation_item_param import ConversationItemParam
+
+__all__ = ["ResponseCreateEventParam", "Response", "ResponseTool"]
+
+
+class ResponseTool(TypedDict, total=False):
+ description: str
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: str
+ """The name of the function."""
+
+ parameters: object
+ """Parameters of the function in JSON Schema."""
+
+ type: Literal["function"]
+ """The type of the tool, i.e. `function`."""
+
+
+class Response(TypedDict, total=False):
+ conversation: Union[str, Literal["auto", "none"]]
+ """Controls which conversation the response is added to.
+
+ Currently supports `auto` and `none`, with `auto` as the default value. The
+ `auto` value means that the contents of the response will be added to the
+ default conversation. Set this to `none` to create an out-of-band response that
+ will not add items to the default conversation.
+ """
+
+ input: Iterable[ConversationItemParam]
+ """Input items to include in the prompt for the model.
+
+ Creates a new context for this response, without including the default
+ conversation. Can include references to items from the default conversation.
+ """
+
+ instructions: str
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"]]
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ metadata: Optional[object]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format. Keys can be a maximum of 64 characters long and values can be
+ a maximum of 512 characters long.
+ """
+
+ modalities: List[Literal["text", "audio"]]
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: float
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: str
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Iterable[ResponseTool]
+ """Tools (functions) available to the model."""
+
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
+
+
+class ResponseCreateEventParam(TypedDict, total=False):
+ type: Required[Literal["response.create"]]
+ """The event type, must be `response.create`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
+
+ response: Response
+ """Create a new Realtime response with these parameters"""
src/openai/types/beta/realtime/response_created_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .realtime_response import RealtimeResponse
+
+__all__ = ["ResponseCreatedEvent"]
+
+
+class ResponseCreatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response: RealtimeResponse
+ """The response resource."""
+
+ type: Literal["response.created"]
+ """The event type, must be `response.created`."""
src/openai/types/beta/realtime/response_done_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .realtime_response import RealtimeResponse
+
+__all__ = ["ResponseDoneEvent"]
+
+
+class ResponseDoneEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response: RealtimeResponse
+ """The response resource."""
+
+ type: Literal["response.done"]
+ """The event type, must be `response.done`."""
src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"]
+
+
+class ResponseFunctionCallArgumentsDeltaEvent(BaseModel):
+ call_id: str
+ """The ID of the function call."""
+
+ delta: str
+ """The arguments delta as a JSON string."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the function call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.function_call_arguments.delta"]
+ """The event type, must be `response.function_call_arguments.delta`."""
src/openai/types/beta/realtime/response_function_call_arguments_done_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseFunctionCallArgumentsDoneEvent"]
+
+
+class ResponseFunctionCallArgumentsDoneEvent(BaseModel):
+ arguments: str
+ """The final arguments as a JSON string."""
+
+ call_id: str
+ """The ID of the function call."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the function call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.function_call_arguments.done"]
+ """The event type, must be `response.function_call_arguments.done`."""
src/openai/types/beta/realtime/response_output_item_added_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ResponseOutputItemAddedEvent"]
+
+
+class ResponseOutputItemAddedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """The item to add to the conversation."""
+
+ output_index: int
+ """The index of the output item in the Response."""
+
+ response_id: str
+ """The ID of the Response to which the item belongs."""
+
+ type: Literal["response.output_item.added"]
+ """The event type, must be `response.output_item.added`."""
src/openai/types/beta/realtime/response_output_item_done_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ResponseOutputItemDoneEvent"]
+
+
+class ResponseOutputItemDoneEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """The item to add to the conversation."""
+
+ output_index: int
+ """The index of the output item in the Response."""
+
+ response_id: str
+ """The ID of the Response to which the item belongs."""
+
+ type: Literal["response.output_item.done"]
+ """The event type, must be `response.output_item.done`."""
src/openai/types/beta/realtime/response_text_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseTextDeltaEvent"]
+
+
+class ResponseTextDeltaEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ delta: str
+ """The text delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.text.delta"]
+ """The event type, must be `response.text.delta`."""
src/openai/types/beta/realtime/response_text_done_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseTextDoneEvent"]
+
+
+class ResponseTextDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ text: str
+ """The final text content."""
+
+ type: Literal["response.text.done"]
+ """The event type, must be `response.text.done`."""
src/openai/types/beta/realtime/session.py
@@ -0,0 +1,148 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["Session", "InputAudioTranscription", "Tool", "TurnDetection"]
+
+
+class InputAudioTranscription(BaseModel):
+ model: Optional[str] = None
+ """
+ The model to use for transcription; `whisper-1` is the only currently supported
+ model.
+ """
+
+
+class Tool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class TurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: Optional[Literal["server_vad"]] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class Session(BaseModel):
+ id: Optional[str] = None
+ """Unique identifier for the session object."""
+
+ input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: Optional[InputAudioTranscription] = None
+ """
+ Configuration for input audio transcription. This defaults to off and can be set
+ to `null` to turn it off once enabled. Input audio transcription is not native
+ to the model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ model: Union[
+ str,
+ Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ],
+ None,
+ ] = None
+ """The Realtime model used for this session."""
+
+ output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[Tool]] = None
+ """Tools (functions) available to the model."""
+
+ turn_detection: Optional[TurnDetection] = None
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
src/openai/types/beta/realtime/session_created_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .session import Session
+from ...._models import BaseModel
+
+__all__ = ["SessionCreatedEvent"]
+
+
+class SessionCreatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ session: Session
+ """Realtime session object configuration."""
+
+ type: Literal["session.created"]
+ """The event type, must be `session.created`."""
src/openai/types/beta/realtime/session_update_event.py
@@ -0,0 +1,158 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["SessionUpdateEvent", "Session", "SessionInputAudioTranscription", "SessionTool", "SessionTurnDetection"]
+
+
+class SessionInputAudioTranscription(BaseModel):
+ model: Optional[str] = None
+ """
+ The model to use for transcription; `whisper-1` is the only currently supported
+ model.
+ """
+
+
+class SessionTool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class SessionTurnDetection(BaseModel):
+ create_response: Optional[bool] = None
+ """Whether or not to automatically generate a response when VAD is enabled.
+
+ `true` by default.
+ """
+
+ prefix_padding_ms: Optional[int] = None
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: Optional[str] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class Session(BaseModel):
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ """The Realtime model used for this session."""
+
+ input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: Optional[SessionInputAudioTranscription] = None
+ """
+ Configuration for input audio transcription. This defaults to off and can be set
+ to `null` to turn it off once enabled. Input audio transcription is not native
+ to the model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[SessionTool]] = None
+ """Tools (functions) available to the model."""
+
+ turn_detection: Optional[SessionTurnDetection] = None
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
+
+
+class SessionUpdateEvent(BaseModel):
+ session: Session
+ """Realtime session object configuration."""
+
+ type: Literal["session.update"]
+ """The event type, must be `session.update`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/session_update_event_param.py
@@ -0,0 +1,166 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = [
+ "SessionUpdateEventParam",
+ "Session",
+ "SessionInputAudioTranscription",
+ "SessionTool",
+ "SessionTurnDetection",
+]
+
+
+class SessionInputAudioTranscription(TypedDict, total=False):
+ model: str
+ """
+ The model to use for transcription; `whisper-1` is the only currently supported
+ model.
+ """
+
+
+class SessionTool(TypedDict, total=False):
+ description: str
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: str
+ """The name of the function."""
+
+ parameters: object
+ """Parameters of the function in JSON Schema."""
+
+ type: Literal["function"]
+ """The type of the tool, i.e. `function`."""
+
+
+class SessionTurnDetection(TypedDict, total=False):
+ create_response: bool
+ """Whether or not to automatically generate a response when VAD is enabled.
+
+ `true` by default.
+ """
+
+ prefix_padding_ms: int
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: int
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: float
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: str
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class Session(TypedDict, total=False):
+ model: Required[
+ Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ ]
+ """The Realtime model used for this session."""
+
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: SessionInputAudioTranscription
+ """
+ Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn it off once enabled. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+ """
+
+ instructions: str
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"]]
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: List[Literal["text", "audio"]]
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: float
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: str
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Iterable[SessionTool]
+ """Tools (functions) available to the model."""
+
+ turn_detection: SessionTurnDetection
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
+
+
+class SessionUpdateEventParam(TypedDict, total=False):
+ session: Required[Session]
+ """Realtime session object configuration."""
+
+ type: Required[Literal["session.update"]]
+ """The event type, must be `session.update`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/session_updated_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .session import Session
+from ...._models import BaseModel
+
+__all__ = ["SessionUpdatedEvent"]
+
+
+class SessionUpdatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ session: Session
+ """Realtime session object configuration."""
+
+ type: Literal["session.updated"]
+ """The event type, must be `session.updated`."""
src/openai/types/__init__.py
@@ -47,6 +47,7 @@ from .moderation_create_params import ModerationCreateParams as ModerationCreate
from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse
from .moderation_create_response import ModerationCreateResponse as ModerationCreateResponse
from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam
+from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions
from .image_create_variation_params import ImageCreateVariationParams as ImageCreateVariationParams
from .moderation_image_url_input_param import ModerationImageURLInputParam as ModerationImageURLInputParam
from .moderation_multi_modal_input_param import ModerationMultiModalInputParam as ModerationMultiModalInputParam
src/openai/types/websocket_connection_options.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from typing_extensions import Sequence, TypedDict
+
+if TYPE_CHECKING:
+ from websockets import Subprotocol
+ from websockets.extensions import ClientExtensionFactory
+
+
+class WebsocketConnectionOptions(TypedDict, total=False):
+ """Websocket connection options copied from `websockets`.
+
+ See, for example: https://websockets.readthedocs.io/en/stable/reference/asyncio/client.html#websockets.asyncio.client.connect
+ """
+
+ extensions: Sequence[ClientExtensionFactory] | None
+ """List of supported extensions, in order in which they should be negotiated and run."""
+
+ subprotocols: Sequence[Subprotocol] | None
+ """List of supported subprotocols, in order of decreasing preference."""
+
+ compression: str | None
+ """The “permessage-deflate” extension is enabled by default. Set compression to None to disable it. See the [compression guide](https://websockets.readthedocs.io/en/stable/topics/compression.html) for details."""
+
+ # limits
+ max_size: int | None
+ """Maximum size of incoming messages in bytes. None disables the limit."""
+
+ max_queue: int | None | tuple[int | None, int | None]
+ """High-water mark of the buffer where frames are received. It defaults to 16 frames. The low-water mark defaults to max_queue // 4. You may pass a (high, low) tuple to set the high-water and low-water marks. If you want to disable flow control entirely, you may set it to None, although that’s a bad idea."""
+
+ write_limit: int | tuple[int, int | None]
+ """High-water mark of write buffer in bytes. It is passed to set_write_buffer_limits(). It defaults to 32 KiB. You may pass a (high, low) tuple to set the high-water and low-water marks."""
src/openai/_client.py
@@ -63,6 +63,14 @@ class OpenAI(SyncAPIClient):
organization: str | None
project: str | None
+ websocket_base_url: str | httpx.URL | None
+ """Base URL for WebSocket connections.
+
+ If not specified, the default base URL will be used, with 'wss://' replacing the
+ 'http://' or 'https://' scheme. For example: 'http://example.com' becomes
+ 'wss://example.com'.
+ """
+
def __init__(
self,
*,
@@ -70,6 +78,7 @@ class OpenAI(SyncAPIClient):
organization: str | None = None,
project: str | None = None,
base_url: str | httpx.URL | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -111,6 +120,8 @@ class OpenAI(SyncAPIClient):
project = os.environ.get("OPENAI_PROJECT_ID")
self.project = project
+ self.websocket_base_url = websocket_base_url
+
if base_url is None:
base_url = os.environ.get("OPENAI_BASE_URL")
if base_url is None:
@@ -172,6 +183,7 @@ class OpenAI(SyncAPIClient):
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.Client | None = None,
@@ -208,6 +220,7 @@ class OpenAI(SyncAPIClient):
api_key=api_key or self.api_key,
organization=organization or self.organization,
project=project or self.project,
+ websocket_base_url=websocket_base_url or self.websocket_base_url,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
@@ -277,6 +290,14 @@ class AsyncOpenAI(AsyncAPIClient):
organization: str | None
project: str | None
+ websocket_base_url: str | httpx.URL | None
+ """Base URL for WebSocket connections.
+
+ If not specified, the default base URL will be used, with 'wss://' replacing the
+ 'http://' or 'https://' scheme. For example: 'http://example.com' becomes
+ 'wss://example.com'.
+ """
+
def __init__(
self,
*,
@@ -284,6 +305,7 @@ class AsyncOpenAI(AsyncAPIClient):
organization: str | None = None,
project: str | None = None,
base_url: str | httpx.URL | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -325,6 +347,8 @@ class AsyncOpenAI(AsyncAPIClient):
project = os.environ.get("OPENAI_PROJECT_ID")
self.project = project
+ self.websocket_base_url = websocket_base_url
+
if base_url is None:
base_url = os.environ.get("OPENAI_BASE_URL")
if base_url is None:
@@ -386,6 +410,7 @@ class AsyncOpenAI(AsyncAPIClient):
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.AsyncClient | None = None,
@@ -422,6 +447,7 @@ class AsyncOpenAI(AsyncAPIClient):
api_key=api_key or self.api_key,
organization=organization or self.organization,
project=project or self.project,
+ websocket_base_url=websocket_base_url or self.websocket_base_url,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
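With the new parameter, websocket traffic can target a different host than HTTP requests; a minimal sketch with a hypothetical URL:

```python
from openai import OpenAI

client = OpenAI(
    api_key="sk-...",  # placeholder
    websocket_base_url="wss://example.com/v1",  # hypothetical override
)
# HTTP requests still use the default base URL; only websocket
# connections pick up the override.
```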
tests/api_resources/beta/test_realtime.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+
+import pytest
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestRealtime:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+
+class TestAsyncRealtime:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
.stats.yml
@@ -1,2 +1,2 @@
configured_endpoints: 69
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-779ea2754025daf5e18eb8ceb203ec321692636bc3a999338556a479178efa6c.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-0d64ca9e45f51b4279f87b205eeb3a3576df98407698ce053f2e2302c1c08df1.yml
api.md
@@ -239,6 +239,57 @@ Methods:
## Realtime
+Types:
+
+```python
+from openai.types.beta.realtime import (
+ ConversationCreatedEvent,
+ ConversationItem,
+ ConversationItemContent,
+ ConversationItemCreateEvent,
+ ConversationItemCreatedEvent,
+ ConversationItemDeleteEvent,
+ ConversationItemDeletedEvent,
+ ConversationItemInputAudioTranscriptionCompletedEvent,
+ ConversationItemInputAudioTranscriptionFailedEvent,
+ ConversationItemTruncateEvent,
+ ConversationItemTruncatedEvent,
+ ErrorEvent,
+ InputAudioBufferAppendEvent,
+ InputAudioBufferClearEvent,
+ InputAudioBufferClearedEvent,
+ InputAudioBufferCommitEvent,
+ InputAudioBufferCommittedEvent,
+ InputAudioBufferSpeechStartedEvent,
+ InputAudioBufferSpeechStoppedEvent,
+ RateLimitsUpdatedEvent,
+ RealtimeClientEvent,
+ RealtimeResponse,
+ RealtimeResponseStatus,
+ RealtimeResponseUsage,
+ RealtimeServerEvent,
+ ResponseAudioDeltaEvent,
+ ResponseAudioDoneEvent,
+ ResponseAudioTranscriptDeltaEvent,
+ ResponseAudioTranscriptDoneEvent,
+ ResponseCancelEvent,
+ ResponseContentPartAddedEvent,
+ ResponseContentPartDoneEvent,
+ ResponseCreateEvent,
+ ResponseCreatedEvent,
+ ResponseDoneEvent,
+ ResponseFunctionCallArgumentsDeltaEvent,
+ ResponseFunctionCallArgumentsDoneEvent,
+ ResponseOutputItemAddedEvent,
+ ResponseOutputItemDoneEvent,
+ ResponseTextDeltaEvent,
+ ResponseTextDoneEvent,
+ SessionCreatedEvent,
+ SessionUpdateEvent,
+ SessionUpdatedEvent,
+)
+```
+
### Sessions
Types:
pyproject.toml
@@ -35,9 +35,6 @@ classifiers = [
"License :: OSI Approved :: Apache Software License"
]
-[project.optional-dependencies]
-datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"]
-
[project.urls]
Homepage = "https://github.com/openai/openai-python"
Repository = "https://github.com/openai/openai-python"
@@ -45,6 +42,10 @@ Repository = "https://github.com/openai/openai-python"
[project.scripts]
openai = "openai.cli:main"
+[project.optional-dependencies]
+realtime = ["websockets >= 13, < 15"]
+datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"]
+
[tool.rye]
managed = true
# version pins are in requirements-dev.lock
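Moving `[project.optional-dependencies]` below `[project.scripts]` also introduces the new `realtime` extra, so the websocket dependency stays opt-in: `pip install "openai[realtime]"` pulls in `websockets >= 13, < 15`, while the base install is unchanged.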
requirements-dev.lock
@@ -185,5 +185,7 @@ urllib3==2.2.1
# via requests
virtualenv==20.24.5
# via nox
+websockets==14.1
+ # via openai
zipp==3.17.0
# via importlib-metadata
requirements.lock
@@ -64,3 +64,5 @@ typing-extensions==4.12.2
# via pydantic-core
tzdata==2024.1
# via pandas
+websockets==14.1
+ # via openai