Commit 5fdba486
Changed files (70)
src/openai/lib
src/openai/resources/beta/realtime
src/openai/types/beta/realtime
tests/api_resources/beta
src/openai/lib/azure.py
@@ -76,6 +76,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -94,6 +95,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -112,6 +114,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -131,6 +134,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
base_url: str | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -214,6 +218,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
default_headers=default_headers,
default_query=default_query,
http_client=http_client,
+ websocket_base_url=websocket_base_url,
_strict_response_validation=_strict_response_validation,
)
self._api_version = api_version
@@ -227,6 +232,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
api_version: str | None = None,
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
@@ -247,6 +253,7 @@ class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
api_key=api_key,
organization=organization,
project=project,
+ websocket_base_url=websocket_base_url,
base_url=base_url,
timeout=timeout,
http_client=http_client,
@@ -314,6 +321,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -333,6 +341,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -352,6 +361,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -372,6 +382,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
organization: str | None = None,
project: str | None = None,
base_url: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -454,6 +465,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
default_headers=default_headers,
default_query=default_query,
http_client=http_client,
+ websocket_base_url=websocket_base_url,
_strict_response_validation=_strict_response_validation,
)
self._api_version = api_version
@@ -467,6 +479,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
api_version: str | None = None,
azure_ad_token: str | None = None,
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
@@ -487,6 +500,7 @@ class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], Asy
api_key=api_key,
organization=organization,
project=project,
+ websocket_base_url=websocket_base_url,
base_url=base_url,
timeout=timeout,
http_client=http_client,
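
The new `websocket_base_url` parameter threaded through both clients lets Realtime websocket traffic be routed separately from regular HTTP requests. A minimal sketch of passing it (the endpoint, key, and API version below are placeholder values):

```py
from openai import AzureOpenAI

# Placeholder endpoint and credentials, shown only to illustrate the new parameter.
client = AzureOpenAI(
    azure_endpoint="https://my-resource.openai.azure.com",
    api_key="...",
    api_version="2024-10-01-preview",
    # Realtime connections use this base instead of one derived from the HTTP base URL:
    websocket_base_url="wss://my-resource.openai.azure.com/openai",
)
```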
src/openai/resources/beta/realtime/realtime.py
@@ -2,6 +2,15 @@
from __future__ import annotations
+import json
+import logging
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Iterator, cast
+from typing_extensions import AsyncIterator
+
+import httpx
+from pydantic import BaseModel
+
from .sessions import (
Sessions,
AsyncSessions,
@@ -10,11 +19,34 @@ from .sessions import (
SessionsWithStreamingResponse,
AsyncSessionsWithStreamingResponse,
)
+from ...._types import NOT_GIVEN, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ strip_not_given,
+ async_maybe_transform,
+)
from ...._compat import cached_property
+from ...._models import construct_type_unchecked
from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._exceptions import OpenAIError
+from ...._base_client import _merge_mappings
+from ....types.beta.realtime import session_update_event_param, response_create_event_param
+from ....types.websocket_connection_options import WebsocketConnectionOptions
+from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent
+from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent
+from ....types.beta.realtime.conversation_item_param import ConversationItemParam
+from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam
+
+if TYPE_CHECKING:
+ from websockets.sync.client import ClientConnection as WebsocketConnection
+ from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection
+
+ from ...._client import OpenAI, AsyncOpenAI
__all__ = ["Realtime", "AsyncRealtime"]
+log: logging.Logger = logging.getLogger(__name__)
+
class Realtime(SyncAPIResource):
@cached_property
@@ -40,6 +72,33 @@ class Realtime(SyncAPIResource):
"""
return RealtimeWithStreamingResponse(self)
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> RealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+ """
+ return RealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
class AsyncRealtime(AsyncAPIResource):
@cached_property
@@ -65,6 +124,33 @@ class AsyncRealtime(AsyncAPIResource):
"""
return AsyncRealtimeWithStreamingResponse(self)
+ def connect(
+ self,
+ *,
+ model: str,
+ extra_query: Query = {},
+ extra_headers: Headers = {},
+ websocket_connection_options: WebsocketConnectionOptions = {},
+ ) -> AsyncRealtimeConnectionManager:
+ """
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
+
+ Some notable benefits of the API include:
+
+ - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
+ - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
+
+ The Realtime API is a stateful, event-based API that communicates over a WebSocket.
+ """
+ return AsyncRealtimeConnectionManager(
+ client=self._client,
+ extra_query=extra_query,
+ extra_headers=extra_headers,
+ websocket_connection_options=websocket_connection_options,
+ model=model,
+ )
+
class RealtimeWithRawResponse:
def __init__(self, realtime: Realtime) -> None:
@@ -100,3 +186,769 @@ class AsyncRealtimeWithStreamingResponse:
@cached_property
def sessions(self) -> AsyncSessionsWithStreamingResponse:
return AsyncSessionsWithStreamingResponse(self._realtime.sessions)
+
+
+class AsyncRealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: AsyncRealtimeSessionResource
+ response: AsyncRealtimeResponseResource
+ conversation: AsyncRealtimeConversationResource
+ input_audio_buffer: AsyncRealtimeInputAudioBufferResource
+
+ _connection: AsyncWebsocketConnection
+
+ def __init__(self, connection: AsyncWebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = AsyncRealtimeSessionResource(self)
+ self.response = AsyncRealtimeResponseResource(self)
+ self.conversation = AsyncRealtimeConversationResource(self)
+ self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
+
+ async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
+ """
+        An infinite iterator that will continue to yield events until
+ the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield await self.recv()
+ except ConnectionClosedOK:
+ return
+
+ async def recv(self) -> RealtimeServerEvent:
+ """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+
+ Canceling this method is safe. There's no risk of losing data.
+ """
+ return self.parse_event(await self.recv_bytes())
+
+ async def recv_bytes(self) -> bytes:
+ """Receive the next message from the connection as raw bytes.
+
+ Canceling this method is safe. There's no risk of losing data.
+
+ If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+ then you can call `.parse_event(data)`.
+ """
+ message = await self._connection.recv(decode=False)
+ log.debug(f"Received websocket message: %s", message)
+ if not isinstance(message, bytes):
+ # passing `decode=False` should always result in us getting `bytes` back
+ raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+ return message
+
+ async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
+ )
+ await self._connection.send(data)
+
+ async def close(self, *, code: int = 1000, reason: str = "") -> None:
+ await self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class AsyncRealtimeConnectionManager:
+ """
+    Context manager over an `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()`.
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+    Note that if your application doesn't work well with the context manager approach, you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.beta.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: AsyncOpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: AsyncRealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ async def __aenter__(self) -> AsyncRealtimeConnection:
+ """
+        👋 If your application doesn't work well with the context manager approach, you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = await client.beta.realtime.connect(...).enter()
+ # ...
+ await connection.close()
+ ```
+ """
+ try:
+ from websockets.asyncio.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **self.__extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = AsyncRealtimeConnection(
+ await connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **self.__client.auth_headers,
+ "OpenAI-Beta": "realtime=v1",
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __aenter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ async def __aexit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ await self.__connection.close()
+
+
+class RealtimeConnection:
+ """Represents a live websocket connection to the Realtime API"""
+
+ session: RealtimeSessionResource
+ response: RealtimeResponseResource
+ conversation: RealtimeConversationResource
+ input_audio_buffer: RealtimeInputAudioBufferResource
+
+ _connection: WebsocketConnection
+
+ def __init__(self, connection: WebsocketConnection) -> None:
+ self._connection = connection
+
+ self.session = RealtimeSessionResource(self)
+ self.response = RealtimeResponseResource(self)
+ self.conversation = RealtimeConversationResource(self)
+ self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
+
+ def __iter__(self) -> Iterator[RealtimeServerEvent]:
+ """
+        An infinite iterator that will continue to yield events until
+ the connection is closed.
+ """
+ from websockets.exceptions import ConnectionClosedOK
+
+ try:
+ while True:
+ yield self.recv()
+ except ConnectionClosedOK:
+ return
+
+    def recv(self) -> RealtimeServerEvent:
+        """
+        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.
+        """
+ return self.parse_event(self.recv_bytes())
+
+    def recv_bytes(self) -> bytes:
+        """Receive the next message from the connection as raw bytes.
+
+        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+        then you can call `.parse_event(data)`.
+        """
+ message = self._connection.recv(decode=False)
+ log.debug(f"Received websocket message: %s", message)
+ if not isinstance(message, bytes):
+ # passing `decode=False` should always result in us getting `bytes` back
+ raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
+
+ return message
+
+ def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
+ data = (
+ event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
+ if isinstance(event, BaseModel)
+ else json.dumps(maybe_transform(event, RealtimeClientEventParam))
+ )
+ self._connection.send(data)
+
+ def close(self, *, code: int = 1000, reason: str = "") -> None:
+ self._connection.close(code=code, reason=reason)
+
+ def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
+ """
+ Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
+
+ This is helpful if you're using `.recv_bytes()`.
+ """
+ return cast(
+ RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
+ )
+
+
+class RealtimeConnectionManager:
+ """
+    Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()`.
+
+ This context manager ensures that the connection will be closed when it exits.
+
+ ---
+
+    Note that if your application doesn't work well with the context manager approach, you
+ can call the `.enter()` method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.beta.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ client: OpenAI,
+ model: str,
+ extra_query: Query,
+ extra_headers: Headers,
+ websocket_connection_options: WebsocketConnectionOptions,
+ ) -> None:
+ self.__client = client
+ self.__model = model
+ self.__connection: RealtimeConnection | None = None
+ self.__extra_query = extra_query
+ self.__extra_headers = extra_headers
+ self.__websocket_connection_options = websocket_connection_options
+
+ def __enter__(self) -> RealtimeConnection:
+ """
+        👋 If your application doesn't work well with the context manager approach, you
+ can call this method directly to initiate a connection.
+
+ **Warning**: You must remember to close the connection with `.close()`.
+
+ ```py
+ connection = client.beta.realtime.connect(...).enter()
+ # ...
+ connection.close()
+ ```
+ """
+ try:
+ from websockets.sync.client import connect
+ except ImportError as exc:
+ raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+ url = self._prepare_url().copy_with(
+ params={
+ **self.__client.base_url.params,
+ "model": self.__model,
+ **self.__extra_query,
+ },
+ )
+ log.debug("Connecting to %s", url)
+ if self.__websocket_connection_options:
+ log.debug("Connection options: %s", self.__websocket_connection_options)
+
+ self.__connection = RealtimeConnection(
+ connect(
+ str(url),
+ user_agent_header=self.__client.user_agent,
+ additional_headers=_merge_mappings(
+ {
+ **self.__client.auth_headers,
+ "OpenAI-Beta": "realtime=v1",
+ },
+ self.__extra_headers,
+ ),
+ **self.__websocket_connection_options,
+ )
+ )
+
+ return self.__connection
+
+ enter = __enter__
+
+ def _prepare_url(self) -> httpx.URL:
+ if self.__client.websocket_base_url is not None:
+ base_url = httpx.URL(self.__client.websocket_base_url)
+ else:
+ base_url = self.__client._base_url.copy_with(scheme="wss")
+
+ merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+ return base_url.copy_with(raw_path=merge_raw_path)
+
+ def __exit__(
+ self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+ ) -> None:
+ if self.__connection is not None:
+ self.__connection.close()
+
+
+class BaseRealtimeConnectionResource:
+ def __init__(self, connection: RealtimeConnection) -> None:
+ self._connection = connection
+
+
+class RealtimeSessionResource(BaseRealtimeConnectionResource):
+ def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to update the session’s default configuration.
+
+ The client may
+ send this event at any time to update the session configuration, and any
+ field may be updated at any time, except for "voice". The server will respond
+ with a `session.updated` event that shows the full effective configuration.
+        Only fields that are present are updated; thus, the correct way to clear a
+ field like "instructions" is to pass an empty string.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class RealtimeResponseResource(BaseRealtimeConnectionResource):
+ def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.cancelled` event or an error if there is no response to
+ cancel.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+ def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+ configuration for this Response only.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+
+class RealtimeConversationResource(BaseRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> RealtimeConversationItemResource:
+ return RealtimeConversationItemResource(self._connection)
+
+
+class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
+ def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+
+class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
+ def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+        not need to send this event; the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+        The client may choose how much audio to place in each event, up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
+ """
+ self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
+
+
+class BaseAsyncRealtimeConnectionResource:
+ def __init__(self, connection: AsyncRealtimeConnection) -> None:
+ self._connection = connection
+
+
+class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
+ async def update(
+ self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to update the session’s default configuration.
+
+ The client may
+ send this event at any time to update the session configuration, and any
+ field may be updated at any time, except for "voice". The server will respond
+ with a `session.updated` event that shows the full effective configuration.
+        Only fields that are present are updated; thus, the correct way to clear a
+ field like "instructions" is to pass an empty string.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+ )
+ )
+
+
+class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
+ async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to cancel an in-progress response.
+
+ The server will respond
+ with a `response.cancelled` event or an error if there is no response to
+ cancel.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
+ )
+ )
+
+ async def create(
+ self,
+ *,
+ event_id: str | NotGiven = NOT_GIVEN,
+ response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ This event instructs the server to create a Response, which means triggering
+ model inference. When in Server VAD mode, the server will create Responses
+ automatically.
+
+ A Response will include at least one Item, and may have two, in which case
+ the second will be a function call. These Items will be appended to the
+ conversation history.
+
+ The server will respond with a `response.created` event, events for Items
+ and content created, and finally a `response.done` event to indicate the
+ Response is complete.
+
+ The `response.create` event includes inference configuration like
+        `instructions` and `temperature`. These fields will override the Session's
+ configuration for this Response only.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
+ )
+ )
+
+
+class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
+ @cached_property
+ def item(self) -> AsyncRealtimeConversationItemResource:
+ return AsyncRealtimeConversationItemResource(self._connection)
+
+
+class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
+ async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event when you want to remove any item from the conversation
+ history.
+
+ The server will respond with a `conversation.item.deleted` event,
+ unless the item does not exist in the conversation history, in which case the
+ server will respond with an error.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
+ )
+ )
+
+ async def create(
+ self,
+ *,
+ item: ConversationItemParam,
+ event_id: str | NotGiven = NOT_GIVEN,
+ previous_item_id: str | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """
+ Add a new Item to the Conversation's context, including messages, function
+ calls, and function call responses. This event can be used both to populate a
+ "history" of the conversation and to add new items mid-stream, but has the
+ current limitation that it cannot populate assistant audio messages.
+
+ If successful, the server will respond with a `conversation.item.created`
+ event, otherwise an `error` event will be sent.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.create",
+ "item": item,
+ "event_id": event_id,
+ "previous_item_id": previous_item_id,
+ }
+ ),
+ )
+ )
+
+ async def truncate(
+ self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
+ ) -> None:
+ """Send this event to truncate a previous assistant message’s audio.
+
+ The server
+ will produce audio faster than realtime, so this event is useful when the user
+ interrupts to truncate audio that has already been sent to the client but not
+ yet played. This will synchronize the server's understanding of the audio with
+ the client's playback.
+
+ Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+ If successful, the server will respond with a `conversation.item.truncated`
+ event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given(
+ {
+ "type": "conversation.item.truncate",
+ "audio_end_ms": audio_end_ms,
+ "content_index": content_index,
+ "item_id": item_id,
+ "event_id": event_id,
+ }
+ ),
+ )
+ )
+
+
+class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
+ async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to clear the audio bytes in the buffer.
+
+ The server will
+ respond with an `input_audio_buffer.cleared` event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
+ )
+
+ async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+        not need to send this event; the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
+ )
+
+ async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """Send this event to append audio bytes to the input audio buffer.
+
+ The audio
+ buffer is temporary storage you can write to and later commit. In Server VAD
+ mode, the audio buffer is used to detect speech and the server will decide
+ when to commit. When Server VAD is disabled, you must commit the audio buffer
+ manually.
+
+        The client may choose how much audio to place in each event, up to a maximum
+        of 15 MiB; for example, streaming smaller chunks from the client may allow the
+        VAD to be more responsive. Unlike most other client events, the server will
+        not send a confirmation response to this event.
+ """
+ await self._connection.send(
+ cast(
+ RealtimeClientEventParam,
+ strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
+ )
+ )
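
Taken together, these classes support a flow like the following sketch. It assumes the `openai[realtime]` extra is installed; the model id is illustrative, and any Realtime-capable model works the same way:

```py
from openai import OpenAI

client = OpenAI()

with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
    # The typed resources wrap the raw client events defined above.
    connection.session.update(session={"modalities": ["text"]})
    connection.conversation.item.create(
        item={
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "Say hello!"}],
        }
    )
    connection.response.create()

    # The connection is an iterator of parsed RealtimeServerEvent objects.
    for event in connection:
        if event.type == "response.text.delta":
            print(event.delta, end="")
        elif event.type == "response.done":
            break
```

The async client mirrors this with `async with client.beta.realtime.connect(...)` and `async for event in connection`.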
src/openai/types/beta/realtime/__init__.py
@@ -2,5 +2,79 @@
from __future__ import annotations
+from .session import Session as Session
+from .error_event import ErrorEvent as ErrorEvent
+from .conversation_item import ConversationItem as ConversationItem
+from .realtime_response import RealtimeResponse as RealtimeResponse
+from .response_done_event import ResponseDoneEvent as ResponseDoneEvent
+from .session_update_event import SessionUpdateEvent as SessionUpdateEvent
+from .realtime_client_event import RealtimeClientEvent as RealtimeClientEvent
+from .realtime_server_event import RealtimeServerEvent as RealtimeServerEvent
+from .response_cancel_event import ResponseCancelEvent as ResponseCancelEvent
+from .response_create_event import ResponseCreateEvent as ResponseCreateEvent
from .session_create_params import SessionCreateParams as SessionCreateParams
+from .session_created_event import SessionCreatedEvent as SessionCreatedEvent
+from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent
+from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent
+from .conversation_item_param import ConversationItemParam as ConversationItemParam
+from .realtime_connect_params import RealtimeConnectParams as RealtimeConnectParams
+from .realtime_response_usage import RealtimeResponseUsage as RealtimeResponseUsage
from .session_create_response import SessionCreateResponse as SessionCreateResponse
+from .realtime_response_status import RealtimeResponseStatus as RealtimeResponseStatus
+from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent
+from .conversation_item_content import ConversationItemContent as ConversationItemContent
+from .rate_limits_updated_event import RateLimitsUpdatedEvent as RateLimitsUpdatedEvent
+from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent
+from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent
+from .conversation_created_event import ConversationCreatedEvent as ConversationCreatedEvent
+from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent
+from .session_update_event_param import SessionUpdateEventParam as SessionUpdateEventParam
+from .realtime_client_event_param import RealtimeClientEventParam as RealtimeClientEventParam
+from .response_cancel_event_param import ResponseCancelEventParam as ResponseCancelEventParam
+from .response_create_event_param import ResponseCreateEventParam as ResponseCreateEventParam
+from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent
+from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent
+from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent
+from .conversation_item_content_param import ConversationItemContentParam as ConversationItemContentParam
+from .conversation_item_created_event import ConversationItemCreatedEvent as ConversationItemCreatedEvent
+from .conversation_item_deleted_event import ConversationItemDeletedEvent as ConversationItemDeletedEvent
+from .input_audio_buffer_append_event import InputAudioBufferAppendEvent as InputAudioBufferAppendEvent
+from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent as InputAudioBufferCommitEvent
+from .response_output_item_done_event import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent
+from .conversation_item_truncate_event import ConversationItemTruncateEvent as ConversationItemTruncateEvent
+from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent as InputAudioBufferClearedEvent
+from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent
+from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent
+from .conversation_item_truncated_event import ConversationItemTruncatedEvent as ConversationItemTruncatedEvent
+from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent
+from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent
+from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam
+from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam
+from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam
+from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent
+from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam as InputAudioBufferAppendEventParam
+from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam as InputAudioBufferCommitEventParam
+from .response_audio_transcript_delta_event import (
+ ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent,
+)
+from .conversation_item_truncate_event_param import (
+ ConversationItemTruncateEventParam as ConversationItemTruncateEventParam,
+)
+from .input_audio_buffer_speech_started_event import (
+ InputAudioBufferSpeechStartedEvent as InputAudioBufferSpeechStartedEvent,
+)
+from .input_audio_buffer_speech_stopped_event import (
+ InputAudioBufferSpeechStoppedEvent as InputAudioBufferSpeechStoppedEvent,
+)
+from .response_function_call_arguments_done_event import (
+ ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent,
+)
+from .response_function_call_arguments_delta_event import (
+ ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent,
+)
+from .conversation_item_input_audio_transcription_failed_event import (
+ ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent,
+)
+from .conversation_item_input_audio_transcription_completed_event import (
+ ConversationItemInputAudioTranscriptionCompletedEvent as ConversationItemInputAudioTranscriptionCompletedEvent,
+)
src/openai/types/beta/realtime/conversation_created_event.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationCreatedEvent", "Conversation"]
+
+
+class Conversation(BaseModel):
+ id: Optional[str] = None
+ """The unique ID of the conversation."""
+
+ object: Optional[Literal["realtime.conversation"]] = None
+ """The object type, must be `realtime.conversation`."""
+
+
+class ConversationCreatedEvent(BaseModel):
+ conversation: Conversation
+ """The conversation resource."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ type: Literal["conversation.created"]
+ """The event type, must be `conversation.created`."""
src/openai/types/beta/realtime/conversation_item.py
@@ -0,0 +1,61 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item_content import ConversationItemContent
+
+__all__ = ["ConversationItem"]
+
+
+class ConversationItem(BaseModel):
+ id: Optional[str] = None
+ """
+    The unique ID of the item. This can be generated by the client to help manage
+    server-side context, but is not required; the server will generate one if
+ not provided.
+ """
+
+ arguments: Optional[str] = None
+ """The arguments of the function call (for `function_call` items)."""
+
+ call_id: Optional[str] = None
+ """
+ The ID of the function call (for `function_call` and `function_call_output`
+ items). If passed on a `function_call_output` item, the server will check that a
+ `function_call` item with the same ID exists in the conversation history.
+ """
+
+ content: Optional[List[ConversationItemContent]] = None
+ """The content of the message, applicable for `message` items.
+
+ - Message items of role `system` support only `input_text` content
+ - Message items of role `user` support `input_text` and `input_audio` content
+ - Message items of role `assistant` support `text` content.
+ """
+
+ name: Optional[str] = None
+ """The name of the function being called (for `function_call` items)."""
+
+ object: Optional[Literal["realtime.item"]] = None
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ output: Optional[str] = None
+ """The output of the function call (for `function_call_output` items)."""
+
+ role: Optional[Literal["user", "assistant", "system"]] = None
+ """
+ The role of the message sender (`user`, `assistant`, `system`), only applicable
+ for `message` items.
+ """
+
+ status: Optional[Literal["completed", "incomplete"]] = None
+ """The status of the item (`completed`, `incomplete`).
+
+ These have no effect on the conversation, but are accepted for consistency with
+ the `conversation.item.created` event.
+ """
+
+ type: Optional[Literal["message", "function_call", "function_call_output"]] = None
+ """The type of the item (`message`, `function_call`, `function_call_output`)."""
src/openai/types/beta/realtime/conversation_item_content.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemContent"]
+
+
+class ConversationItemContent(BaseModel):
+ id: Optional[str] = None
+ """
+ ID of a previous conversation item (like a model response), used for
+ `item_reference` content types.
+ """
+
+ audio: Optional[str] = None
+ """Base64-encoded audio bytes, used for `input_audio` content type."""
+
+ text: Optional[str] = None
+ """The text content, used for `input_text` and `text` content types."""
+
+ transcript: Optional[str] = None
+ """The transcript of the audio, used for `input_audio` content type."""
+
+ type: Optional[Literal["input_text", "input_audio", "item_reference", "text"]] = None
+ """The content type (`input_text`, `input_audio`, `item_reference`, `text`)."""
src/openai/types/beta/realtime/conversation_item_content_param.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["ConversationItemContentParam"]
+
+
+class ConversationItemContentParam(TypedDict, total=False):
+ id: str
+ """
+ ID of a previous conversation item (like a model response), used for
+ `item_reference` content types.
+ """
+
+ audio: str
+ """Base64-encoded audio bytes, used for `input_audio` content type."""
+
+ text: str
+ """The text content, used for `input_text` and `text` content types."""
+
+ transcript: str
+ """The transcript of the audio, used for `input_audio` content type."""
+
+ type: Literal["input_text", "input_audio", "item_reference", "text"]
+ """The content type (`input_text`, `input_audio`, `item_reference`, `text`)."""
src/openai/types/beta/realtime/conversation_item_create_event.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ConversationItemCreateEvent"]
+
+
+class ConversationItemCreateEvent(BaseModel):
+ item: ConversationItem
+ """The item to add to the conversation."""
+
+ type: Literal["conversation.item.create"]
+ """The event type, must be `conversation.item.create`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
+
+ previous_item_id: Optional[str] = None
+ """The ID of the preceding item after which the new item will be inserted.
+
+ If not set, the new item will be appended to the end of the conversation. If
+ set, it allows an item to be inserted mid-conversation. If the ID cannot be
+ found, an error will be returned and the item will not be added.
+ """
src/openai/types/beta/realtime/conversation_item_create_event_param.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .conversation_item_param import ConversationItemParam
+
+__all__ = ["ConversationItemCreateEventParam"]
+
+
+class ConversationItemCreateEventParam(TypedDict, total=False):
+ item: Required[ConversationItemParam]
+ """The item to add to the conversation."""
+
+ type: Required[Literal["conversation.item.create"]]
+ """The event type, must be `conversation.item.create`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
+
+ previous_item_id: str
+ """The ID of the preceding item after which the new item will be inserted.
+
+ If not set, the new item will be appended to the end of the conversation. If
+ set, it allows an item to be inserted mid-conversation. If the ID cannot be
+ found, an error will be returned and the item will not be added.
+ """
src/openai/types/beta/realtime/conversation_item_created_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ConversationItemCreatedEvent"]
+
+
+class ConversationItemCreatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """The item to add to the conversation."""
+
+ previous_item_id: str
+ """
+ The ID of the preceding item in the Conversation context, allows the client to
+ understand the order of the conversation.
+ """
+
+ type: Literal["conversation.item.created"]
+ """The event type, must be `conversation.item.created`."""
src/openai/types/beta/realtime/conversation_item_delete_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemDeleteEvent"]
+
+
+class ConversationItemDeleteEvent(BaseModel):
+ item_id: str
+ """The ID of the item to delete."""
+
+ type: Literal["conversation.item.delete"]
+ """The event type, must be `conversation.item.delete`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/conversation_item_delete_event_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ConversationItemDeleteEventParam"]
+
+
+class ConversationItemDeleteEventParam(TypedDict, total=False):
+ item_id: Required[str]
+ """The ID of the item to delete."""
+
+ type: Required[Literal["conversation.item.delete"]]
+ """The event type, must be `conversation.item.delete`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/conversation_item_deleted_event.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemDeletedEvent"]
+
+
+class ConversationItemDeletedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item that was deleted."""
+
+ type: Literal["conversation.item.deleted"]
+ """The event type, must be `conversation.item.deleted`."""
src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemInputAudioTranscriptionCompletedEvent"]
+
+
+class ConversationItemInputAudioTranscriptionCompletedEvent(BaseModel):
+ content_index: int
+ """The index of the content part containing the audio."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item containing the audio."""
+
+ transcript: str
+ """The transcribed text."""
+
+ type: Literal["conversation.item.input_audio_transcription.completed"]
+ """
+ The event type, must be `conversation.item.input_audio_transcription.completed`.
+ """
src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemInputAudioTranscriptionFailedEvent", "Error"]
+
+
+class Error(BaseModel):
+ code: Optional[str] = None
+ """Error code, if any."""
+
+ message: Optional[str] = None
+ """A human-readable error message."""
+
+ param: Optional[str] = None
+ """Parameter related to the error, if any."""
+
+ type: Optional[str] = None
+ """The type of error."""
+
+
+class ConversationItemInputAudioTranscriptionFailedEvent(BaseModel):
+ content_index: int
+ """The index of the content part containing the audio."""
+
+ error: Error
+ """Details of the transcription error."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item."""
+
+ type: Literal["conversation.item.input_audio_transcription.failed"]
+ """The event type, must be `conversation.item.input_audio_transcription.failed`."""
src/openai/types/beta/realtime/conversation_item_param.py
@@ -0,0 +1,62 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Literal, TypedDict
+
+from .conversation_item_content_param import ConversationItemContentParam
+
+__all__ = ["ConversationItemParam"]
+
+
+class ConversationItemParam(TypedDict, total=False):
+ id: str
+ """
+    The unique ID of the item. This can be generated by the client to help manage
+    server-side context, but is not required; the server will generate one if
+ not provided.
+ """
+
+ arguments: str
+ """The arguments of the function call (for `function_call` items)."""
+
+ call_id: str
+ """
+ The ID of the function call (for `function_call` and `function_call_output`
+ items). If passed on a `function_call_output` item, the server will check that a
+ `function_call` item with the same ID exists in the conversation history.
+ """
+
+ content: Iterable[ConversationItemContentParam]
+ """The content of the message, applicable for `message` items.
+
+ - Message items of role `system` support only `input_text` content
+ - Message items of role `user` support `input_text` and `input_audio` content
+ - Message items of role `assistant` support `text` content.
+ """
+
+ name: str
+ """The name of the function being called (for `function_call` items)."""
+
+ object: Literal["realtime.item"]
+ """Identifier for the API object being returned - always `realtime.item`."""
+
+ output: str
+ """The output of the function call (for `function_call_output` items)."""
+
+ role: Literal["user", "assistant", "system"]
+ """
+ The role of the message sender (`user`, `assistant`, `system`), only applicable
+ for `message` items.
+ """
+
+ status: Literal["completed", "incomplete"]
+ """The status of the item (`completed`, `incomplete`).
+
+ These have no effect on the conversation, but are accepted for consistency with
+ the `conversation.item.created` event.
+ """
+
+ type: Literal["message", "function_call", "function_call_output"]
+ """The type of the item (`message`, `function_call`, `function_call_output`)."""
src/openai/types/beta/realtime/conversation_item_truncate_event.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemTruncateEvent"]
+
+
+class ConversationItemTruncateEvent(BaseModel):
+ audio_end_ms: int
+ """Inclusive duration up to which audio is truncated, in milliseconds.
+
+ If the audio_end_ms is greater than the actual audio duration, the server will
+ respond with an error.
+ """
+
+ content_index: int
+ """The index of the content part to truncate. Set this to 0."""
+
+ item_id: str
+ """The ID of the assistant message item to truncate.
+
+ Only assistant message items can be truncated.
+ """
+
+ type: Literal["conversation.item.truncate"]
+ """The event type, must be `conversation.item.truncate`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/conversation_item_truncate_event_param.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ConversationItemTruncateEventParam"]
+
+
+class ConversationItemTruncateEventParam(TypedDict, total=False):
+ audio_end_ms: Required[int]
+ """Inclusive duration up to which audio is truncated, in milliseconds.
+
+ If `audio_end_ms` is greater than the actual audio duration, the server will
+ respond with an error.
+ """
+
+ content_index: Required[int]
+ """The index of the content part to truncate. Set this to 0."""
+
+ item_id: Required[str]
+ """The ID of the assistant message item to truncate.
+
+ Only assistant message items can be truncated.
+ """
+
+ type: Required[Literal["conversation.item.truncate"]]
+ """The event type, must be `conversation.item.truncate`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/conversation_item_truncated_event.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ConversationItemTruncatedEvent"]
+
+
+class ConversationItemTruncatedEvent(BaseModel):
+ audio_end_ms: int
+ """The duration up to which the audio was truncated, in milliseconds."""
+
+ content_index: int
+ """The index of the content part that was truncated."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the assistant message item that was truncated."""
+
+ type: Literal["conversation.item.truncated"]
+ """The event type, must be `conversation.item.truncated`."""
src/openai/types/beta/realtime/error_event.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ErrorEvent", "Error"]
+
+
+class Error(BaseModel):
+ message: str
+ """A human-readable error message."""
+
+ type: str
+ """The type of error (e.g., "invalid_request_error", "server_error")."""
+
+ code: Optional[str] = None
+ """Error code, if any."""
+
+ event_id: Optional[str] = None
+ """The event_id of the client event that caused the error, if applicable."""
+
+ param: Optional[str] = None
+ """Parameter related to the error, if any."""
+
+
+class ErrorEvent(BaseModel):
+ error: Error
+ """Details of the error."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ type: Literal["error"]
+ """The event type, must be `error`."""
src/openai/types/beta/realtime/input_audio_buffer_append_event.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferAppendEvent"]
+
+
+class InputAudioBufferAppendEvent(BaseModel):
+ audio: str
+ """Base64-encoded audio bytes.
+
+ This must be in the format specified by the `input_audio_format` field in the
+ session configuration.
+ """
+
+ type: Literal["input_audio_buffer.append"]
+ """The event type, must be `input_audio_buffer.append`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["InputAudioBufferAppendEventParam"]
+
+
+class InputAudioBufferAppendEventParam(TypedDict, total=False):
+ audio: Required[str]
+ """Base64-encoded audio bytes.
+
+ This must be in the format specified by the `input_audio_format` field in the
+ session configuration.
+ """
+
+ type: Required[Literal["input_audio_buffer.append"]]
+ """The event type, must be `input_audio_buffer.append`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_clear_event.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferClearEvent"]
+
+
+class InputAudioBufferClearEvent(BaseModel):
+ type: Literal["input_audio_buffer.clear"]
+ """The event type, must be `input_audio_buffer.clear`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["InputAudioBufferClearEventParam"]
+
+
+class InputAudioBufferClearEventParam(TypedDict, total=False):
+ type: Required[Literal["input_audio_buffer.clear"]]
+ """The event type, must be `input_audio_buffer.clear`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferClearedEvent"]
+
+
+class InputAudioBufferClearedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ type: Literal["input_audio_buffer.cleared"]
+ """The event type, must be `input_audio_buffer.cleared`."""
src/openai/types/beta/realtime/input_audio_buffer_commit_event.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferCommitEvent"]
+
+
+class InputAudioBufferCommitEvent(BaseModel):
+ type: Literal["input_audio_buffer.commit"]
+ """The event type, must be `input_audio_buffer.commit`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["InputAudioBufferCommitEventParam"]
+
+
+class InputAudioBufferCommitEventParam(TypedDict, total=False):
+ type: Required[Literal["input_audio_buffer.commit"]]
+ """The event type, must be `input_audio_buffer.commit`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/input_audio_buffer_committed_event.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferCommittedEvent"]
+
+
+class InputAudioBufferCommittedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item that will be created."""
+
+ previous_item_id: str
+ """The ID of the preceding item after which the new item will be inserted."""
+
+ type: Literal["input_audio_buffer.committed"]
+ """The event type, must be `input_audio_buffer.committed`."""
src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferSpeechStartedEvent"]
+
+
+class InputAudioBufferSpeechStartedEvent(BaseModel):
+ audio_start_ms: int
+ """
+ Milliseconds from the start of all audio written to the buffer during the
+ session when speech was first detected. This will correspond to the beginning of
+ audio sent to the model, and thus includes the `prefix_padding_ms` configured in
+ the Session.
+ """
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item that will be created when speech stops."""
+
+ type: Literal["input_audio_buffer.speech_started"]
+ """The event type, must be `input_audio_buffer.speech_started`."""
src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["InputAudioBufferSpeechStoppedEvent"]
+
+
+class InputAudioBufferSpeechStoppedEvent(BaseModel):
+ audio_end_ms: int
+ """Milliseconds since the session started when speech stopped.
+
+ This will correspond to the end of audio sent to the model, and thus includes
+ the `min_silence_duration_ms` configured in the Session.
+ """
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the user message item that will be created."""
+
+ type: Literal["input_audio_buffer.speech_stopped"]
+ """The event type, must be `input_audio_buffer.speech_stopped`."""
src/openai/types/beta/realtime/rate_limits_updated_event.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["RateLimitsUpdatedEvent", "RateLimit"]
+
+
+class RateLimit(BaseModel):
+ limit: Optional[int] = None
+ """The maximum allowed value for the rate limit."""
+
+ name: Optional[Literal["requests", "tokens"]] = None
+ """The name of the rate limit (`requests`, `tokens`)."""
+
+ remaining: Optional[int] = None
+ """The remaining value before the limit is reached."""
+
+ reset_seconds: Optional[float] = None
+ """Seconds until the rate limit resets."""
+
+
+class RateLimitsUpdatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ rate_limits: List[RateLimit]
+ """List of rate limit information."""
+
+ type: Literal["rate_limits.updated"]
+ """The event type, must be `rate_limits.updated`."""
src/openai/types/beta/realtime/realtime_client_event.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ...._utils import PropertyInfo
+from .session_update_event import SessionUpdateEvent
+from .response_cancel_event import ResponseCancelEvent
+from .response_create_event import ResponseCreateEvent
+from .conversation_item_create_event import ConversationItemCreateEvent
+from .conversation_item_delete_event import ConversationItemDeleteEvent
+from .input_audio_buffer_clear_event import InputAudioBufferClearEvent
+from .input_audio_buffer_append_event import InputAudioBufferAppendEvent
+from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent
+from .conversation_item_truncate_event import ConversationItemTruncateEvent
+
+__all__ = ["RealtimeClientEvent"]
+
+RealtimeClientEvent: TypeAlias = Annotated[
+ Union[
+ SessionUpdateEvent,
+ InputAudioBufferAppendEvent,
+ InputAudioBufferCommitEvent,
+ InputAudioBufferClearEvent,
+ ConversationItemCreateEvent,
+ ConversationItemTruncateEvent,
+ ConversationItemDeleteEvent,
+ ResponseCreateEvent,
+ ResponseCancelEvent,
+ ],
+ PropertyInfo(discriminator="type"),
+]
src/openai/types/beta/realtime/realtime_client_event_param.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .session_update_event_param import SessionUpdateEventParam
+from .response_cancel_event_param import ResponseCancelEventParam
+from .response_create_event_param import ResponseCreateEventParam
+from .conversation_item_create_event_param import ConversationItemCreateEventParam
+from .conversation_item_delete_event_param import ConversationItemDeleteEventParam
+from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam
+from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam
+from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam
+from .conversation_item_truncate_event_param import ConversationItemTruncateEventParam
+
+__all__ = ["RealtimeClientEventParam"]
+
+RealtimeClientEventParam: TypeAlias = Union[
+ SessionUpdateEventParam,
+ InputAudioBufferAppendEventParam,
+ InputAudioBufferCommitEventParam,
+ InputAudioBufferClearEventParam,
+ ConversationItemCreateEventParam,
+ ConversationItemTruncateEventParam,
+ ConversationItemDeleteEventParam,
+ ResponseCreateEventParam,
+ ResponseCancelEventParam,
+]
src/openai/types/beta/realtime/realtime_connect_params.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["RealtimeConnectParams"]
+
+
+class RealtimeConnectParams(TypedDict, total=False):
+ model: Required[str]
src/openai/types/beta/realtime/realtime_response.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+from .realtime_response_usage import RealtimeResponseUsage
+from .realtime_response_status import RealtimeResponseStatus
+
+__all__ = ["RealtimeResponse"]
+
+
+class RealtimeResponse(BaseModel):
+ id: Optional[str] = None
+ """The unique ID of the response."""
+
+ metadata: Optional[object] = None
+ """Developer-provided string key-value pairs associated with this response."""
+
+ object: Optional[Literal["realtime.response"]] = None
+ """The object type, must be `realtime.response`."""
+
+ output: Optional[List[ConversationItem]] = None
+ """The list of output items generated by the response."""
+
+ status: Optional[Literal["completed", "cancelled", "failed", "incomplete"]] = None
+ """
+ The final status of the response (`completed`, `cancelled`, `failed`, or
+ `incomplete`).
+ """
+
+ status_details: Optional[RealtimeResponseStatus] = None
+ """Additional details about the status."""
+
+ usage: Optional[RealtimeResponseUsage] = None
+ """Usage statistics for the Response, this will correspond to billing.
+
+ A Realtime API session will maintain a conversation context and append new Items
+ to the Conversation, thus output from previous turns (text and audio tokens)
+ will become the input for later turns.
+ """
src/openai/types/beta/realtime/realtime_response_status.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["RealtimeResponseStatus", "Error"]
+
+
+class Error(BaseModel):
+ code: Optional[str] = None
+ """Error code, if any."""
+
+ type: Optional[str] = None
+ """The type of error."""
+
+
+class RealtimeResponseStatus(BaseModel):
+ error: Optional[Error] = None
+ """
+ A description of the error that caused the response to fail, populated when the
+ `status` is `failed`.
+ """
+
+ reason: Optional[Literal["turn_detected", "client_cancelled", "max_output_tokens", "content_filter"]] = None
+ """The reason the Response did not complete.
+
+ For a `cancelled` Response, one of `turn_detected` (the server VAD detected a
+ new start of speech) or `client_cancelled` (the client sent a cancel event). For
+ an `incomplete` Response, one of `max_output_tokens` or `content_filter` (the
+ server-side safety filter activated and cut off the response).
+ """
+
+ type: Optional[Literal["completed", "cancelled", "incomplete", "failed"]] = None
+ """
+ The type of error that caused the response to fail, corresponding to the
+ `status` field (`completed`, `cancelled`, `incomplete`, `failed`).
+ """
src/openai/types/beta/realtime/realtime_response_usage.py
@@ -0,0 +1,52 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ...._models import BaseModel
+
+__all__ = ["RealtimeResponseUsage", "InputTokenDetails", "OutputTokenDetails"]
+
+
+class InputTokenDetails(BaseModel):
+ audio_tokens: Optional[int] = None
+ """The number of audio tokens used in the Response."""
+
+ cached_tokens: Optional[int] = None
+ """The number of cached tokens used in the Response."""
+
+ text_tokens: Optional[int] = None
+ """The number of text tokens used in the Response."""
+
+
+class OutputTokenDetails(BaseModel):
+ audio_tokens: Optional[int] = None
+ """The number of audio tokens used in the Response."""
+
+ text_tokens: Optional[int] = None
+ """The number of text tokens used in the Response."""
+
+
+class RealtimeResponseUsage(BaseModel):
+ input_token_details: Optional[InputTokenDetails] = None
+ """Details about the input tokens used in the Response."""
+
+ input_tokens: Optional[int] = None
+ """
+ The number of input tokens used in the Response, including text and audio
+ tokens.
+ """
+
+ output_token_details: Optional[OutputTokenDetails] = None
+ """Details about the output tokens used in the Response."""
+
+ output_tokens: Optional[int] = None
+ """
+ The number of output tokens sent in the Response, including text and audio
+ tokens.
+ """
+
+ total_tokens: Optional[int] = None
+ """
+ The total number of tokens in the Response including input and output text and
+ audio tokens.
+ """
src/openai/types/beta/realtime/realtime_server_event.py
@@ -0,0 +1,72 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ...._utils import PropertyInfo
+from .error_event import ErrorEvent
+from .response_done_event import ResponseDoneEvent
+from .session_created_event import SessionCreatedEvent
+from .session_updated_event import SessionUpdatedEvent
+from .response_created_event import ResponseCreatedEvent
+from .response_text_done_event import ResponseTextDoneEvent
+from .rate_limits_updated_event import RateLimitsUpdatedEvent
+from .response_audio_done_event import ResponseAudioDoneEvent
+from .response_text_delta_event import ResponseTextDeltaEvent
+from .conversation_created_event import ConversationCreatedEvent
+from .response_audio_delta_event import ResponseAudioDeltaEvent
+from .conversation_item_created_event import ConversationItemCreatedEvent
+from .conversation_item_deleted_event import ConversationItemDeletedEvent
+from .response_output_item_done_event import ResponseOutputItemDoneEvent
+from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent
+from .response_content_part_done_event import ResponseContentPartDoneEvent
+from .response_output_item_added_event import ResponseOutputItemAddedEvent
+from .conversation_item_truncated_event import ConversationItemTruncatedEvent
+from .response_content_part_added_event import ResponseContentPartAddedEvent
+from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent
+from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent
+from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent
+from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent
+from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent
+from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent
+from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent
+from .conversation_item_input_audio_transcription_failed_event import ConversationItemInputAudioTranscriptionFailedEvent
+from .conversation_item_input_audio_transcription_completed_event import (
+ ConversationItemInputAudioTranscriptionCompletedEvent,
+)
+
+__all__ = ["RealtimeServerEvent"]
+
+RealtimeServerEvent: TypeAlias = Annotated[
+ Union[
+ ErrorEvent,
+ SessionCreatedEvent,
+ SessionUpdatedEvent,
+ ConversationCreatedEvent,
+ InputAudioBufferCommittedEvent,
+ InputAudioBufferClearedEvent,
+ InputAudioBufferSpeechStartedEvent,
+ InputAudioBufferSpeechStoppedEvent,
+ ConversationItemCreatedEvent,
+ ConversationItemInputAudioTranscriptionCompletedEvent,
+ ConversationItemInputAudioTranscriptionFailedEvent,
+ ConversationItemTruncatedEvent,
+ ConversationItemDeletedEvent,
+ ResponseCreatedEvent,
+ ResponseDoneEvent,
+ ResponseOutputItemAddedEvent,
+ ResponseOutputItemDoneEvent,
+ ResponseContentPartAddedEvent,
+ ResponseContentPartDoneEvent,
+ ResponseTextDeltaEvent,
+ ResponseTextDoneEvent,
+ ResponseAudioTranscriptDeltaEvent,
+ ResponseAudioTranscriptDoneEvent,
+ ResponseAudioDeltaEvent,
+ ResponseAudioDoneEvent,
+ ResponseFunctionCallArgumentsDeltaEvent,
+ ResponseFunctionCallArgumentsDoneEvent,
+ RateLimitsUpdatedEvent,
+ ],
+ PropertyInfo(discriminator="type"),
+]
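
Because each variant fixes `type` to a distinct Literal, the union behaves as a tagged union: comparing `event.type` narrows it for type checkers. A sketch of a dispatcher over a few of the variants:

from openai.types.beta.realtime.realtime_server_event import RealtimeServerEvent

def handle(event: RealtimeServerEvent) -> None:
    if event.type == "response.text.delta":
        print(event.delta, end="", flush=True)  # narrowed to ResponseTextDeltaEvent
    elif event.type == "response.done":
        print()  # event.response carries the final status and usage
    elif event.type == "error":
        raise RuntimeError(f"{event.error.type}: {event.error.message}")
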
src/openai/types/beta/realtime/response_audio_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseAudioDeltaEvent"]
+
+
+class ResponseAudioDeltaEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ delta: str
+ """Base64-encoded audio data delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.audio.delta"]
+ """The event type, must be `response.audio.delta`."""
src/openai/types/beta/realtime/response_audio_done_event.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseAudioDoneEvent"]
+
+
+class ResponseAudioDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.audio.done"]
+ """The event type, must be `response.audio.done`."""
src/openai/types/beta/realtime/response_audio_transcript_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseAudioTranscriptDeltaEvent"]
+
+
+class ResponseAudioTranscriptDeltaEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ delta: str
+ """The transcript delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.audio_transcript.delta"]
+ """The event type, must be `response.audio_transcript.delta`."""
src/openai/types/beta/realtime/response_audio_transcript_done_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseAudioTranscriptDoneEvent"]
+
+
+class ResponseAudioTranscriptDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ transcript: str
+ """The final transcript of the audio."""
+
+ type: Literal["response.audio_transcript.done"]
+ """The event type, must be `response.audio_transcript.done`."""
src/openai/types/beta/realtime/response_cancel_event.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseCancelEvent"]
+
+
+class ResponseCancelEvent(BaseModel):
+ type: Literal["response.cancel"]
+ """The event type, must be `response.cancel`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
+
+ response_id: Optional[str] = None
+ """
+ A specific response ID to cancel; if not provided, the server will cancel an
+ in-progress response in the default conversation.
+ """
src/openai/types/beta/realtime/response_cancel_event_param.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ResponseCancelEventParam"]
+
+
+class ResponseCancelEventParam(TypedDict, total=False):
+ type: Required[Literal["response.cancel"]]
+ """The event type, must be `response.cancel`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
+
+ response_id: str
+ """
+ A specific response ID to cancel; if not provided, the server will cancel an
+ in-progress response in the default conversation.
+ """
src/openai/types/beta/realtime/response_content_part_added_event.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseContentPartAddedEvent", "Part"]
+
+
+class Part(BaseModel):
+ audio: Optional[str] = None
+ """Base64-encoded audio data (if type is "audio")."""
+
+ text: Optional[str] = None
+ """The text content (if type is "text")."""
+
+ transcript: Optional[str] = None
+ """The transcript of the audio (if type is "audio")."""
+
+ type: Optional[Literal["text", "audio"]] = None
+ """The content type ("text", "audio")."""
+
+
+class ResponseContentPartAddedEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item to which the content part was added."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ part: Part
+ """The content part that was added."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.content_part.added"]
+ """The event type, must be `response.content_part.added`."""
src/openai/types/beta/realtime/response_content_part_done_event.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseContentPartDoneEvent", "Part"]
+
+
+class Part(BaseModel):
+ audio: Optional[str] = None
+ """Base64-encoded audio data (if type is "audio")."""
+
+ text: Optional[str] = None
+ """The text content (if type is "text")."""
+
+ transcript: Optional[str] = None
+ """The transcript of the audio (if type is "audio")."""
+
+ type: Optional[Literal["text", "audio"]] = None
+ """The content type ("text", "audio")."""
+
+
+class ResponseContentPartDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ part: Part
+ """The content part that is done."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.content_part.done"]
+ """The event type, must be `response.content_part.done`."""
src/openai/types/beta/realtime/response_create_event.py
@@ -0,0 +1,115 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ResponseCreateEvent", "Response", "ResponseTool"]
+
+
+class ResponseTool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class Response(BaseModel):
+ conversation: Union[str, Literal["auto", "none"], None] = None
+ """Controls which conversation the response is added to.
+
+ Currently supports `auto` and `none`, with `auto` as the default value. The
+ `auto` value means that the contents of the response will be added to the
+ default conversation. Set this to `none` to create an out-of-band response that
+ will not add items to the default conversation.
+ """
+
+ input: Optional[List[ConversationItem]] = None
+ """Input items to include in the prompt for the model.
+
+ Creates a new context for this response, without including the default
+ conversation. Can include references to items from the default conversation.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ metadata: Optional[object] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format. Keys can be a maximum of 64 characters long and values can be
+ a maximum of 512 characters long.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[ResponseTool]] = None
+ """Tools (functions) available to the model."""
+
+ voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
+
+
+class ResponseCreateEvent(BaseModel):
+ type: Literal["response.create"]
+ """The event type, must be `response.create`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
+
+ response: Optional[Response] = None
+ """Create a new Realtime response with these parameters"""
src/openai/types/beta/realtime/response_create_event_param.py
@@ -0,0 +1,116 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .conversation_item_param import ConversationItemParam
+
+__all__ = ["ResponseCreateEventParam", "Response", "ResponseTool"]
+
+
+class ResponseTool(TypedDict, total=False):
+ description: str
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: str
+ """The name of the function."""
+
+ parameters: object
+ """Parameters of the function in JSON Schema."""
+
+ type: Literal["function"]
+ """The type of the tool, i.e. `function`."""
+
+
+class Response(TypedDict, total=False):
+ conversation: Union[str, Literal["auto", "none"]]
+ """Controls which conversation the response is added to.
+
+ Currently supports `auto` and `none`, with `auto` as the default value. The
+ `auto` value means that the contents of the response will be added to the
+ default conversation. Set this to `none` to create an out-of-band response that
+ will not add items to the default conversation.
+ """
+
+ input: Iterable[ConversationItemParam]
+ """Input items to include in the prompt for the model.
+
+ Creates a new context for this response, without including the default
+ conversation. Can include references to items from the default conversation.
+ """
+
+ instructions: str
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"]]
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ metadata: Optional[object]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format. Keys can be a maximum of 64 characters long and values can be
+ a maximum of 512 characters long.
+ """
+
+ modalities: List[Literal["text", "audio"]]
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: float
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: str
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Iterable[ResponseTool]
+ """Tools (functions) available to the model."""
+
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
+
+
+class ResponseCreateEventParam(TypedDict, total=False):
+ type: Required[Literal["response.create"]]
+ """The event type, must be `response.create`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
+
+ response: Response
+ """Create a new Realtime response with these parameters"""
src/openai/types/beta/realtime/response_created_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .realtime_response import RealtimeResponse
+
+__all__ = ["ResponseCreatedEvent"]
+
+
+class ResponseCreatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response: RealtimeResponse
+ """The response resource."""
+
+ type: Literal["response.created"]
+ """The event type, must be `response.created`."""
src/openai/types/beta/realtime/response_done_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .realtime_response import RealtimeResponse
+
+__all__ = ["ResponseDoneEvent"]
+
+
+class ResponseDoneEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response: RealtimeResponse
+ """The response resource."""
+
+ type: Literal["response.done"]
+ """The event type, must be `response.done`."""
src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"]
+
+
+class ResponseFunctionCallArgumentsDeltaEvent(BaseModel):
+ call_id: str
+ """The ID of the function call."""
+
+ delta: str
+ """The arguments delta as a JSON string."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the function call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.function_call_arguments.delta"]
+ """The event type, must be `response.function_call_arguments.delta`."""
src/openai/types/beta/realtime/response_function_call_arguments_done_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseFunctionCallArgumentsDoneEvent"]
+
+
+class ResponseFunctionCallArgumentsDoneEvent(BaseModel):
+ arguments: str
+ """The final arguments as a JSON string."""
+
+ call_id: str
+ """The ID of the function call."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the function call item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.function_call_arguments.done"]
+ """The event type, must be `response.function_call_arguments.done`."""
src/openai/types/beta/realtime/response_output_item_added_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ResponseOutputItemAddedEvent"]
+
+
+class ResponseOutputItemAddedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """The item to add to the conversation."""
+
+ output_index: int
+ """The index of the output item in the Response."""
+
+ response_id: str
+ """The ID of the Response to which the item belongs."""
+
+ type: Literal["response.output_item.added"]
+ """The event type, must be `response.output_item.added`."""
src/openai/types/beta/realtime/response_output_item_done_event.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .conversation_item import ConversationItem
+
+__all__ = ["ResponseOutputItemDoneEvent"]
+
+
+class ResponseOutputItemDoneEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ item: ConversationItem
+ """The item to add to the conversation."""
+
+ output_index: int
+ """The index of the output item in the Response."""
+
+ response_id: str
+ """The ID of the Response to which the item belongs."""
+
+ type: Literal["response.output_item.done"]
+ """The event type, must be `response.output_item.done`."""
src/openai/types/beta/realtime/response_text_delta_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseTextDeltaEvent"]
+
+
+class ResponseTextDeltaEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ delta: str
+ """The text delta."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ type: Literal["response.text.delta"]
+ """The event type, must be `response.text.delta`."""
src/openai/types/beta/realtime/response_text_done_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["ResponseTextDoneEvent"]
+
+
+class ResponseTextDoneEvent(BaseModel):
+ content_index: int
+ """The index of the content part in the item's content array."""
+
+ event_id: str
+ """The unique ID of the server event."""
+
+ item_id: str
+ """The ID of the item."""
+
+ output_index: int
+ """The index of the output item in the response."""
+
+ response_id: str
+ """The ID of the response."""
+
+ text: str
+ """The final text content."""
+
+ type: Literal["response.text.done"]
+ """The event type, must be `response.text.done`."""
src/openai/types/beta/realtime/session.py
@@ -0,0 +1,148 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["Session", "InputAudioTranscription", "Tool", "TurnDetection"]
+
+
+class InputAudioTranscription(BaseModel):
+ model: Optional[str] = None
+ """
+ The model to use for transcription; `whisper-1` is the only currently supported
+ model.
+ """
+
+
+class Tool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class TurnDetection(BaseModel):
+ prefix_padding_ms: Optional[int] = None
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: Optional[Literal["server_vad"]] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class Session(BaseModel):
+ id: Optional[str] = None
+ """Unique identifier for the session object."""
+
+ input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: Optional[InputAudioTranscription] = None
+ """
+ Configuration for input audio transcription. This defaults to off and can be set
+ to `null` to turn it off once enabled. Input audio transcription is not native
+ to the model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ model: Union[
+ str,
+ Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ],
+ None,
+ ] = None
+ """The Realtime model used for this session."""
+
+ output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[Tool]] = None
+ """Tools (functions) available to the model."""
+
+ turn_detection: Optional[TurnDetection] = None
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
src/openai/types/beta/realtime/session_created_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .session import Session
+from ...._models import BaseModel
+
+__all__ = ["SessionCreatedEvent"]
+
+
+class SessionCreatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ session: Session
+ """Realtime session object configuration."""
+
+ type: Literal["session.created"]
+ """The event type, must be `session.created`."""
src/openai/types/beta/realtime/session_update_event.py
@@ -0,0 +1,158 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["SessionUpdateEvent", "Session", "SessionInputAudioTranscription", "SessionTool", "SessionTurnDetection"]
+
+
+class SessionInputAudioTranscription(BaseModel):
+ model: Optional[str] = None
+ """
+ The model to use for transcription; `whisper-1` is the only currently supported
+ model.
+ """
+
+
+class SessionTool(BaseModel):
+ description: Optional[str] = None
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: Optional[str] = None
+ """The name of the function."""
+
+ parameters: Optional[object] = None
+ """Parameters of the function in JSON Schema."""
+
+ type: Optional[Literal["function"]] = None
+ """The type of the tool, i.e. `function`."""
+
+
+class SessionTurnDetection(BaseModel):
+ create_response: Optional[bool] = None
+ """Whether or not to automatically generate a response when VAD is enabled.
+
+ `true` by default.
+ """
+
+ prefix_padding_ms: Optional[int] = None
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: Optional[int] = None
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: Optional[str] = None
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class Session(BaseModel):
+ model: Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ """The Realtime model used for this session."""
+
+ input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: Optional[SessionInputAudioTranscription] = None
+ """
+ Configuration for input audio transcription. This defaults to off and can be set
+ to `null` to turn it off once enabled. Input audio transcription is not native
+ to the model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+ """
+
+ instructions: Optional[str] = None
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"], None] = None
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: Optional[float] = None
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: Optional[str] = None
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Optional[List[SessionTool]] = None
+ """Tools (functions) available to the model."""
+
+ turn_detection: Optional[SessionTurnDetection] = None
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
+
+
+class SessionUpdateEvent(BaseModel):
+ session: Session
+ """Realtime session object configuration."""
+
+ type: Literal["session.update"]
+ """The event type, must be `session.update`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/session_update_event_param.py
@@ -0,0 +1,166 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = [
+ "SessionUpdateEventParam",
+ "Session",
+ "SessionInputAudioTranscription",
+ "SessionTool",
+ "SessionTurnDetection",
+]
+
+
+class SessionInputAudioTranscription(TypedDict, total=False):
+ model: str
+ """
+ The model to use for transcription; `whisper-1` is the only currently supported
+ model.
+ """
+
+
+class SessionTool(TypedDict, total=False):
+ description: str
+ """
+ The description of the function, including guidance on when and how to call it,
+ and guidance about what to tell the user when calling (if anything).
+ """
+
+ name: str
+ """The name of the function."""
+
+ parameters: object
+ """Parameters of the function in JSON Schema."""
+
+ type: Literal["function"]
+ """The type of the tool, i.e. `function`."""
+
+
+class SessionTurnDetection(TypedDict, total=False):
+ create_response: bool
+ """Whether or not to automatically generate a response when VAD is enabled.
+
+ `true` by default.
+ """
+
+ prefix_padding_ms: int
+ """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+ Defaults to 300ms.
+ """
+
+ silence_duration_ms: int
+ """Duration of silence to detect speech stop (in milliseconds).
+
+ Defaults to 500ms. With shorter values the model will respond more quickly, but
+ may jump in on short pauses from the user.
+ """
+
+ threshold: float
+ """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+ A higher threshold will require louder audio to activate the model, and thus
+ might perform better in noisy environments.
+ """
+
+ type: str
+ """Type of turn detection, only `server_vad` is currently supported."""
+
+
+class Session(TypedDict, total=False):
+ model: Required[
+ Literal[
+ "gpt-4o-realtime-preview",
+ "gpt-4o-realtime-preview-2024-10-01",
+ "gpt-4o-realtime-preview-2024-12-17",
+ "gpt-4o-mini-realtime-preview",
+ "gpt-4o-mini-realtime-preview-2024-12-17",
+ ]
+ ]
+ """The Realtime model used for this session."""
+
+ input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ input_audio_transcription: SessionInputAudioTranscription
+ """
+ Configuration for input audio transcription, defaults to off and can be set to
+ `null` to turn it off once enabled. Input audio transcription is not native to the
+ model, since the model consumes audio directly. Transcription runs
+ asynchronously through Whisper and should be treated as rough guidance rather
+ than the representation understood by the model.
+ """
+
+ instructions: str
+ """The default system instructions (i.e.
+
+ system message) prepended to model calls. This field allows the client to guide
+ the model on desired responses. The model can be instructed on response content
+ and format (e.g. "be extremely succinct", "act friendly", "here are examples of
+ good responses") and on audio behavior (e.g. "talk quickly", "inject emotion
+ into your voice", "laugh frequently"). The instructions are not guaranteed to be
+ followed by the model, but they provide guidance to the model on the desired
+ behavior.
+
+ Note that the server sets default instructions which will be used if this field
+ is not set and are visible in the `session.created` event at the start of the
+ session.
+ """
+
+ max_response_output_tokens: Union[int, Literal["inf"]]
+ """
+ Maximum number of output tokens for a single assistant response, inclusive of
+ tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+ `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+ """
+
+ modalities: List[Literal["text", "audio"]]
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
+ """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+
+ temperature: float
+ """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
+
+ tool_choice: str
+ """How the model chooses tools.
+
+ Options are `auto`, `none`, `required`, or specify a function.
+ """
+
+ tools: Iterable[SessionTool]
+ """Tools (functions) available to the model."""
+
+ turn_detection: SessionTurnDetection
+ """Configuration for turn detection.
+
+ Can be set to `null` to turn off. Server VAD means that the model will detect
+ the start and end of speech based on audio volume and respond at the end of user
+ speech.
+ """
+
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]
+ """The voice the model uses to respond.
+
+ Voice cannot be changed during the session once the model has responded with
+ audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
+ `coral`, `echo`, `sage`, `shimmer`, and `verse`.
+ """
+
+
+class SessionUpdateEventParam(TypedDict, total=False):
+ session: Required[Session]
+ """Realtime session object configuration."""
+
+ type: Required[Literal["session.update"]]
+ """The event type, must be `session.update`."""
+
+ event_id: str
+ """Optional client-generated ID used to identify this event."""
src/openai/types/beta/realtime/session_updated_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .session import Session
+from ...._models import BaseModel
+
+__all__ = ["SessionUpdatedEvent"]
+
+
+class SessionUpdatedEvent(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ session: Session
+ """Realtime session object configuration."""
+
+ type: Literal["session.updated"]
+ """The event type, must be `session.updated`."""
src/openai/types/__init__.py
@@ -47,6 +47,7 @@ from .moderation_create_params import ModerationCreateParams as ModerationCreate
from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse
from .moderation_create_response import ModerationCreateResponse as ModerationCreateResponse
from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam
+from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions
from .image_create_variation_params import ImageCreateVariationParams as ImageCreateVariationParams
from .moderation_image_url_input_param import ModerationImageURLInputParam as ModerationImageURLInputParam
from .moderation_multi_modal_input_param import ModerationMultiModalInputParam as ModerationMultiModalInputParam
src/openai/types/websocket_connection_options.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from typing_extensions import Sequence, TypedDict
+
+if TYPE_CHECKING:
+ from websockets import Subprotocol
+ from websockets.extensions import ClientExtensionFactory
+
+
+class WebsocketConnectionOptions(TypedDict, total=False):
+ """Websocket connection options copied from `websockets`.
+
+ See, for example: https://websockets.readthedocs.io/en/stable/reference/asyncio/client.html#websockets.asyncio.client.connect
+ """
+
+ extensions: Sequence[ClientExtensionFactory] | None
+ """List of supported extensions, in order in which they should be negotiated and run."""
+
+ subprotocols: Sequence[Subprotocol] | None
+ """List of supported subprotocols, in order of decreasing preference."""
+
+ compression: str | None
+ """The “permessage-deflate” extension is enabled by default. Set compression to None to disable it. See the [compression guide](https://websockets.readthedocs.io/en/stable/topics/compression.html) for details."""
+
+ # limits
+ max_size: int | None
+ """Maximum size of incoming messages in bytes. None disables the limit."""
+
+ max_queue: int | None | tuple[int | None, int | None]
+ """High-water mark of the buffer where frames are received. It defaults to 16 frames. The low-water mark defaults to max_queue // 4. You may pass a (high, low) tuple to set the high-water and low-water marks. If you want to disable flow control entirely, you may set it to None, although that’s a bad idea."""
+
+ write_limit: int | tuple[int, int | None]
+ """High-water mark of write buffer in bytes. It is passed to set_write_buffer_limits(). It defaults to 32 KiB. You may pass a (high, low) tuple to set the high-water and low-water marks."""
src/openai/_client.py
@@ -63,6 +63,14 @@ class OpenAI(SyncAPIClient):
organization: str | None
project: str | None
+ websocket_base_url: str | httpx.URL | None
+ """Base URL for WebSocket connections.
+
+ If not specified, the default base URL will be used, with 'wss://' replacing the
+ 'http://' or 'https://' scheme. For example: 'http://example.com' becomes
+ 'wss://example.com'.
+ """
+
def __init__(
self,
*,
@@ -70,6 +78,7 @@ class OpenAI(SyncAPIClient):
organization: str | None = None,
project: str | None = None,
base_url: str | httpx.URL | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -111,6 +120,8 @@ class OpenAI(SyncAPIClient):
project = os.environ.get("OPENAI_PROJECT_ID")
self.project = project
+ self.websocket_base_url = websocket_base_url
+
if base_url is None:
base_url = os.environ.get("OPENAI_BASE_URL")
if base_url is None:
@@ -172,6 +183,7 @@ class OpenAI(SyncAPIClient):
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.Client | None = None,
@@ -208,6 +220,7 @@ class OpenAI(SyncAPIClient):
api_key=api_key or self.api_key,
organization=organization or self.organization,
project=project or self.project,
+ websocket_base_url=websocket_base_url or self.websocket_base_url,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
@@ -277,6 +290,14 @@ class AsyncOpenAI(AsyncAPIClient):
organization: str | None
project: str | None
+ websocket_base_url: str | httpx.URL | None
+ """Base URL for WebSocket connections.
+
+ If not specified, the default base URL will be used, with 'wss://' replacing the
+ 'http://' or 'https://' scheme. For example: 'http://example.com' becomes
+ 'wss://example.com'.
+ """
+
def __init__(
self,
*,
@@ -284,6 +305,7 @@ class AsyncOpenAI(AsyncAPIClient):
organization: str | None = None,
project: str | None = None,
base_url: str | httpx.URL | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -325,6 +347,8 @@ class AsyncOpenAI(AsyncAPIClient):
project = os.environ.get("OPENAI_PROJECT_ID")
self.project = project
+ self.websocket_base_url = websocket_base_url
+
if base_url is None:
base_url = os.environ.get("OPENAI_BASE_URL")
if base_url is None:
@@ -386,6 +410,7 @@ class AsyncOpenAI(AsyncAPIClient):
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ websocket_base_url: str | httpx.URL | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.AsyncClient | None = None,
@@ -422,6 +447,7 @@ class AsyncOpenAI(AsyncAPIClient):
api_key=api_key or self.api_key,
organization=organization or self.organization,
project=project or self.project,
+ websocket_base_url=websocket_base_url or self.websocket_base_url,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
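With the new parameter, websocket traffic can target a different host than HTTP requests; a minimal sketch with a hypothetical URL:

```python
from openai import OpenAI

client = OpenAI(
    api_key="sk-...",  # placeholder
    websocket_base_url="wss://example.com/v1",  # hypothetical override
)
# HTTP requests still use the default base URL; only websocket
# connections pick up the override.
```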
tests/api_resources/beta/test_realtime.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+
+import pytest
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestRealtime:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+
+class TestAsyncRealtime:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
.stats.yml
@@ -1,2 +1,2 @@
configured_endpoints: 69
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-779ea2754025daf5e18eb8ceb203ec321692636bc3a999338556a479178efa6c.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-0d64ca9e45f51b4279f87b205eeb3a3576df98407698ce053f2e2302c1c08df1.yml
api.md
@@ -239,6 +239,57 @@ Methods:
## Realtime
+Types:
+
+```python
+from openai.types.beta.realtime import (
+ ConversationCreatedEvent,
+ ConversationItem,
+ ConversationItemContent,
+ ConversationItemCreateEvent,
+ ConversationItemCreatedEvent,
+ ConversationItemDeleteEvent,
+ ConversationItemDeletedEvent,
+ ConversationItemInputAudioTranscriptionCompletedEvent,
+ ConversationItemInputAudioTranscriptionFailedEvent,
+ ConversationItemTruncateEvent,
+ ConversationItemTruncatedEvent,
+ ErrorEvent,
+ InputAudioBufferAppendEvent,
+ InputAudioBufferClearEvent,
+ InputAudioBufferClearedEvent,
+ InputAudioBufferCommitEvent,
+ InputAudioBufferCommittedEvent,
+ InputAudioBufferSpeechStartedEvent,
+ InputAudioBufferSpeechStoppedEvent,
+ RateLimitsUpdatedEvent,
+ RealtimeClientEvent,
+ RealtimeResponse,
+ RealtimeResponseStatus,
+ RealtimeResponseUsage,
+ RealtimeServerEvent,
+ ResponseAudioDeltaEvent,
+ ResponseAudioDoneEvent,
+ ResponseAudioTranscriptDeltaEvent,
+ ResponseAudioTranscriptDoneEvent,
+ ResponseCancelEvent,
+ ResponseContentPartAddedEvent,
+ ResponseContentPartDoneEvent,
+ ResponseCreateEvent,
+ ResponseCreatedEvent,
+ ResponseDoneEvent,
+ ResponseFunctionCallArgumentsDeltaEvent,
+ ResponseFunctionCallArgumentsDoneEvent,
+ ResponseOutputItemAddedEvent,
+ ResponseOutputItemDoneEvent,
+ ResponseTextDeltaEvent,
+ ResponseTextDoneEvent,
+ SessionCreatedEvent,
+ SessionUpdateEvent,
+ SessionUpdatedEvent,
+)
+```
+
### Sessions
Types:
pyproject.toml
@@ -35,9 +35,6 @@ classifiers = [
"License :: OSI Approved :: Apache Software License"
]
-[project.optional-dependencies]
-datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"]
-
[project.urls]
Homepage = "https://github.com/openai/openai-python"
Repository = "https://github.com/openai/openai-python"
@@ -45,6 +42,10 @@ Repository = "https://github.com/openai/openai-python"
[project.scripts]
openai = "openai.cli:main"
+[project.optional-dependencies]
+realtime = ["websockets >= 13, < 15"]
+datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"]
+
[tool.rye]
managed = true
# version pins are in requirements-dev.lock
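Moving `[project.optional-dependencies]` below `[project.scripts]` also introduces the new `realtime` extra, so the websocket dependency stays opt-in: `pip install "openai[realtime]"` pulls in `websockets >= 13, < 15`, while the base install is unchanged.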
requirements-dev.lock
@@ -185,5 +185,7 @@ urllib3==2.2.1
# via requests
virtualenv==20.24.5
# via nox
+websockets==14.1
+ # via openai
zipp==3.17.0
# via importlib-metadata
requirements.lock
@@ -64,3 +64,5 @@ typing-extensions==4.12.2
# via pydantic-core
tzdata==2024.1
# via pandas
+websockets==14.1
+ # via openai