# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

import json
import logging
from types import TracebackType
from typing import TYPE_CHECKING, Any, Iterator, cast
from typing_extensions import AsyncIterator

import httpx
from pydantic import BaseModel

from .calls import (
    Calls,
    AsyncCalls,
    CallsWithRawResponse,
    AsyncCallsWithRawResponse,
    CallsWithStreamingResponse,
    AsyncCallsWithStreamingResponse,
)
from ..._types import Omit, Query, Headers, omit
from ..._utils import (
    is_azure_client,
    maybe_transform,
    strip_not_given,
    async_maybe_transform,
    is_async_azure_client,
)
from ..._compat import cached_property
from ..._models import construct_type_unchecked
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._exceptions import OpenAIError
from ..._base_client import _merge_mappings
from .client_secrets import (
    ClientSecrets,
    AsyncClientSecrets,
    ClientSecretsWithRawResponse,
    AsyncClientSecretsWithRawResponse,
    ClientSecretsWithStreamingResponse,
    AsyncClientSecretsWithStreamingResponse,
)
from ...types.realtime import session_update_event_param
from ...types.websocket_connection_options import WebsocketConnectionOptions
from ...types.realtime.realtime_client_event import RealtimeClientEvent
from ...types.realtime.realtime_server_event import RealtimeServerEvent
from ...types.realtime.conversation_item_param import ConversationItemParam
from ...types.realtime.realtime_client_event_param import RealtimeClientEventParam
from ...types.realtime.realtime_response_create_params_param import RealtimeResponseCreateParamsParam

if TYPE_CHECKING:
    from websockets.sync.client import ClientConnection as WebsocketConnection
    from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection

    from ..._client import OpenAI, AsyncOpenAI

__all__ = ["Realtime", "AsyncRealtime"]

log: logging.Logger = logging.getLogger(__name__)


class Realtime(SyncAPIResource):
    @cached_property
    def client_secrets(self) -> ClientSecrets:
        return ClientSecrets(self._client)

    @cached_property
    def calls(self) -> Calls:
        from ...lib._realtime import _Calls

        return _Calls(self._client)

    @cached_property
    def with_raw_response(self) -> RealtimeWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return RealtimeWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> RealtimeWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return RealtimeWithStreamingResponse(self)

    def connect(
        self,
        *,
        call_id: str | Omit = omit,
        model: str | Omit = omit,
        extra_query: Query = {},
        extra_headers: Headers = {},
        websocket_connection_options: WebsocketConnectionOptions = {},
    ) -> RealtimeConnectionManager:
        """
        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.

        Some notable benefits of the API include:

        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.

        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
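
        A minimal usage sketch (assuming `client` is an `OpenAI` instance; the model name and
        the `session.update` payload are illustrative rather than a verified schema):

        ```py
        with client.realtime.connect(model="gpt-realtime") as connection:
            connection.session.update(session={"instructions": "You are a helpful assistant."})
            for event in connection:
                print(event.type)
        ```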
        """
        return RealtimeConnectionManager(
            client=self._client,
            extra_query=extra_query,
            extra_headers=extra_headers,
            websocket_connection_options=websocket_connection_options,
            call_id=call_id,
            model=model,
        )


class AsyncRealtime(AsyncAPIResource):
    @cached_property
    def client_secrets(self) -> AsyncClientSecrets:
        return AsyncClientSecrets(self._client)

    @cached_property
    def calls(self) -> AsyncCalls:
        from ...lib._realtime import _AsyncCalls

        return _AsyncCalls(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncRealtimeWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncRealtimeWithStreamingResponse(self)

    def connect(
        self,
        *,
        call_id: str | Omit = omit,
        model: str | Omit = omit,
        extra_query: Query = {},
        extra_headers: Headers = {},
        websocket_connection_options: WebsocketConnectionOptions = {},
    ) -> AsyncRealtimeConnectionManager:
        """
        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.

        Some notable benefits of the API include:

        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.

        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
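
        The async flow mirrors the sync variant; a minimal sketch (assuming `client` is an
        `AsyncOpenAI` instance, with an illustrative model name):

        ```py
        async with client.realtime.connect(model="gpt-realtime") as connection:
            async for event in connection:
                print(event.type)
        ```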
        """
        return AsyncRealtimeConnectionManager(
            client=self._client,
            extra_query=extra_query,
            extra_headers=extra_headers,
            websocket_connection_options=websocket_connection_options,
            call_id=call_id,
            model=model,
        )


class RealtimeWithRawResponse:
    def __init__(self, realtime: Realtime) -> None:
        self._realtime = realtime

    @cached_property
    def client_secrets(self) -> ClientSecretsWithRawResponse:
        return ClientSecretsWithRawResponse(self._realtime.client_secrets)

    @cached_property
    def calls(self) -> CallsWithRawResponse:
        return CallsWithRawResponse(self._realtime.calls)


class AsyncRealtimeWithRawResponse:
    def __init__(self, realtime: AsyncRealtime) -> None:
        self._realtime = realtime

    @cached_property
    def client_secrets(self) -> AsyncClientSecretsWithRawResponse:
        return AsyncClientSecretsWithRawResponse(self._realtime.client_secrets)

    @cached_property
    def calls(self) -> AsyncCallsWithRawResponse:
        return AsyncCallsWithRawResponse(self._realtime.calls)


class RealtimeWithStreamingResponse:
    def __init__(self, realtime: Realtime) -> None:
        self._realtime = realtime

    @cached_property
    def client_secrets(self) -> ClientSecretsWithStreamingResponse:
        return ClientSecretsWithStreamingResponse(self._realtime.client_secrets)

    @cached_property
    def calls(self) -> CallsWithStreamingResponse:
        return CallsWithStreamingResponse(self._realtime.calls)


class AsyncRealtimeWithStreamingResponse:
    def __init__(self, realtime: AsyncRealtime) -> None:
        self._realtime = realtime

    @cached_property
    def client_secrets(self) -> AsyncClientSecretsWithStreamingResponse:
        return AsyncClientSecretsWithStreamingResponse(self._realtime.client_secrets)

    @cached_property
    def calls(self) -> AsyncCallsWithStreamingResponse:
        return AsyncCallsWithStreamingResponse(self._realtime.calls)


class AsyncRealtimeConnection:
    """Represents a live websocket connection to the Realtime API"""

    session: AsyncRealtimeSessionResource
    response: AsyncRealtimeResponseResource
    input_audio_buffer: AsyncRealtimeInputAudioBufferResource
    conversation: AsyncRealtimeConversationResource
    output_audio_buffer: AsyncRealtimeOutputAudioBufferResource

    _connection: AsyncWebsocketConnection

    def __init__(self, connection: AsyncWebsocketConnection) -> None:
        self._connection = connection

        self.session = AsyncRealtimeSessionResource(self)
        self.response = AsyncRealtimeResponseResource(self)
        self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
        self.conversation = AsyncRealtimeConversationResource(self)
        self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)

    async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
        """
        An infinite iterator that will continue to yield events until
        the connection is closed.
        """
        from websockets.exceptions import ConnectionClosedOK

        try:
            while True:
                yield await self.recv()
        except ConnectionClosedOK:
            return

    async def recv(self) -> RealtimeServerEvent:
        """
        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.

        Canceling this method is safe. There's no risk of losing data.
        """
        return self.parse_event(await self.recv_bytes())

    async def recv_bytes(self) -> bytes:
        """Receive the next message from the connection as raw bytes.

        Canceling this method is safe. There's no risk of losing data.

        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
        then you can call `.parse_event(data)`.
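
        For example, a sketch of receiving raw bytes and parsing them yourself (assuming
        `connection` is an open `AsyncRealtimeConnection`):

        ```py
        data = await connection.recv_bytes()
        event = connection.parse_event(data)
        ```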
        """
        message = await self._connection.recv(decode=False)
        log.debug("Received websocket message: %s", message)
        return message

    async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
        """Send a client event to the server over the websocket connection."""
        data = (
            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
            if isinstance(event, BaseModel)
            else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
        )
        await self._connection.send(data)

    async def close(self, *, code: int = 1000, reason: str = "") -> None:
        await self._connection.close(code=code, reason=reason)

    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
        """
        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.

        This is helpful if you're using `.recv_bytes()`.
        """
        return cast(
            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
        )


class AsyncRealtimeConnectionManager:
    """
    Context manager over an `AsyncRealtimeConnection` that is returned by `realtime.connect()`

    This context manager ensures that the connection will be closed when it exits.

    ---

    Note that if your application doesn't work well with the context manager approach then you
    can call the `.enter()` method directly to initiate a connection.

    **Warning**: You must remember to close the connection with `.close()`.

    ```py
    connection = await client.realtime.connect(...).enter()
    # ...
    await connection.close()
    ```
    """

    def __init__(
        self,
        *,
        client: AsyncOpenAI,
        call_id: str | Omit = omit,
        model: str | Omit = omit,
        extra_query: Query,
        extra_headers: Headers,
        websocket_connection_options: WebsocketConnectionOptions,
    ) -> None:
        self.__client = client
        self.__call_id = call_id
        self.__model = model
        self.__connection: AsyncRealtimeConnection | None = None
        self.__extra_query = extra_query
        self.__extra_headers = extra_headers
        self.__websocket_connection_options = websocket_connection_options

    async def __aenter__(self) -> AsyncRealtimeConnection:
        """
        👋 If your application doesn't work well with the context manager approach then you
        can call this method directly to initiate a connection.

        **Warning**: You must remember to close the connection with `.close()`.

        ```py
        connection = await client.realtime.connect(...).enter()
        # ...
        await connection.close()
        ```
        """
        try:
            from websockets.asyncio.client import connect
        except ImportError as exc:
            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc

        extra_query = self.__extra_query
        await self.__client._refresh_api_key()
        auth_headers = self.__client.auth_headers
        if self.__call_id is not omit:
            extra_query = {**extra_query, "call_id": self.__call_id}
        if is_async_azure_client(self.__client):
            model = self.__model
            if not model:
                raise OpenAIError("`model` is required for Azure Realtime API")
            else:
                url, auth_headers = await self.__client._configure_realtime(model, extra_query)
        else:
            url = self._prepare_url().copy_with(
                params={
                    **self.__client.base_url.params,
                    **({"model": self.__model} if self.__model is not omit else {}),
                    **extra_query,
                },
            )
        log.debug("Connecting to %s", url)
        if self.__websocket_connection_options:
            log.debug("Connection options: %s", self.__websocket_connection_options)

        self.__connection = AsyncRealtimeConnection(
            await connect(
                str(url),
                user_agent_header=self.__client.user_agent,
                additional_headers=_merge_mappings(
                    {
                        **auth_headers,
                    },
                    self.__extra_headers,
                ),
                **self.__websocket_connection_options,
            )
        )

        return self.__connection

    enter = __aenter__

    def _prepare_url(self) -> httpx.URL:
        if self.__client.websocket_base_url is not None:
            base_url = httpx.URL(self.__client.websocket_base_url)
        else:
            base_url = self.__client._base_url.copy_with(scheme="wss")

        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
        return base_url.copy_with(raw_path=merge_raw_path)

    async def __aexit__(
        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        if self.__connection is not None:
            await self.__connection.close()


class RealtimeConnection:
    """Represents a live websocket connection to the Realtime API"""

    session: RealtimeSessionResource
    response: RealtimeResponseResource
    input_audio_buffer: RealtimeInputAudioBufferResource
    conversation: RealtimeConversationResource
    output_audio_buffer: RealtimeOutputAudioBufferResource

    _connection: WebsocketConnection

    def __init__(self, connection: WebsocketConnection) -> None:
        self._connection = connection

        self.session = RealtimeSessionResource(self)
        self.response = RealtimeResponseResource(self)
        self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
        self.conversation = RealtimeConversationResource(self)
        self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)

    def __iter__(self) -> Iterator[RealtimeServerEvent]:
        """
        An infinite iterator that will continue to yield events until
        the connection is closed.
        """
        from websockets.exceptions import ConnectionClosedOK

        try:
            while True:
                yield self.recv()
        except ConnectionClosedOK:
            return

    def recv(self) -> RealtimeServerEvent:
        """
        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.

        Canceling this method is safe. There's no risk of losing data.
        """
        return self.parse_event(self.recv_bytes())

    def recv_bytes(self) -> bytes:
        """Receive the next message from the connection as raw bytes.

        Canceling this method is safe. There's no risk of losing data.

        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
        then you can call `.parse_event(data)`.
        """
        message = self._connection.recv(decode=False)
        log.debug("Received websocket message: %s", message)
        return message

    def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
        """Send a client event to the server over the websocket connection."""
        data = (
            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
            if isinstance(event, BaseModel)
            else json.dumps(maybe_transform(event, RealtimeClientEventParam))
        )
        self._connection.send(data)

    def close(self, *, code: int = 1000, reason: str = "") -> None:
        self._connection.close(code=code, reason=reason)

    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
        """
        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.

        This is helpful if you're using `.recv_bytes()`.
        """
        return cast(
            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
        )


class RealtimeConnectionManager:
    """
    Context manager over a `RealtimeConnection` that is returned by `realtime.connect()`

    This context manager ensures that the connection will be closed when it exits.

    ---

    Note that if your application doesn't work well with the context manager approach then you
    can call the `.enter()` method directly to initiate a connection.

    **Warning**: You must remember to close the connection with `.close()`.

    ```py
    connection = client.realtime.connect(...).enter()
    # ...
    connection.close()
    ```
    """

    def __init__(
        self,
        *,
        client: OpenAI,
        call_id: str | Omit = omit,
        model: str | Omit = omit,
        extra_query: Query,
        extra_headers: Headers,
        websocket_connection_options: WebsocketConnectionOptions,
    ) -> None:
        self.__client = client
        self.__call_id = call_id
        self.__model = model
        self.__connection: RealtimeConnection | None = None
        self.__extra_query = extra_query
        self.__extra_headers = extra_headers
        self.__websocket_connection_options = websocket_connection_options

    def __enter__(self) -> RealtimeConnection:
        """
        👋 If your application doesn't work well with the context manager approach then you
        can call this method directly to initiate a connection.

        **Warning**: You must remember to close the connection with `.close()`.

        ```py
        connection = client.realtime.connect(...).enter()
        # ...
        connection.close()
        ```
        """
        try:
            from websockets.sync.client import connect
        except ImportError as exc:
            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc

        extra_query = self.__extra_query
        self.__client._refresh_api_key()
        auth_headers = self.__client.auth_headers
        if self.__call_id is not omit:
            extra_query = {**extra_query, "call_id": self.__call_id}
        if is_azure_client(self.__client):
            model = self.__model
            if not model:
                raise OpenAIError("`model` is required for Azure Realtime API")
            else:
                url, auth_headers = self.__client._configure_realtime(model, extra_query)
        else:
            url = self._prepare_url().copy_with(
                params={
                    **self.__client.base_url.params,
                    **({"model": self.__model} if self.__model is not omit else {}),
                    **extra_query,
                },
            )
        log.debug("Connecting to %s", url)
        if self.__websocket_connection_options:
            log.debug("Connection options: %s", self.__websocket_connection_options)

        self.__connection = RealtimeConnection(
            connect(
                str(url),
                user_agent_header=self.__client.user_agent,
                additional_headers=_merge_mappings(
                    {
                        **auth_headers,
                    },
                    self.__extra_headers,
                ),
                **self.__websocket_connection_options,
            )
        )

        return self.__connection

    enter = __enter__

    def _prepare_url(self) -> httpx.URL:
        if self.__client.websocket_base_url is not None:
            base_url = httpx.URL(self.__client.websocket_base_url)
        else:
            base_url = self.__client._base_url.copy_with(scheme="wss")

        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
        return base_url.copy_with(raw_path=merge_raw_path)

    def __exit__(
        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        if self.__connection is not None:
            self.__connection.close()


class BaseRealtimeConnectionResource:
    def __init__(self, connection: RealtimeConnection) -> None:
        self._connection = connection


class RealtimeSessionResource(BaseRealtimeConnectionResource):
    def update(self, *, session: session_update_event_param.Session, event_id: str | Omit = omit) -> None:
        """
        Send this event to update the session’s configuration.
        The client may send this event at any time to update any field
        except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.

        When the server receives a `session.update`, it will respond
        with a `session.updated` event showing the full, effective configuration.
        Only the fields that are present in the `session.update` are updated. To clear a field like
        `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array.
        To clear a field like `turn_detection`, pass `null`.
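
        For example, a minimal sketch (assuming `connection` is an open connection; the field
        shown is illustrative, not an exhaustive schema):

        ```py
        connection.session.update(session={"instructions": "Answer in one short sentence."})
        ```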
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
            )
        )


class RealtimeResponseResource(BaseRealtimeConnectionResource):
    def create(self, *, event_id: str | Omit = omit, response: RealtimeResponseCreateParamsParam | Omit = omit) -> None:
        """
        This event instructs the server to create a Response, which means triggering
        model inference. When in Server VAD mode, the server will create Responses
        automatically.

        A Response will include at least one Item, and may have two, in which case
        the second will be a function call. These Items will be appended to the
        conversation history by default.

        The server will respond with a `response.created` event, events for Items
        and content created, and finally a `response.done` event to indicate the
        Response is complete.

        The `response.create` event includes inference configuration like
        `instructions` and `tools`. If these are set, they will override the Session's
        configuration for this Response only.

        Responses can be created out-of-band of the default Conversation, meaning that they can
        have arbitrary input, and it's possible to disable writing the output to the Conversation.
        Only one Response can write to the default Conversation at a time, but otherwise multiple
        Responses can be created in parallel. The `metadata` field is a good way to disambiguate
        multiple simultaneous Responses.

        Clients can set `conversation` to `none` to create a Response that does not write to the default
        Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting
        raw Items and references to existing Items.
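
        For example, a sketch of an out-of-band Response that does not write to the default
        Conversation (the payload fields are illustrative, not an exhaustive schema):

        ```py
        connection.response.create(
            response={
                "conversation": "none",
                "metadata": {"topic": "classification"},
                "instructions": "Classify the user's sentiment as positive or negative.",
            }
        )
        ```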
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
            )
        )

    def cancel(self, *, event_id: str | Omit = omit, response_id: str | Omit = omit) -> None:
        """Send this event to cancel an in-progress response.

        The server will respond
        with a `response.done` event with a status of `response.status=cancelled`. If
        there is no response to cancel, the server will respond with an error. It's safe
        to call `response.cancel` even if no response is in progress; an error will be
        returned and the session will remain unaffected.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
            )
        )


class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
    def clear(self, *, event_id: str | Omit = omit) -> None:
        """Send this event to clear the audio bytes in the buffer.

        The server will
        respond with an `input_audio_buffer.cleared` event.
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
        )

    def commit(self, *, event_id: str | Omit = omit) -> None:
        """
        Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.

        Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event.
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
        )

    def append(self, *, audio: str, event_id: str | Omit = omit) -> None:
        """Send this event to append audio bytes to the input audio buffer.

        The audio
        buffer is temporary storage you can write to and later commit. A "commit" will create a new
        user message item in the conversation history from the buffer content and clear the buffer.
        Input audio transcription (if enabled) will be generated when the buffer is committed.

        If VAD is enabled, the audio buffer is used to detect speech and the server will decide
        when to commit. When Server VAD is disabled, you must commit the audio buffer
        manually. Input audio noise reduction operates on writes to the audio buffer.

        The client may choose how much audio to place in each event, up to a maximum
        of 15 MiB; for example, streaming smaller chunks from the client may allow the
        VAD to be more responsive. Unlike most other client events, the server will
        not send a confirmation response to this event.
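
        For example, a sketch that streams base64-encoded chunks and commits them manually, as
        you would with Server VAD disabled (`chunks` is assumed to already hold audio in the
        session's input format):

        ```py
        import base64

        for chunk in chunks:
            connection.input_audio_buffer.append(audio=base64.b64encode(chunk).decode("ascii"))
        connection.input_audio_buffer.commit()
        ```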
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
            )
        )


class RealtimeConversationResource(BaseRealtimeConnectionResource):
    @cached_property
    def item(self) -> RealtimeConversationItemResource:
        return RealtimeConversationItemResource(self._connection)


class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
    def delete(self, *, item_id: str, event_id: str | Omit = omit) -> None:
        """Send this event when you want to remove any item from the conversation
        history.

        The server will respond with a `conversation.item.deleted` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
            )
        )

    def create(
        self, *, item: ConversationItemParam, event_id: str | Omit = omit, previous_item_id: str | Omit = omit
    ) -> None:
        """
        Add a new Item to the Conversation's context, including messages, function
        calls, and function call responses. This event can be used both to populate a
        "history" of the conversation and to add new items mid-stream, but has the
        current limitation that it cannot populate assistant audio messages.

        If successful, the server will respond with a `conversation.item.created`
        event, otherwise an `error` event will be sent.
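
        For example, a sketch that seeds the conversation with a user message (the item shape
        is illustrative):

        ```py
        connection.conversation.item.create(
            item={
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "Hello!"}],
            }
        )
        ```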
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.create",
                        "item": item,
                        "event_id": event_id,
                        "previous_item_id": previous_item_id,
                    }
                ),
            )
        )

    def truncate(self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | Omit = omit) -> None:
        """Send this event to truncate a previous assistant message’s audio.

        The server
        will produce audio faster than realtime, so this event is useful when the user
        interrupts to truncate audio that has already been sent to the client but not
        yet played. This will synchronize the server's understanding of the audio with
        the client's playback.

        Truncating audio will delete the server-side text transcript to ensure there
        is no text in the context that hasn't been heard by the user.

        If successful, the server will respond with a `conversation.item.truncated`
        event.
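
        For example, a sketch that truncates an interrupted assistant message to what the user
        has actually heard (`last_assistant_item_id` and `elapsed_playback_ms` are assumed to be
        tracked by your playback code):

        ```py
        connection.conversation.item.truncate(
            item_id=last_assistant_item_id,
            content_index=0,
            audio_end_ms=elapsed_playback_ms,
        )
        ```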
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.truncate",
                        "audio_end_ms": audio_end_ms,
                        "content_index": content_index,
                        "item_id": item_id,
                        "event_id": event_id,
                    }
                ),
            )
        )

    def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
        """
        Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
        The server will respond with a `conversation.item.retrieved` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
            )
        )


class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
    def clear(self, *, event_id: str | Omit = omit) -> None:
        """**WebRTC/SIP Only:** Emit to cut off the current audio response.

        This will trigger the server to
        stop generating audio and emit an `output_audio_buffer.cleared` event. This
        event should be preceded by a `response.cancel` client event to stop the
        generation of the current response.
        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
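
        For example, a sketch of the cancel-then-clear sequence when the user interrupts:

        ```py
        connection.response.cancel()
        connection.output_audio_buffer.clear()
        ```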
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
        )


class BaseAsyncRealtimeConnectionResource:
    def __init__(self, connection: AsyncRealtimeConnection) -> None:
        self._connection = connection


class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
    async def update(self, *, session: session_update_event_param.Session, event_id: str | Omit = omit) -> None:
        """
        Send this event to update the session’s configuration.
        The client may send this event at any time to update any field
        except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.

        When the server receives a `session.update`, it will respond
        with a `session.updated` event showing the full, effective configuration.
        Only the fields that are present in the `session.update` are updated. To clear a field like
        `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array.
        To clear a field like `turn_detection`, pass `null`.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
            )
        )


class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
    async def create(
        self, *, event_id: str | Omit = omit, response: RealtimeResponseCreateParamsParam | Omit = omit
    ) -> None:
        """
        This event instructs the server to create a Response, which means triggering
        model inference. When in Server VAD mode, the server will create Responses
        automatically.

        A Response will include at least one Item, and may have two, in which case
        the second will be a function call. These Items will be appended to the
        conversation history by default.

        The server will respond with a `response.created` event, events for Items
        and content created, and finally a `response.done` event to indicate the
        Response is complete.

        The `response.create` event includes inference configuration like
        `instructions` and `tools`. If these are set, they will override the Session's
        configuration for this Response only.

        Responses can be created out-of-band of the default Conversation, meaning that they can
        have arbitrary input, and it's possible to disable writing the output to the Conversation.
        Only one Response can write to the default Conversation at a time, but otherwise multiple
        Responses can be created in parallel. The `metadata` field is a good way to disambiguate
        multiple simultaneous Responses.

        Clients can set `conversation` to `none` to create a Response that does not write to the default
        Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting
        raw Items and references to existing Items.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
            )
        )

    async def cancel(self, *, event_id: str | Omit = omit, response_id: str | Omit = omit) -> None:
        """Send this event to cancel an in-progress response.

        The server will respond
        with a `response.done` event with a status of `response.status=cancelled`. If
        there is no response to cancel, the server will respond with an error. It's safe
        to call `response.cancel` even if no response is in progress; an error will be
        returned and the session will remain unaffected.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
            )
        )


class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
    async def clear(self, *, event_id: str | Omit = omit) -> None:
        """Send this event to clear the audio bytes in the buffer.

        The server will
        respond with an `input_audio_buffer.cleared` event.
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
        )

    async def commit(self, *, event_id: str | Omit = omit) -> None:
        """
        Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.

        Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event.
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
        )

    async def append(self, *, audio: str, event_id: str | Omit = omit) -> None:
        """Send this event to append audio bytes to the input audio buffer.

        The audio
        buffer is temporary storage you can write to and later commit. A "commit" will create a new
        user message item in the conversation history from the buffer content and clear the buffer.
        Input audio transcription (if enabled) will be generated when the buffer is committed.

        If VAD is enabled, the audio buffer is used to detect speech and the server will decide
        when to commit. When Server VAD is disabled, you must commit the audio buffer
        manually. Input audio noise reduction operates on writes to the audio buffer.

        The client may choose how much audio to place in each event, up to a maximum
        of 15 MiB; for example, streaming smaller chunks from the client may allow the
        VAD to be more responsive. Unlike most other client events, the server will
        not send a confirmation response to this event.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
            )
        )


class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
    @cached_property
    def item(self) -> AsyncRealtimeConversationItemResource:
        return AsyncRealtimeConversationItemResource(self._connection)


class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
    async def delete(self, *, item_id: str, event_id: str | Omit = omit) -> None:
        """Send this event when you want to remove any item from the conversation
        history.

        The server will respond with a `conversation.item.deleted` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
            )
        )

    async def create(
        self, *, item: ConversationItemParam, event_id: str | Omit = omit, previous_item_id: str | Omit = omit
    ) -> None:
        """
        Add a new Item to the Conversation's context, including messages, function
        calls, and function call responses. This event can be used both to populate a
        "history" of the conversation and to add new items mid-stream, but has the
        current limitation that it cannot populate assistant audio messages.

        If successful, the server will respond with a `conversation.item.created`
        event, otherwise an `error` event will be sent.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.create",
                        "item": item,
                        "event_id": event_id,
                        "previous_item_id": previous_item_id,
                    }
                ),
            )
        )

    async def truncate(
        self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | Omit = omit
    ) -> None:
        """Send this event to truncate a previous assistant message’s audio.

        The server
        will produce audio faster than realtime, so this event is useful when the user
        interrupts to truncate audio that has already been sent to the client but not
        yet played. This will synchronize the server's understanding of the audio with
        the client's playback.

        Truncating audio will delete the server-side text transcript to ensure there
        is no text in the context that hasn't been heard by the user.

        If successful, the server will respond with a `conversation.item.truncated`
        event.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.truncate",
                        "audio_end_ms": audio_end_ms,
                        "content_index": content_index,
                        "item_id": item_id,
                        "event_id": event_id,
                    }
                ),
            )
        )

    async def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
        """
        Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
        The server will respond with a `conversation.item.retrieved` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
            )
        )


class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
    async def clear(self, *, event_id: str | Omit = omit) -> None:
        """**WebRTC/SIP Only:** Emit to cut off the current audio response.

        This will trigger the server to
        stop generating audio and emit an `output_audio_buffer.cleared` event. This
        event should be preceded by a `response.cancel` client event to stop the
        generation of the current response.
        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
        )