# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

import json
import logging
from types import TracebackType
from typing import TYPE_CHECKING, Any, Iterator, cast
from typing_extensions import AsyncIterator

import httpx
from pydantic import BaseModel

from .sessions import (
    Sessions,
    AsyncSessions,
    SessionsWithRawResponse,
    AsyncSessionsWithRawResponse,
    SessionsWithStreamingResponse,
    AsyncSessionsWithStreamingResponse,
)
from ...._types import NOT_GIVEN, Query, Headers, NotGiven
from ...._utils import (
    is_azure_client,
    maybe_transform,
    strip_not_given,
    async_maybe_transform,
    is_async_azure_client,
)
from ...._compat import cached_property
from ...._models import construct_type_unchecked
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._exceptions import OpenAIError
from ...._base_client import _merge_mappings
from ....types.beta.realtime import (
    session_update_event_param,
    response_create_event_param,
    transcription_session_update_param,
)
from .transcription_sessions import (
    TranscriptionSessions,
    AsyncTranscriptionSessions,
    TranscriptionSessionsWithRawResponse,
    AsyncTranscriptionSessionsWithRawResponse,
    TranscriptionSessionsWithStreamingResponse,
    AsyncTranscriptionSessionsWithStreamingResponse,
)
from ....types.websocket_connection_options import WebsocketConnectionOptions
from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent
from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent
from ....types.beta.realtime.conversation_item_param import ConversationItemParam
from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam

if TYPE_CHECKING:
    from websockets.sync.client import ClientConnection as WebsocketConnection
    from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection

    from ...._client import OpenAI, AsyncOpenAI

__all__ = ["Realtime", "AsyncRealtime"]

log: logging.Logger = logging.getLogger(__name__)


class Realtime(SyncAPIResource):
    @cached_property
    def sessions(self) -> Sessions:
        return Sessions(self._client)

    @cached_property
    def transcription_sessions(self) -> TranscriptionSessions:
        return TranscriptionSessions(self._client)

    @cached_property
    def with_raw_response(self) -> RealtimeWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return RealtimeWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> RealtimeWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return RealtimeWithStreamingResponse(self)

    def connect(
        self,
        *,
        model: str,
        extra_query: Query = {},
        extra_headers: Headers = {},
        websocket_connection_options: WebsocketConnectionOptions = {},
    ) -> RealtimeConnectionManager:
        """
        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.

        Some notable benefits of the API include:

        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.

        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
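
        For example, a minimal text-only exchange might look like the sketch below
        (the model name is illustrative; use whichever realtime-capable model you
        have access to):

        ```py
        from openai import OpenAI

        client = OpenAI()

        with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
            connection.session.update(session={"modalities": ["text"]})
            connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [{"type": "input_text", "text": "Say hello!"}],
                }
            )
            connection.response.create()

            for event in connection:
                if event.type == "response.text.delta":
                    print(event.delta, flush=True, end="")
                elif event.type == "response.done":
                    break
        ```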
        """
        return RealtimeConnectionManager(
            client=self._client,
            extra_query=extra_query,
            extra_headers=extra_headers,
            websocket_connection_options=websocket_connection_options,
            model=model,
        )


class AsyncRealtime(AsyncAPIResource):
    @cached_property
    def sessions(self) -> AsyncSessions:
        return AsyncSessions(self._client)

    @cached_property
    def transcription_sessions(self) -> AsyncTranscriptionSessions:
        return AsyncTranscriptionSessions(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncRealtimeWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncRealtimeWithStreamingResponse(self)

    def connect(
        self,
        *,
        model: str,
        extra_query: Query = {},
        extra_headers: Headers = {},
        websocket_connection_options: WebsocketConnectionOptions = {},
    ) -> AsyncRealtimeConnectionManager:
        """
        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.

        Some notable benefits of the API include:

        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.

        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
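
        For example, an async variant of the same minimal sketch, run inside an
        `async def` function (the model name is illustrative):

        ```py
        from openai import AsyncOpenAI

        client = AsyncOpenAI()

        async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
            await connection.session.update(session={"modalities": ["text"]})
            await connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [{"type": "input_text", "text": "Say hello!"}],
                }
            )
            await connection.response.create()

            async for event in connection:
                if event.type == "response.text.delta":
                    print(event.delta, flush=True, end="")
                elif event.type == "response.done":
                    break
        ```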
        """
        return AsyncRealtimeConnectionManager(
            client=self._client,
            extra_query=extra_query,
            extra_headers=extra_headers,
            websocket_connection_options=websocket_connection_options,
            model=model,
        )


class RealtimeWithRawResponse:
    def __init__(self, realtime: Realtime) -> None:
        self._realtime = realtime

    @cached_property
    def sessions(self) -> SessionsWithRawResponse:
        return SessionsWithRawResponse(self._realtime.sessions)

    @cached_property
    def transcription_sessions(self) -> TranscriptionSessionsWithRawResponse:
        return TranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)


class AsyncRealtimeWithRawResponse:
    def __init__(self, realtime: AsyncRealtime) -> None:
        self._realtime = realtime

    @cached_property
    def sessions(self) -> AsyncSessionsWithRawResponse:
        return AsyncSessionsWithRawResponse(self._realtime.sessions)

    @cached_property
    def transcription_sessions(self) -> AsyncTranscriptionSessionsWithRawResponse:
        return AsyncTranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)


class RealtimeWithStreamingResponse:
    def __init__(self, realtime: Realtime) -> None:
        self._realtime = realtime

    @cached_property
    def sessions(self) -> SessionsWithStreamingResponse:
        return SessionsWithStreamingResponse(self._realtime.sessions)

    @cached_property
    def transcription_sessions(self) -> TranscriptionSessionsWithStreamingResponse:
        return TranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)


class AsyncRealtimeWithStreamingResponse:
    def __init__(self, realtime: AsyncRealtime) -> None:
        self._realtime = realtime

    @cached_property
    def sessions(self) -> AsyncSessionsWithStreamingResponse:
        return AsyncSessionsWithStreamingResponse(self._realtime.sessions)

    @cached_property
    def transcription_sessions(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
        return AsyncTranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)


class AsyncRealtimeConnection:
    """Represents a live websocket connection to the Realtime API"""

    session: AsyncRealtimeSessionResource
    response: AsyncRealtimeResponseResource
    input_audio_buffer: AsyncRealtimeInputAudioBufferResource
    conversation: AsyncRealtimeConversationResource
    output_audio_buffer: AsyncRealtimeOutputAudioBufferResource
    transcription_session: AsyncRealtimeTranscriptionSessionResource

    _connection: AsyncWebsocketConnection

    def __init__(self, connection: AsyncWebsocketConnection) -> None:
        self._connection = connection

        self.session = AsyncRealtimeSessionResource(self)
        self.response = AsyncRealtimeResponseResource(self)
        self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
        self.conversation = AsyncRealtimeConversationResource(self)
        self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)
        self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)

    async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
        """
        An infinite iterator that will continue to yield events until
        the connection is closed.
        """
        from websockets.exceptions import ConnectionClosedOK

        try:
            while True:
                yield await self.recv()
        except ConnectionClosedOK:
            return

    async def recv(self) -> RealtimeServerEvent:
        """
        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.

        Canceling this method is safe. There's no risk of losing data.
        """
        return self.parse_event(await self.recv_bytes())

    async def recv_bytes(self) -> bytes:
        """Receive the next message from the connection as raw bytes.

        Canceling this method is safe. There's no risk of losing data.

        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
        then you can call `.parse_event(data)`.
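
        For example (a sketch; `connection` stands in for this object, typically
        obtained from `client.beta.realtime.connect(...)`):

        ```py
        data = await connection.recv_bytes()
        event = connection.parse_event(data)
        ```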
        """
        message = await self._connection.recv(decode=False)
        log.debug("Received websocket message: %s", message)
        return message

    async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
        data = (
            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
            if isinstance(event, BaseModel)
            else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
        )
        await self._connection.send(data)

    async def close(self, *, code: int = 1000, reason: str = "") -> None:
        await self._connection.close(code=code, reason=reason)

    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
        """
        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.

        This is helpful if you're using `.recv_bytes()`.
        """
        return cast(
            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
        )


class AsyncRealtimeConnectionManager:
    """
    Context manager over an `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()`

    This context manager ensures that the connection will be closed when it exits.

    ---

    Note that if your application doesn't work well with the context manager approach then you
    can call the `.enter()` method directly to initiate a connection.

    **Warning**: You must remember to close the connection with `.close()`.

    ```py
    connection = await client.beta.realtime.connect(...).enter()
    # ...
    await connection.close()
    ```
    """

    def __init__(
        self,
        *,
        client: AsyncOpenAI,
        model: str,
        extra_query: Query,
        extra_headers: Headers,
        websocket_connection_options: WebsocketConnectionOptions,
    ) -> None:
        self.__client = client
        self.__model = model
        self.__connection: AsyncRealtimeConnection | None = None
        self.__extra_query = extra_query
        self.__extra_headers = extra_headers
        self.__websocket_connection_options = websocket_connection_options

    async def __aenter__(self) -> AsyncRealtimeConnection:
        """
        👋 If your application doesn't work well with the context manager approach then you
        can call this method directly to initiate a connection.

        **Warning**: You must remember to close the connection with `.close()`.

        ```py
        connection = await client.beta.realtime.connect(...).enter()
        # ...
        await connection.close()
        ```
        """
        try:
            from websockets.asyncio.client import connect
        except ImportError as exc:
            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc

        extra_query = self.__extra_query
        await self.__client._refresh_api_key()
        auth_headers = self.__client.auth_headers
        if is_async_azure_client(self.__client):
            url, auth_headers = await self.__client._configure_realtime(self.__model, extra_query)
        else:
            url = self._prepare_url().copy_with(
                params={
                    **self.__client.base_url.params,
                    "model": self.__model,
                    **extra_query,
                },
            )
        log.debug("Connecting to %s", url)
        if self.__websocket_connection_options:
            log.debug("Connection options: %s", self.__websocket_connection_options)

        self.__connection = AsyncRealtimeConnection(
            await connect(
                str(url),
                user_agent_header=self.__client.user_agent,
                additional_headers=_merge_mappings(
                    {
                        **auth_headers,
                        "OpenAI-Beta": "realtime=v1",
                    },
                    self.__extra_headers,
                ),
                **self.__websocket_connection_options,
            )
        )

        return self.__connection

    enter = __aenter__

    def _prepare_url(self) -> httpx.URL:
        if self.__client.websocket_base_url is not None:
            base_url = httpx.URL(self.__client.websocket_base_url)
        else:
            base_url = self.__client._base_url.copy_with(scheme="wss")

        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
        return base_url.copy_with(raw_path=merge_raw_path)

    async def __aexit__(
        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        if self.__connection is not None:
            await self.__connection.close()


class RealtimeConnection:
    """Represents a live websocket connection to the Realtime API"""

    session: RealtimeSessionResource
    response: RealtimeResponseResource
    input_audio_buffer: RealtimeInputAudioBufferResource
    conversation: RealtimeConversationResource
    output_audio_buffer: RealtimeOutputAudioBufferResource
    transcription_session: RealtimeTranscriptionSessionResource

    _connection: WebsocketConnection

    def __init__(self, connection: WebsocketConnection) -> None:
        self._connection = connection

        self.session = RealtimeSessionResource(self)
        self.response = RealtimeResponseResource(self)
        self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
        self.conversation = RealtimeConversationResource(self)
        self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)
        self.transcription_session = RealtimeTranscriptionSessionResource(self)

    def __iter__(self) -> Iterator[RealtimeServerEvent]:
        """
        An infinite iterator that will continue to yield events until
        the connection is closed.
        """
        from websockets.exceptions import ConnectionClosedOK

        try:
            while True:
                yield self.recv()
        except ConnectionClosedOK:
            return

    def recv(self) -> RealtimeServerEvent:
        """
        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.

        Canceling this method is safe. There's no risk of losing data.
        """
        return self.parse_event(self.recv_bytes())

    def recv_bytes(self) -> bytes:
        """Receive the next message from the connection as raw bytes.

        Canceling this method is safe. There's no risk of losing data.

        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
        then you can call `.parse_event(data)`.
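
        For example (a sketch; `connection` stands in for this object, typically
        obtained from `client.beta.realtime.connect(...)`):

        ```py
        data = connection.recv_bytes()
        event = connection.parse_event(data)
        ```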
        """
        message = self._connection.recv(decode=False)
        log.debug("Received websocket message: %s", message)
        return message

    def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
        data = (
            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
            if isinstance(event, BaseModel)
            else json.dumps(maybe_transform(event, RealtimeClientEventParam))
        )
        self._connection.send(data)

    def close(self, *, code: int = 1000, reason: str = "") -> None:
        self._connection.close(code=code, reason=reason)

    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
        """
        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.

        This is helpful if you're using `.recv_bytes()`.
        """
        return cast(
            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
        )


class RealtimeConnectionManager:
    """
    Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()`

    This context manager ensures that the connection will be closed when it exits.

    ---

    Note that if your application doesn't work well with the context manager approach then you
    can call the `.enter()` method directly to initiate a connection.

    **Warning**: You must remember to close the connection with `.close()`.

    ```py
    connection = client.beta.realtime.connect(...).enter()
    # ...
    connection.close()
    ```
    """

    def __init__(
        self,
        *,
        client: OpenAI,
        model: str,
        extra_query: Query,
        extra_headers: Headers,
        websocket_connection_options: WebsocketConnectionOptions,
    ) -> None:
        self.__client = client
        self.__model = model
        self.__connection: RealtimeConnection | None = None
        self.__extra_query = extra_query
        self.__extra_headers = extra_headers
        self.__websocket_connection_options = websocket_connection_options

    def __enter__(self) -> RealtimeConnection:
        """
        👋 If your application doesn't work well with the context manager approach then you
        can call this method directly to initiate a connection.

        **Warning**: You must remember to close the connection with `.close()`.

        ```py
        connection = client.beta.realtime.connect(...).enter()
        # ...
        connection.close()
        ```
        """
        try:
            from websockets.sync.client import connect
        except ImportError as exc:
            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc

        extra_query = self.__extra_query
        self.__client._refresh_api_key()
        auth_headers = self.__client.auth_headers
        if is_azure_client(self.__client):
            url, auth_headers = self.__client._configure_realtime(self.__model, extra_query)
        else:
            url = self._prepare_url().copy_with(
                params={
                    **self.__client.base_url.params,
                    "model": self.__model,
                    **extra_query,
                },
            )
        log.debug("Connecting to %s", url)
        if self.__websocket_connection_options:
            log.debug("Connection options: %s", self.__websocket_connection_options)

        self.__connection = RealtimeConnection(
            connect(
                str(url),
                user_agent_header=self.__client.user_agent,
                additional_headers=_merge_mappings(
                    {
                        **auth_headers,
                        "OpenAI-Beta": "realtime=v1",
                    },
                    self.__extra_headers,
                ),
                **self.__websocket_connection_options,
            )
        )

        return self.__connection

    enter = __enter__

    def _prepare_url(self) -> httpx.URL:
        if self.__client.websocket_base_url is not None:
            base_url = httpx.URL(self.__client.websocket_base_url)
        else:
            base_url = self.__client._base_url.copy_with(scheme="wss")

        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
        return base_url.copy_with(raw_path=merge_raw_path)

    def __exit__(
        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        if self.__connection is not None:
            self.__connection.close()


class BaseRealtimeConnectionResource:
    def __init__(self, connection: RealtimeConnection) -> None:
        self._connection = connection


class RealtimeSessionResource(BaseRealtimeConnectionResource):
    def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event to update the session’s default configuration.
        The client may send this event at any time to update any field,
        except for `voice`. However, note that once a session has been
        initialized with a particular `model`, it can’t be changed to
        another model using `session.update`.

        When the server receives a `session.update`, it will respond
        with a `session.updated` event showing the full, effective configuration.
        Only the fields that are present are updated. To clear a field like
        `instructions`, pass an empty string.
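
        For example (a sketch; the fields shown are illustrative):

        ```py
        connection.session.update(
            session={
                "modalities": ["text"],
                "instructions": "You are a helpful assistant.",
            }
        )
        ```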
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
            )
        )


class RealtimeResponseResource(BaseRealtimeConnectionResource):
    def create(
        self,
        *,
        event_id: str | NotGiven = NOT_GIVEN,
        response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
    ) -> None:
        """
        This event instructs the server to create a Response, which means triggering
        model inference. When in Server VAD mode, the server will create Responses
        automatically.

        A Response will include at least one Item, and may have two, in which case
        the second will be a function call. These Items will be appended to the
        conversation history.

        The server will respond with a `response.created` event, events for Items
        and content created, and finally a `response.done` event to indicate the
        Response is complete.

        The `response.create` event includes inference configuration like
        `instructions` and `temperature`. These fields will override the Session's
        configuration for this Response only.
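
        For example (a sketch; the overrides shown are illustrative):

        ```py
        connection.response.create(
            response={
                "modalities": ["text"],
                "instructions": "Reply as concisely as possible.",
            }
        )
        ```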
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
            )
        )

    def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to cancel an in-progress response.

        The server will respond
        with a `response.done` event with a status of `response.status=cancelled`. If
        there is no response to cancel, the server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
            )
        )


class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
    def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to clear the audio bytes in the buffer.

        The server will
        respond with an `input_audio_buffer.cleared` event.
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
        )

    def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event to commit the user input audio buffer, which will create a
        new user message item in the conversation. This event will produce an error
        if the input audio buffer is empty. When in Server VAD mode, the client does
        not need to send this event; the server will commit the audio buffer
        automatically.

        Committing the input audio buffer will trigger input audio transcription
        (if enabled in session configuration), but it will not create a response
        from the model. The server will respond with an `input_audio_buffer.committed`
        event.
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
        )

    def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to append audio bytes to the input audio buffer.

        The audio
        buffer is temporary storage you can write to and later commit. In Server VAD
        mode, the audio buffer is used to detect speech and the server will decide
        when to commit. When Server VAD is disabled, you must commit the audio buffer
        manually.

        The client may choose how much audio to place in each event up to a maximum
        of 15 MiB; for example, streaming smaller chunks from the client may allow the
        VAD to be more responsive. Unlike most other client events, the server will
        not send a confirmation response to this event.
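
        For example (a sketch; `pcm_bytes` is assumed to be raw audio already in the
        session's configured input format):

        ```py
        import base64

        connection.input_audio_buffer.append(audio=base64.b64encode(pcm_bytes).decode("ascii"))
        ```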
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
            )
        )


class RealtimeConversationResource(BaseRealtimeConnectionResource):
    @cached_property
    def item(self) -> RealtimeConversationItemResource:
        return RealtimeConversationItemResource(self._connection)


class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
    def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event when you want to remove any item from the conversation
        history.

        The server will respond with a `conversation.item.deleted` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
            )
        )

    def create(
        self,
        *,
        item: ConversationItemParam,
        event_id: str | NotGiven = NOT_GIVEN,
        previous_item_id: str | NotGiven = NOT_GIVEN,
    ) -> None:
        """
        Add a new Item to the Conversation's context, including messages, function
        calls, and function call responses. This event can be used both to populate a
        "history" of the conversation and to add new items mid-stream, but has the
        current limitation that it cannot populate assistant audio messages.

        If successful, the server will respond with a `conversation.item.created`
        event, otherwise an `error` event will be sent.
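
        For example, a sketch that reports a tool result back to the model
        (`call_id` is assumed to come from an earlier function call item):

        ```py
        connection.conversation.item.create(
            item={
                "type": "function_call_output",
                "call_id": call_id,
                "output": '{"ok": true}',
            }
        )
        ```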
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.create",
                        "item": item,
                        "event_id": event_id,
                        "previous_item_id": previous_item_id,
                    }
                ),
            )
        )

    def truncate(
        self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """Send this event to truncate a previous assistant message’s audio.

        The server
        will produce audio faster than realtime, so this event is useful when the user
        interrupts to truncate audio that has already been sent to the client but not
        yet played. This will synchronize the server's understanding of the audio with
        the client's playback.

        Truncating audio will delete the server-side text transcript to ensure there
        is no text in the context that hasn't been heard by the user.

        If successful, the server will respond with a `conversation.item.truncated`
        event.
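
        For example (a sketch; `item_id` identifies the assistant audio item being
        played back, and `1500` stands in for the milliseconds already heard):

        ```py
        connection.conversation.item.truncate(
            item_id=item_id,
            content_index=0,
            audio_end_ms=1500,
        )
        ```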
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.truncate",
                        "audio_end_ms": audio_end_ms,
                        "content_index": content_index,
                        "item_id": item_id,
                        "event_id": event_id,
                    }
                ),
            )
        )

    def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
        The server will respond with a `conversation.item.retrieved` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
            )
        )


class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
    def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """**WebRTC Only:** Emit to cut off the current audio response.

        This will trigger the server to
        stop generating audio and emit an `output_audio_buffer.cleared` event. This
        event should be preceded by a `response.cancel` client event to stop the
        generation of the current response.
        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
        )


class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
    def update(
        self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """Send this event to update a transcription session."""
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
            )
        )


class BaseAsyncRealtimeConnectionResource:
    def __init__(self, connection: AsyncRealtimeConnection) -> None:
        self._connection = connection


class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
    async def update(
        self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """
        Send this event to update the session’s default configuration.
        The client may send this event at any time to update any field,
        except for `voice`. However, note that once a session has been
        initialized with a particular `model`, it can’t be changed to
        another model using `session.update`.

        When the server receives a `session.update`, it will respond
        with a `session.updated` event showing the full, effective configuration.
        Only the fields that are present are updated. To clear a field like
        `instructions`, pass an empty string.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
            )
        )


class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
    async def create(
        self,
        *,
        event_id: str | NotGiven = NOT_GIVEN,
        response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
    ) -> None:
        """
        This event instructs the server to create a Response, which means triggering
        model inference. When in Server VAD mode, the server will create Responses
        automatically.

        A Response will include at least one Item, and may have two, in which case
        the second will be a function call. These Items will be appended to the
        conversation history.

        The server will respond with a `response.created` event, events for Items
        and content created, and finally a `response.done` event to indicate the
        Response is complete.

        The `response.create` event includes inference configuration like
        `instructions` and `temperature`. These fields will override the Session's
        configuration for this Response only.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
            )
        )

    async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to cancel an in-progress response.

        The server will respond
        with a `response.done` event with a status of `response.status=cancelled`. If
        there is no response to cancel, the server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
            )
        )


class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
    async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to clear the audio bytes in the buffer.

        The server will
        respond with an `input_audio_buffer.cleared` event.
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
        )

    async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event to commit the user input audio buffer, which will create a
        new user message item in the conversation. This event will produce an error
        if the input audio buffer is empty. When in Server VAD mode, the client does
        not need to send this event; the server will commit the audio buffer
        automatically.

        Committing the input audio buffer will trigger input audio transcription
        (if enabled in session configuration), but it will not create a response
        from the model. The server will respond with an `input_audio_buffer.committed`
        event.
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
        )

    async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to append audio bytes to the input audio buffer.

        The audio
        buffer is temporary storage you can write to and later commit. In Server VAD
        mode, the audio buffer is used to detect speech and the server will decide
        when to commit. When Server VAD is disabled, you must commit the audio buffer
        manually.

        The client may choose how much audio to place in each event up to a maximum
        of 15 MiB; for example, streaming smaller chunks from the client may allow the
        VAD to be more responsive. Unlike most other client events, the server will
        not send a confirmation response to this event.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
            )
        )


class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
    @cached_property
    def item(self) -> AsyncRealtimeConversationItemResource:
        return AsyncRealtimeConversationItemResource(self._connection)


class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
    async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event when you want to remove any item from the conversation
        history.

        The server will respond with a `conversation.item.deleted` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
            )
        )

    async def create(
        self,
        *,
        item: ConversationItemParam,
        event_id: str | NotGiven = NOT_GIVEN,
        previous_item_id: str | NotGiven = NOT_GIVEN,
    ) -> None:
        """
        Add a new Item to the Conversation's context, including messages, function
        calls, and function call responses. This event can be used both to populate a
        "history" of the conversation and to add new items mid-stream, but has the
        current limitation that it cannot populate assistant audio messages.

        If successful, the server will respond with a `conversation.item.created`
        event, otherwise an `error` event will be sent.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.create",
                        "item": item,
                        "event_id": event_id,
                        "previous_item_id": previous_item_id,
                    }
                ),
            )
        )

    async def truncate(
        self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """Send this event to truncate a previous assistant message’s audio.

        The server
        will produce audio faster than realtime, so this event is useful when the user
        interrupts to truncate audio that has already been sent to the client but not
        yet played. This will synchronize the server's understanding of the audio with
        the client's playback.

        Truncating audio will delete the server-side text transcript to ensure there
        is no text in the context that hasn't been heard by the user.

        If successful, the server will respond with a `conversation.item.truncated`
        event.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.truncate",
                        "audio_end_ms": audio_end_ms,
                        "content_index": content_index,
                        "item_id": item_id,
                        "event_id": event_id,
                    }
                ),
            )
        )

    async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
        The server will respond with a `conversation.item.retrieved` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
            )
        )


class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
    async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """**WebRTC Only:** Emit to cut off the current audio response.

        This will trigger the server to
        stop generating audio and emit an `output_audio_buffer.cleared` event. This
        event should be preceded by a `response.cancel` client event to stop the
        generation of the current response.
        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
        )


class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
    async def update(
        self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """Send this event to update a transcription session."""
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
            )
        )