# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

import json
import logging
from types import TracebackType
from typing import TYPE_CHECKING, Any, Iterator, cast
from typing_extensions import AsyncIterator

import httpx
from pydantic import BaseModel

from .sessions import (
    Sessions,
    AsyncSessions,
    SessionsWithRawResponse,
    AsyncSessionsWithRawResponse,
    SessionsWithStreamingResponse,
    AsyncSessionsWithStreamingResponse,
)
from ...._types import NOT_GIVEN, Query, Headers, NotGiven
from ...._utils import (
    is_azure_client,
    maybe_transform,
    strip_not_given,
    async_maybe_transform,
    is_async_azure_client,
)
from ...._compat import cached_property
from ...._models import construct_type_unchecked
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._exceptions import OpenAIError
from ...._base_client import _merge_mappings
from ....types.beta.realtime import (
    session_update_event_param,
    response_create_event_param,
    transcription_session_update_param,
)
from .transcription_sessions import (
    TranscriptionSessions,
    AsyncTranscriptionSessions,
    TranscriptionSessionsWithRawResponse,
    AsyncTranscriptionSessionsWithRawResponse,
    TranscriptionSessionsWithStreamingResponse,
    AsyncTranscriptionSessionsWithStreamingResponse,
)
from ....types.websocket_connection_options import WebsocketConnectionOptions
from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent
from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent
from ....types.beta.realtime.conversation_item_param import ConversationItemParam
from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam

if TYPE_CHECKING:
    from websockets.sync.client import ClientConnection as WebsocketConnection
    from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection

    from ...._client import OpenAI, AsyncOpenAI

__all__ = ["Realtime", "AsyncRealtime"]

log: logging.Logger = logging.getLogger(__name__)


class Realtime(SyncAPIResource):
    @cached_property
    def sessions(self) -> Sessions:
        return Sessions(self._client)

    @cached_property
    def transcription_sessions(self) -> TranscriptionSessions:
        return TranscriptionSessions(self._client)

    @cached_property
    def with_raw_response(self) -> RealtimeWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return RealtimeWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> RealtimeWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return RealtimeWithStreamingResponse(self)

    def connect(
        self,
        *,
        model: str,
        extra_query: Query = {},
        extra_headers: Headers = {},
        websocket_connection_options: WebsocketConnectionOptions = {},
    ) -> RealtimeConnectionManager:
        """
        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.

        Some notable benefits of the API include:

        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.

        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
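
        For example, a minimal text-only exchange might look like the sketch below
        (the model name is illustrative; use whichever realtime-capable model you
        have access to):

        ```py
        from openai import OpenAI

        client = OpenAI()

        with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
            connection.session.update(session={"modalities": ["text"]})
            connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [{"type": "input_text", "text": "Say hello!"}],
                }
            )
            connection.response.create()

            for event in connection:
                if event.type == "response.text.delta":
                    print(event.delta, flush=True, end="")
                elif event.type == "response.done":
                    break
        ```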
        """
        return RealtimeConnectionManager(
            client=self._client,
            extra_query=extra_query,
            extra_headers=extra_headers,
            websocket_connection_options=websocket_connection_options,
            model=model,
        )


class AsyncRealtime(AsyncAPIResource):
    @cached_property
    def sessions(self) -> AsyncSessions:
        return AsyncSessions(self._client)

    @cached_property
    def transcription_sessions(self) -> AsyncTranscriptionSessions:
        return AsyncTranscriptionSessions(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncRealtimeWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncRealtimeWithStreamingResponse(self)

    def connect(
        self,
        *,
        model: str,
        extra_query: Query = {},
        extra_headers: Headers = {},
        websocket_connection_options: WebsocketConnectionOptions = {},
    ) -> AsyncRealtimeConnectionManager:
        """
        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.

        Some notable benefits of the API include:

        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.

        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
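
        For example, an async variant of the same minimal sketch, run inside an
        `async def` function (the model name is illustrative):

        ```py
        from openai import AsyncOpenAI

        client = AsyncOpenAI()

        async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
            await connection.session.update(session={"modalities": ["text"]})
            await connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [{"type": "input_text", "text": "Say hello!"}],
                }
            )
            await connection.response.create()

            async for event in connection:
                if event.type == "response.text.delta":
                    print(event.delta, flush=True, end="")
                elif event.type == "response.done":
                    break
        ```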
        """
        return AsyncRealtimeConnectionManager(
            client=self._client,
            extra_query=extra_query,
            extra_headers=extra_headers,
            websocket_connection_options=websocket_connection_options,
            model=model,
        )


class RealtimeWithRawResponse:
    def __init__(self, realtime: Realtime) -> None:
        self._realtime = realtime

    @cached_property
    def sessions(self) -> SessionsWithRawResponse:
        return SessionsWithRawResponse(self._realtime.sessions)

    @cached_property
    def transcription_sessions(self) -> TranscriptionSessionsWithRawResponse:
        return TranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)


class AsyncRealtimeWithRawResponse:
    def __init__(self, realtime: AsyncRealtime) -> None:
        self._realtime = realtime

    @cached_property
    def sessions(self) -> AsyncSessionsWithRawResponse:
        return AsyncSessionsWithRawResponse(self._realtime.sessions)

    @cached_property
    def transcription_sessions(self) -> AsyncTranscriptionSessionsWithRawResponse:
        return AsyncTranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)


class RealtimeWithStreamingResponse:
    def __init__(self, realtime: Realtime) -> None:
        self._realtime = realtime

    @cached_property
    def sessions(self) -> SessionsWithStreamingResponse:
        return SessionsWithStreamingResponse(self._realtime.sessions)

    @cached_property
    def transcription_sessions(self) -> TranscriptionSessionsWithStreamingResponse:
        return TranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)


class AsyncRealtimeWithStreamingResponse:
    def __init__(self, realtime: AsyncRealtime) -> None:
        self._realtime = realtime

    @cached_property
    def sessions(self) -> AsyncSessionsWithStreamingResponse:
        return AsyncSessionsWithStreamingResponse(self._realtime.sessions)

    @cached_property
    def transcription_sessions(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
        return AsyncTranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)


class AsyncRealtimeConnection:
    """Represents a live websocket connection to the Realtime API"""

    session: AsyncRealtimeSessionResource
    response: AsyncRealtimeResponseResource
    input_audio_buffer: AsyncRealtimeInputAudioBufferResource
    conversation: AsyncRealtimeConversationResource
    output_audio_buffer: AsyncRealtimeOutputAudioBufferResource
    transcription_session: AsyncRealtimeTranscriptionSessionResource

    _connection: AsyncWebsocketConnection

    def __init__(self, connection: AsyncWebsocketConnection) -> None:
        self._connection = connection

        self.session = AsyncRealtimeSessionResource(self)
        self.response = AsyncRealtimeResponseResource(self)
        self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
        self.conversation = AsyncRealtimeConversationResource(self)
        self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)
        self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)

    async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
        """
        An infinite iterator that will continue to yield events until
        the connection is closed.
        """
        from websockets.exceptions import ConnectionClosedOK

        try:
            while True:
                yield await self.recv()
        except ConnectionClosedOK:
            return

    async def recv(self) -> RealtimeServerEvent:
        """
        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.

        Canceling this method is safe. There's no risk of losing data.
        """
        return self.parse_event(await self.recv_bytes())

    async def recv_bytes(self) -> bytes:
        """Receive the next message from the connection as raw bytes.

        Canceling this method is safe. There's no risk of losing data.

        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
        then you can call `.parse_event(data)`.
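
        For example (a sketch; `connection` stands in for this object, typically
        obtained from `client.beta.realtime.connect(...)`):

        ```py
        data = await connection.recv_bytes()
        event = connection.parse_event(data)
        ```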
        """
        message = await self._connection.recv(decode=False)
        log.debug("Received websocket message: %s", message)
        return message

    async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
        data = (
            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
            if isinstance(event, BaseModel)
            else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
        )
        await self._connection.send(data)

    async def close(self, *, code: int = 1000, reason: str = "") -> None:
        await self._connection.close(code=code, reason=reason)

    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
        """
        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.

        This is helpful if you're using `.recv_bytes()`.
        """
        return cast(
            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
        )


class AsyncRealtimeConnectionManager:
    """
    Context manager over an `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()`

    This context manager ensures that the connection will be closed when it exits.

    ---

    Note that if your application doesn't work well with the context manager approach then you
    can call the `.enter()` method directly to initiate a connection.

    **Warning**: You must remember to close the connection with `.close()`.

    ```py
    connection = await client.beta.realtime.connect(...).enter()
    # ...
    await connection.close()
    ```
    """

    def __init__(
        self,
        *,
        client: AsyncOpenAI,
        model: str,
        extra_query: Query,
        extra_headers: Headers,
        websocket_connection_options: WebsocketConnectionOptions,
    ) -> None:
        self.__client = client
        self.__model = model
        self.__connection: AsyncRealtimeConnection | None = None
        self.__extra_query = extra_query
        self.__extra_headers = extra_headers
        self.__websocket_connection_options = websocket_connection_options

    async def __aenter__(self) -> AsyncRealtimeConnection:
        """
        👋 If your application doesn't work well with the context manager approach then you
        can call this method directly to initiate a connection.

        **Warning**: You must remember to close the connection with `.close()`.

        ```py
        connection = await client.beta.realtime.connect(...).enter()
        # ...
        await connection.close()
        ```
        """
        try:
            from websockets.asyncio.client import connect
        except ImportError as exc:
            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc

        extra_query = self.__extra_query
        await self.__client._refresh_api_key()
        auth_headers = self.__client.auth_headers
        if is_async_azure_client(self.__client):
            url, auth_headers = await self.__client._configure_realtime(self.__model, extra_query)
        else:
            url = self._prepare_url().copy_with(
                params={
                    **self.__client.base_url.params,
                    "model": self.__model,
                    **extra_query,
                },
            )
        log.debug("Connecting to %s", url)
        if self.__websocket_connection_options:
            log.debug("Connection options: %s", self.__websocket_connection_options)

        self.__connection = AsyncRealtimeConnection(
            await connect(
                str(url),
                user_agent_header=self.__client.user_agent,
                additional_headers=_merge_mappings(
                    {
                        **auth_headers,
                        "OpenAI-Beta": "realtime=v1",
                    },
                    self.__extra_headers,
                ),
                **self.__websocket_connection_options,
            )
        )

        return self.__connection

    enter = __aenter__

    def _prepare_url(self) -> httpx.URL:
        if self.__client.websocket_base_url is not None:
            base_url = httpx.URL(self.__client.websocket_base_url)
        else:
            base_url = self.__client._base_url.copy_with(scheme="wss")

        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
        return base_url.copy_with(raw_path=merge_raw_path)

    async def __aexit__(
        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        if self.__connection is not None:
            await self.__connection.close()


class RealtimeConnection:
    """Represents a live websocket connection to the Realtime API"""

    session: RealtimeSessionResource
    response: RealtimeResponseResource
    input_audio_buffer: RealtimeInputAudioBufferResource
    conversation: RealtimeConversationResource
    output_audio_buffer: RealtimeOutputAudioBufferResource
    transcription_session: RealtimeTranscriptionSessionResource

    _connection: WebsocketConnection

    def __init__(self, connection: WebsocketConnection) -> None:
        self._connection = connection

        self.session = RealtimeSessionResource(self)
        self.response = RealtimeResponseResource(self)
        self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
        self.conversation = RealtimeConversationResource(self)
        self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)
        self.transcription_session = RealtimeTranscriptionSessionResource(self)

    def __iter__(self) -> Iterator[RealtimeServerEvent]:
        """
        An infinite iterator that will continue to yield events until
        the connection is closed.
        """
        from websockets.exceptions import ConnectionClosedOK

        try:
            while True:
                yield self.recv()
        except ConnectionClosedOK:
            return

    def recv(self) -> RealtimeServerEvent:
        """
        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.

        Canceling this method is safe. There's no risk of losing data.
        """
        return self.parse_event(self.recv_bytes())

    def recv_bytes(self) -> bytes:
        """Receive the next message from the connection as raw bytes.

        Canceling this method is safe. There's no risk of losing data.

        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
        then you can call `.parse_event(data)`.
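
        For example (a sketch; `connection` stands in for this object, typically
        obtained from `client.beta.realtime.connect(...)`):

        ```py
        data = connection.recv_bytes()
        event = connection.parse_event(data)
        ```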
        """
        message = self._connection.recv(decode=False)
        log.debug("Received websocket message: %s", message)
        return message

    def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
        data = (
            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
            if isinstance(event, BaseModel)
            else json.dumps(maybe_transform(event, RealtimeClientEventParam))
        )
        self._connection.send(data)

    def close(self, *, code: int = 1000, reason: str = "") -> None:
        self._connection.close(code=code, reason=reason)

    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
        """
        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.

        This is helpful if you're using `.recv_bytes()`.
        """
        return cast(
            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
        )


class RealtimeConnectionManager:
    """
    Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()`

    This context manager ensures that the connection will be closed when it exits.

    ---

    Note that if your application doesn't work well with the context manager approach then you
    can call the `.enter()` method directly to initiate a connection.

    **Warning**: You must remember to close the connection with `.close()`.

    ```py
    connection = client.beta.realtime.connect(...).enter()
    # ...
    connection.close()
    ```
    """

    def __init__(
        self,
        *,
        client: OpenAI,
        model: str,
        extra_query: Query,
        extra_headers: Headers,
        websocket_connection_options: WebsocketConnectionOptions,
    ) -> None:
        self.__client = client
        self.__model = model
        self.__connection: RealtimeConnection | None = None
        self.__extra_query = extra_query
        self.__extra_headers = extra_headers
        self.__websocket_connection_options = websocket_connection_options

    def __enter__(self) -> RealtimeConnection:
        """
        👋 If your application doesn't work well with the context manager approach then you
        can call this method directly to initiate a connection.

        **Warning**: You must remember to close the connection with `.close()`.

        ```py
        connection = client.beta.realtime.connect(...).enter()
        # ...
        connection.close()
        ```
        """
        try:
            from websockets.sync.client import connect
        except ImportError as exc:
            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc

        extra_query = self.__extra_query
        self.__client._refresh_api_key()
        auth_headers = self.__client.auth_headers
        if is_azure_client(self.__client):
            url, auth_headers = self.__client._configure_realtime(self.__model, extra_query)
        else:
            url = self._prepare_url().copy_with(
                params={
                    **self.__client.base_url.params,
                    "model": self.__model,
                    **extra_query,
                },
            )
        log.debug("Connecting to %s", url)
        if self.__websocket_connection_options:
            log.debug("Connection options: %s", self.__websocket_connection_options)

        self.__connection = RealtimeConnection(
            connect(
                str(url),
                user_agent_header=self.__client.user_agent,
                additional_headers=_merge_mappings(
                    {
                        **auth_headers,
                        "OpenAI-Beta": "realtime=v1",
                    },
                    self.__extra_headers,
                ),
                **self.__websocket_connection_options,
            )
        )

        return self.__connection

    enter = __enter__

    def _prepare_url(self) -> httpx.URL:
        if self.__client.websocket_base_url is not None:
            base_url = httpx.URL(self.__client.websocket_base_url)
        else:
            base_url = self.__client._base_url.copy_with(scheme="wss")

        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
        return base_url.copy_with(raw_path=merge_raw_path)

    def __exit__(
        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        if self.__connection is not None:
            self.__connection.close()


class BaseRealtimeConnectionResource:
    def __init__(self, connection: RealtimeConnection) -> None:
        self._connection = connection


class RealtimeSessionResource(BaseRealtimeConnectionResource):
    def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event to update the session’s default configuration.
        The client may send this event at any time to update any field,
        except for `voice`. However, note that once a session has been
        initialized with a particular `model`, it can’t be changed to
        another model using `session.update`.

        When the server receives a `session.update`, it will respond
        with a `session.updated` event showing the full, effective configuration.
        Only the fields that are present are updated. To clear a field like
        `instructions`, pass an empty string.
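
        For example (a sketch; the fields shown are illustrative):

        ```py
        connection.session.update(
            session={
                "modalities": ["text"],
                "instructions": "You are a helpful assistant.",
            }
        )
        ```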
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
            )
        )


class RealtimeResponseResource(BaseRealtimeConnectionResource):
    def create(
        self,
        *,
        event_id: str | NotGiven = NOT_GIVEN,
        response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
    ) -> None:
        """
        This event instructs the server to create a Response, which means triggering
        model inference. When in Server VAD mode, the server will create Responses
        automatically.

        A Response will include at least one Item, and may have two, in which case
        the second will be a function call. These Items will be appended to the
        conversation history.

        The server will respond with a `response.created` event, events for Items
        and content created, and finally a `response.done` event to indicate the
        Response is complete.

        The `response.create` event includes inference configuration like
        `instructions` and `temperature`. These fields will override the Session's
        configuration for this Response only.
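
        For example (a sketch; the overrides shown are illustrative):

        ```py
        connection.response.create(
            response={
                "modalities": ["text"],
                "instructions": "Reply as concisely as possible.",
            }
        )
        ```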
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
            )
        )

    def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to cancel an in-progress response.

        The server will respond
        with a `response.done` event with a status of `response.status=cancelled`. If
        there is no response to cancel, the server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
            )
        )


class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
    def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to clear the audio bytes in the buffer.

        The server will
        respond with an `input_audio_buffer.cleared` event.
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
        )

    def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event to commit the user input audio buffer, which will create a
        new user message item in the conversation. This event will produce an error
        if the input audio buffer is empty. When in Server VAD mode, the client does
        not need to send this event; the server will commit the audio buffer
        automatically.

        Committing the input audio buffer will trigger input audio transcription
        (if enabled in session configuration), but it will not create a response
        from the model. The server will respond with an `input_audio_buffer.committed`
        event.
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
        )

    def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to append audio bytes to the input audio buffer.

        The audio
        buffer is temporary storage you can write to and later commit. In Server VAD
        mode, the audio buffer is used to detect speech and the server will decide
        when to commit. When Server VAD is disabled, you must commit the audio buffer
        manually.

        The client may choose how much audio to place in each event up to a maximum
        of 15 MiB; for example, streaming smaller chunks from the client may allow the
        VAD to be more responsive. Unlike most other client events, the server will
        not send a confirmation response to this event.
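
        For example (a sketch; `pcm_bytes` is assumed to be raw audio already in the
        session's configured input format):

        ```py
        import base64

        connection.input_audio_buffer.append(audio=base64.b64encode(pcm_bytes).decode("ascii"))
        ```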
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
            )
        )


class RealtimeConversationResource(BaseRealtimeConnectionResource):
    @cached_property
    def item(self) -> RealtimeConversationItemResource:
        return RealtimeConversationItemResource(self._connection)


class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
    def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event when you want to remove any item from the conversation
        history.

        The server will respond with a `conversation.item.deleted` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
            )
        )

    def create(
        self,
        *,
        item: ConversationItemParam,
        event_id: str | NotGiven = NOT_GIVEN,
        previous_item_id: str | NotGiven = NOT_GIVEN,
    ) -> None:
        """
        Add a new Item to the Conversation's context, including messages, function
        calls, and function call responses. This event can be used both to populate a
        "history" of the conversation and to add new items mid-stream, but has the
        current limitation that it cannot populate assistant audio messages.

        If successful, the server will respond with a `conversation.item.created`
        event, otherwise an `error` event will be sent.
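
        For example, a sketch that reports a tool result back to the model
        (`call_id` is assumed to come from an earlier function call item):

        ```py
        connection.conversation.item.create(
            item={
                "type": "function_call_output",
                "call_id": call_id,
                "output": '{"ok": true}',
            }
        )
        ```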
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.create",
                        "item": item,
                        "event_id": event_id,
                        "previous_item_id": previous_item_id,
                    }
                ),
            )
        )

    def truncate(
        self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """Send this event to truncate a previous assistant message’s audio.

        The server
        will produce audio faster than realtime, so this event is useful when the user
        interrupts to truncate audio that has already been sent to the client but not
        yet played. This will synchronize the server's understanding of the audio with
        the client's playback.

        Truncating audio will delete the server-side text transcript to ensure there
        is no text in the context that hasn't been heard by the user.

        If successful, the server will respond with a `conversation.item.truncated`
        event.
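
        For example (a sketch; `item_id` identifies the assistant audio item being
        played back, and `1500` stands in for the milliseconds already heard):

        ```py
        connection.conversation.item.truncate(
            item_id=item_id,
            content_index=0,
            audio_end_ms=1500,
        )
        ```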
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.truncate",
                        "audio_end_ms": audio_end_ms,
                        "content_index": content_index,
                        "item_id": item_id,
                        "event_id": event_id,
                    }
                ),
            )
        )

    def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
        The server will respond with a `conversation.item.retrieved` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
            )
        )


class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
    def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """**WebRTC Only:** Emit to cut off the current audio response.

        This will trigger the server to
        stop generating audio and emit an `output_audio_buffer.cleared` event. This
        event should be preceded by a `response.cancel` client event to stop the
        generation of the current response.
        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
        )


class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
    def update(
        self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """Send this event to update a transcription session."""
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
            )
        )


class BaseAsyncRealtimeConnectionResource:
    def __init__(self, connection: AsyncRealtimeConnection) -> None:
        self._connection = connection


class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
    async def update(
        self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """
        Send this event to update the session’s default configuration.
        The client may send this event at any time to update any field,
        except for `voice`. However, note that once a session has been
        initialized with a particular `model`, it can’t be changed to
        another model using `session.update`.

        When the server receives a `session.update`, it will respond
        with a `session.updated` event showing the full, effective configuration.
        Only the fields that are present are updated. To clear a field like
        `instructions`, pass an empty string.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
            )
        )


class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
    async def create(
        self,
        *,
        event_id: str | NotGiven = NOT_GIVEN,
        response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
    ) -> None:
        """
        This event instructs the server to create a Response, which means triggering
        model inference. When in Server VAD mode, the server will create Responses
        automatically.

        A Response will include at least one Item, and may have two, in which case
        the second will be a function call. These Items will be appended to the
        conversation history.

        The server will respond with a `response.created` event, events for Items
        and content created, and finally a `response.done` event to indicate the
        Response is complete.

        The `response.create` event includes inference configuration like
        `instructions` and `temperature`. These fields will override the Session's
        configuration for this Response only.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
            )
        )

    async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to cancel an in-progress response.

        The server will respond
        with a `response.done` event with a status of `response.status=cancelled`. If
        there is no response to cancel, the server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
            )
        )


class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
    async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to clear the audio bytes in the buffer.

        The server will
        respond with an `input_audio_buffer.cleared` event.
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
        )

    async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event to commit the user input audio buffer, which will create a
        new user message item in the conversation. This event will produce an error
        if the input audio buffer is empty. When in Server VAD mode, the client does
        not need to send this event; the server will commit the audio buffer
        automatically.

        Committing the input audio buffer will trigger input audio transcription
        (if enabled in session configuration), but it will not create a response
        from the model. The server will respond with an `input_audio_buffer.committed`
        event.
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
        )

    async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event to append audio bytes to the input audio buffer.

        The audio
        buffer is temporary storage you can write to and later commit. In Server VAD
        mode, the audio buffer is used to detect speech and the server will decide
        when to commit. When Server VAD is disabled, you must commit the audio buffer
        manually.

        The client may choose how much audio to place in each event up to a maximum
        of 15 MiB; for example, streaming smaller chunks from the client may allow the
        VAD to be more responsive. Unlike most other client events, the server will
        not send a confirmation response to this event.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
            )
        )


class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
    @cached_property
    def item(self) -> AsyncRealtimeConversationItemResource:
        return AsyncRealtimeConversationItemResource(self._connection)


class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
    async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """Send this event when you want to remove any item from the conversation
        history.

        The server will respond with a `conversation.item.deleted` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
            )
        )

    async def create(
        self,
        *,
        item: ConversationItemParam,
        event_id: str | NotGiven = NOT_GIVEN,
        previous_item_id: str | NotGiven = NOT_GIVEN,
    ) -> None:
        """
        Add a new Item to the Conversation's context, including messages, function
        calls, and function call responses. This event can be used both to populate a
        "history" of the conversation and to add new items mid-stream, but has the
        current limitation that it cannot populate assistant audio messages.

        If successful, the server will respond with a `conversation.item.created`
        event, otherwise an `error` event will be sent.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.create",
                        "item": item,
                        "event_id": event_id,
                        "previous_item_id": previous_item_id,
                    }
                ),
            )
        )

    async def truncate(
        self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """Send this event to truncate a previous assistant message’s audio.

        The server
        will produce audio faster than realtime, so this event is useful when the user
        interrupts to truncate audio that has already been sent to the client but not
        yet played. This will synchronize the server's understanding of the audio with
        the client's playback.

        Truncating audio will delete the server-side text transcript to ensure there
        is no text in the context that hasn't been heard by the user.

        If successful, the server will respond with a `conversation.item.truncated`
        event.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.truncate",
                        "audio_end_ms": audio_end_ms,
                        "content_index": content_index,
                        "item_id": item_id,
                        "event_id": event_id,
                    }
                ),
            )
        )

    async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """
        Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
        The server will respond with a `conversation.item.retrieved` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
            )
        )


class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
    async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
        """**WebRTC Only:** Emit to cut off the current audio response.

        This will trigger the server to
        stop generating audio and emit an `output_audio_buffer.cleared` event. This
        event should be preceded by a `response.cancel` client event to stop the
        generation of the current response.
        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
        )


class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
    async def update(
        self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
    ) -> None:
        """Send this event to update a transcription session."""
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
            )
        )