# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

import json
import logging
from types import TracebackType
from typing import TYPE_CHECKING, Any, Iterator, cast
from typing_extensions import AsyncIterator

import httpx
from pydantic import BaseModel

from .calls import (
    Calls,
    AsyncCalls,
    CallsWithRawResponse,
    AsyncCallsWithRawResponse,
    CallsWithStreamingResponse,
    AsyncCallsWithStreamingResponse,
)
from ..._types import Omit, Query, Headers, omit
from ..._utils import (
    is_azure_client,
    maybe_transform,
    strip_not_given,
    async_maybe_transform,
    is_async_azure_client,
)
from ..._compat import cached_property
from ..._models import construct_type_unchecked
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._exceptions import OpenAIError
from ..._base_client import _merge_mappings
from .client_secrets import (
    ClientSecrets,
    AsyncClientSecrets,
    ClientSecretsWithRawResponse,
    AsyncClientSecretsWithRawResponse,
    ClientSecretsWithStreamingResponse,
    AsyncClientSecretsWithStreamingResponse,
)
from ...types.realtime import session_update_event_param
from ...types.websocket_connection_options import WebsocketConnectionOptions
from ...types.realtime.realtime_client_event import RealtimeClientEvent
from ...types.realtime.realtime_server_event import RealtimeServerEvent
from ...types.realtime.conversation_item_param import ConversationItemParam
from ...types.realtime.realtime_client_event_param import RealtimeClientEventParam
from ...types.realtime.realtime_response_create_params_param import RealtimeResponseCreateParamsParam

if TYPE_CHECKING:
    from websockets.sync.client import ClientConnection as WebsocketConnection
    from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection

    from ..._client import OpenAI, AsyncOpenAI

__all__ = ["Realtime", "AsyncRealtime"]

log: logging.Logger = logging.getLogger(__name__)


class Realtime(SyncAPIResource):
    @cached_property
    def client_secrets(self) -> ClientSecrets:
        return ClientSecrets(self._client)

    @cached_property
    def calls(self) -> Calls:
        from ...lib._realtime import _Calls

        return _Calls(self._client)

    @cached_property
    def with_raw_response(self) -> RealtimeWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return RealtimeWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> RealtimeWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return RealtimeWithStreamingResponse(self)

    def connect(
        self,
        *,
        call_id: str | Omit = omit,
        model: str | Omit = omit,
        extra_query: Query = {},
        extra_headers: Headers = {},
        websocket_connection_options: WebsocketConnectionOptions = {},
    ) -> RealtimeConnectionManager:
        """
        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.

        Some notable benefits of the API include:

        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.

        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
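
        A minimal usage sketch (assuming `client` is an `OpenAI` instance; the model name and
        the `session.update` payload are illustrative rather than a verified schema):

        ```py
        with client.realtime.connect(model="gpt-realtime") as connection:
            connection.session.update(session={"instructions": "You are a helpful assistant."})
            for event in connection:
                print(event.type)
        ```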
        """
        return RealtimeConnectionManager(
            client=self._client,
            extra_query=extra_query,
            extra_headers=extra_headers,
            websocket_connection_options=websocket_connection_options,
            call_id=call_id,
            model=model,
        )


class AsyncRealtime(AsyncAPIResource):
    @cached_property
    def client_secrets(self) -> AsyncClientSecrets:
        return AsyncClientSecrets(self._client)

    @cached_property
    def calls(self) -> AsyncCalls:
        from ...lib._realtime import _AsyncCalls

        return _AsyncCalls(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncRealtimeWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncRealtimeWithStreamingResponse(self)

    def connect(
        self,
        *,
        call_id: str | Omit = omit,
        model: str | Omit = omit,
        extra_query: Query = {},
        extra_headers: Headers = {},
        websocket_connection_options: WebsocketConnectionOptions = {},
    ) -> AsyncRealtimeConnectionManager:
        """
        The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.

        Some notable benefits of the API include:

        - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
        - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
        - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.

        The Realtime API is a stateful, event-based API that communicates over a WebSocket.
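
        The async flow mirrors the sync variant; a minimal sketch (assuming `client` is an
        `AsyncOpenAI` instance, with an illustrative model name):

        ```py
        async with client.realtime.connect(model="gpt-realtime") as connection:
            async for event in connection:
                print(event.type)
        ```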
        """
        return AsyncRealtimeConnectionManager(
            client=self._client,
            extra_query=extra_query,
            extra_headers=extra_headers,
            websocket_connection_options=websocket_connection_options,
            call_id=call_id,
            model=model,
        )


class RealtimeWithRawResponse:
    def __init__(self, realtime: Realtime) -> None:
        self._realtime = realtime

    @cached_property
    def client_secrets(self) -> ClientSecretsWithRawResponse:
        return ClientSecretsWithRawResponse(self._realtime.client_secrets)

    @cached_property
    def calls(self) -> CallsWithRawResponse:
        return CallsWithRawResponse(self._realtime.calls)


class AsyncRealtimeWithRawResponse:
    def __init__(self, realtime: AsyncRealtime) -> None:
        self._realtime = realtime

    @cached_property
    def client_secrets(self) -> AsyncClientSecretsWithRawResponse:
        return AsyncClientSecretsWithRawResponse(self._realtime.client_secrets)

    @cached_property
    def calls(self) -> AsyncCallsWithRawResponse:
        return AsyncCallsWithRawResponse(self._realtime.calls)


class RealtimeWithStreamingResponse:
    def __init__(self, realtime: Realtime) -> None:
        self._realtime = realtime

    @cached_property
    def client_secrets(self) -> ClientSecretsWithStreamingResponse:
        return ClientSecretsWithStreamingResponse(self._realtime.client_secrets)

    @cached_property
    def calls(self) -> CallsWithStreamingResponse:
        return CallsWithStreamingResponse(self._realtime.calls)


class AsyncRealtimeWithStreamingResponse:
    def __init__(self, realtime: AsyncRealtime) -> None:
        self._realtime = realtime

    @cached_property
    def client_secrets(self) -> AsyncClientSecretsWithStreamingResponse:
        return AsyncClientSecretsWithStreamingResponse(self._realtime.client_secrets)

    @cached_property
    def calls(self) -> AsyncCallsWithStreamingResponse:
        return AsyncCallsWithStreamingResponse(self._realtime.calls)


class AsyncRealtimeConnection:
    """Represents a live websocket connection to the Realtime API"""

    session: AsyncRealtimeSessionResource
    response: AsyncRealtimeResponseResource
    input_audio_buffer: AsyncRealtimeInputAudioBufferResource
    conversation: AsyncRealtimeConversationResource
    output_audio_buffer: AsyncRealtimeOutputAudioBufferResource

    _connection: AsyncWebsocketConnection

    def __init__(self, connection: AsyncWebsocketConnection) -> None:
        self._connection = connection

        self.session = AsyncRealtimeSessionResource(self)
        self.response = AsyncRealtimeResponseResource(self)
        self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
        self.conversation = AsyncRealtimeConversationResource(self)
        self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)

    async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
        """
        An infinite iterator that will continue to yield events until
        the connection is closed.
        """
        from websockets.exceptions import ConnectionClosedOK

        try:
            while True:
                yield await self.recv()
        except ConnectionClosedOK:
            return

    async def recv(self) -> RealtimeServerEvent:
        """
        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.

        Canceling this method is safe. There's no risk of losing data.
        """
        return self.parse_event(await self.recv_bytes())

    async def recv_bytes(self) -> bytes:
        """Receive the next message from the connection as raw bytes.

        Canceling this method is safe. There's no risk of losing data.

        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
        then you can call `.parse_event(data)`.
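
        For example, a sketch of receiving raw bytes and parsing them yourself (assuming
        `connection` is an open `AsyncRealtimeConnection`):

        ```py
        data = await connection.recv_bytes()
        event = connection.parse_event(data)
        ```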
        """
        message = await self._connection.recv(decode=False)
        log.debug("Received websocket message: %s", message)
        return message

    async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
        """Send a client event to the server over the websocket connection."""
        data = (
            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
            if isinstance(event, BaseModel)
            else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
        )
        await self._connection.send(data)

    async def close(self, *, code: int = 1000, reason: str = "") -> None:
        await self._connection.close(code=code, reason=reason)

    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
        """
        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.

        This is helpful if you're using `.recv_bytes()`.
        """
        return cast(
            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
        )


class AsyncRealtimeConnectionManager:
    """
    Context manager over an `AsyncRealtimeConnection` that is returned by `realtime.connect()`

    This context manager ensures that the connection will be closed when it exits.

    ---

    Note that if your application doesn't work well with the context manager approach then you
    can call the `.enter()` method directly to initiate a connection.

    **Warning**: You must remember to close the connection with `.close()`.

    ```py
    connection = await client.realtime.connect(...).enter()
    # ...
    await connection.close()
    ```
    """

    def __init__(
        self,
        *,
        client: AsyncOpenAI,
        call_id: str | Omit = omit,
        model: str | Omit = omit,
        extra_query: Query,
        extra_headers: Headers,
        websocket_connection_options: WebsocketConnectionOptions,
    ) -> None:
        self.__client = client
        self.__call_id = call_id
        self.__model = model
        self.__connection: AsyncRealtimeConnection | None = None
        self.__extra_query = extra_query
        self.__extra_headers = extra_headers
        self.__websocket_connection_options = websocket_connection_options

    async def __aenter__(self) -> AsyncRealtimeConnection:
        """
        👋 If your application doesn't work well with the context manager approach then you
        can call this method directly to initiate a connection.

        **Warning**: You must remember to close the connection with `.close()`.

        ```py
        connection = await client.realtime.connect(...).enter()
        # ...
        await connection.close()
        ```
        """
        try:
            from websockets.asyncio.client import connect
        except ImportError as exc:
            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc

        extra_query = self.__extra_query
        await self.__client._refresh_api_key()
        auth_headers = self.__client.auth_headers
        if self.__call_id is not omit:
            extra_query = {**extra_query, "call_id": self.__call_id}
        if is_async_azure_client(self.__client):
            model = self.__model
            if not model:
                raise OpenAIError("`model` is required for Azure Realtime API")
            else:
                url, auth_headers = await self.__client._configure_realtime(model, extra_query)
        else:
            url = self._prepare_url().copy_with(
                params={
                    **self.__client.base_url.params,
                    **({"model": self.__model} if self.__model is not omit else {}),
                    **extra_query,
                },
            )
        log.debug("Connecting to %s", url)
        if self.__websocket_connection_options:
            log.debug("Connection options: %s", self.__websocket_connection_options)

        self.__connection = AsyncRealtimeConnection(
            await connect(
                str(url),
                user_agent_header=self.__client.user_agent,
                additional_headers=_merge_mappings(
                    {
                        **auth_headers,
                    },
                    self.__extra_headers,
                ),
                **self.__websocket_connection_options,
            )
        )

        return self.__connection

    enter = __aenter__

    def _prepare_url(self) -> httpx.URL:
        if self.__client.websocket_base_url is not None:
            base_url = httpx.URL(self.__client.websocket_base_url)
        else:
            base_url = self.__client._base_url.copy_with(scheme="wss")

        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
        return base_url.copy_with(raw_path=merge_raw_path)

    async def __aexit__(
        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        if self.__connection is not None:
            await self.__connection.close()


class RealtimeConnection:
    """Represents a live websocket connection to the Realtime API"""

    session: RealtimeSessionResource
    response: RealtimeResponseResource
    input_audio_buffer: RealtimeInputAudioBufferResource
    conversation: RealtimeConversationResource
    output_audio_buffer: RealtimeOutputAudioBufferResource

    _connection: WebsocketConnection

    def __init__(self, connection: WebsocketConnection) -> None:
        self._connection = connection

        self.session = RealtimeSessionResource(self)
        self.response = RealtimeResponseResource(self)
        self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
        self.conversation = RealtimeConversationResource(self)
        self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)

    def __iter__(self) -> Iterator[RealtimeServerEvent]:
        """
        An infinite iterator that will continue to yield events until
        the connection is closed.
        """
        from websockets.exceptions import ConnectionClosedOK

        try:
            while True:
                yield self.recv()
        except ConnectionClosedOK:
            return

    def recv(self) -> RealtimeServerEvent:
        """
        Receive the next message from the connection and parse it into a `RealtimeServerEvent` object.

        Canceling this method is safe. There's no risk of losing data.
        """
        return self.parse_event(self.recv_bytes())

    def recv_bytes(self) -> bytes:
        """Receive the next message from the connection as raw bytes.

        Canceling this method is safe. There's no risk of losing data.

        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
        then you can call `.parse_event(data)`.
        """
        message = self._connection.recv(decode=False)
        log.debug("Received websocket message: %s", message)
        return message

    def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
        """Send a client event to the server over the websocket connection."""
        data = (
            event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
            if isinstance(event, BaseModel)
            else json.dumps(maybe_transform(event, RealtimeClientEventParam))
        )
        self._connection.send(data)

    def close(self, *, code: int = 1000, reason: str = "") -> None:
        self._connection.close(code=code, reason=reason)

    def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
        """
        Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.

        This is helpful if you're using `.recv_bytes()`.
        """
        return cast(
            RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
        )


class RealtimeConnectionManager:
    """
    Context manager over a `RealtimeConnection` that is returned by `realtime.connect()`

    This context manager ensures that the connection will be closed when it exits.

    ---

    Note that if your application doesn't work well with the context manager approach then you
    can call the `.enter()` method directly to initiate a connection.

    **Warning**: You must remember to close the connection with `.close()`.

    ```py
    connection = client.realtime.connect(...).enter()
    # ...
    connection.close()
    ```
    """

    def __init__(
        self,
        *,
        client: OpenAI,
        call_id: str | Omit = omit,
        model: str | Omit = omit,
        extra_query: Query,
        extra_headers: Headers,
        websocket_connection_options: WebsocketConnectionOptions,
    ) -> None:
        self.__client = client
        self.__call_id = call_id
        self.__model = model
        self.__connection: RealtimeConnection | None = None
        self.__extra_query = extra_query
        self.__extra_headers = extra_headers
        self.__websocket_connection_options = websocket_connection_options

    def __enter__(self) -> RealtimeConnection:
        """
        👋 If your application doesn't work well with the context manager approach then you
        can call this method directly to initiate a connection.

        **Warning**: You must remember to close the connection with `.close()`.

        ```py
        connection = client.realtime.connect(...).enter()
        # ...
        connection.close()
        ```
        """
        try:
            from websockets.sync.client import connect
        except ImportError as exc:
            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc

        extra_query = self.__extra_query
        self.__client._refresh_api_key()
        auth_headers = self.__client.auth_headers
        if self.__call_id is not omit:
            extra_query = {**extra_query, "call_id": self.__call_id}
        if is_azure_client(self.__client):
            model = self.__model
            if not model:
                raise OpenAIError("`model` is required for Azure Realtime API")
            else:
                url, auth_headers = self.__client._configure_realtime(model, extra_query)
        else:
            url = self._prepare_url().copy_with(
                params={
                    **self.__client.base_url.params,
                    **({"model": self.__model} if self.__model is not omit else {}),
                    **extra_query,
                },
            )
        log.debug("Connecting to %s", url)
        if self.__websocket_connection_options:
            log.debug("Connection options: %s", self.__websocket_connection_options)

        self.__connection = RealtimeConnection(
            connect(
                str(url),
                user_agent_header=self.__client.user_agent,
                additional_headers=_merge_mappings(
                    {
                        **auth_headers,
                    },
                    self.__extra_headers,
                ),
                **self.__websocket_connection_options,
            )
        )

        return self.__connection

    enter = __enter__

    def _prepare_url(self) -> httpx.URL:
        if self.__client.websocket_base_url is not None:
            base_url = httpx.URL(self.__client.websocket_base_url)
        else:
            base_url = self.__client._base_url.copy_with(scheme="wss")

        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
        return base_url.copy_with(raw_path=merge_raw_path)

    def __exit__(
        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        if self.__connection is not None:
            self.__connection.close()


class BaseRealtimeConnectionResource:
    def __init__(self, connection: RealtimeConnection) -> None:
        self._connection = connection


class RealtimeSessionResource(BaseRealtimeConnectionResource):
    def update(self, *, session: session_update_event_param.Session, event_id: str | Omit = omit) -> None:
        """
        Send this event to update the session’s configuration.
        The client may send this event at any time to update any field
        except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.

        When the server receives a `session.update`, it will respond
        with a `session.updated` event showing the full, effective configuration.
        Only the fields that are present in the `session.update` are updated. To clear a field like
        `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array.
        To clear a field like `turn_detection`, pass `null`.
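
        For example, a minimal sketch (assuming `connection` is an open connection; the field
        shown is illustrative, not an exhaustive schema):

        ```py
        connection.session.update(session={"instructions": "Answer in one short sentence."})
        ```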
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
            )
        )


class RealtimeResponseResource(BaseRealtimeConnectionResource):
    def create(self, *, event_id: str | Omit = omit, response: RealtimeResponseCreateParamsParam | Omit = omit) -> None:
        """
        This event instructs the server to create a Response, which means triggering
        model inference. When in Server VAD mode, the server will create Responses
        automatically.

        A Response will include at least one Item, and may have two, in which case
        the second will be a function call. These Items will be appended to the
        conversation history by default.

        The server will respond with a `response.created` event, events for Items
        and content created, and finally a `response.done` event to indicate the
        Response is complete.

        The `response.create` event includes inference configuration like
        `instructions` and `tools`. If these are set, they will override the Session's
        configuration for this Response only.

        Responses can be created out-of-band of the default Conversation, meaning that they can
        have arbitrary input, and it's possible to disable writing the output to the Conversation.
        Only one Response can write to the default Conversation at a time, but otherwise multiple
        Responses can be created in parallel. The `metadata` field is a good way to disambiguate
        multiple simultaneous Responses.

        Clients can set `conversation` to `none` to create a Response that does not write to the default
        Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting
        raw Items and references to existing Items.
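
        For example, a sketch of an out-of-band Response that does not write to the default
        Conversation (the payload fields are illustrative, not an exhaustive schema):

        ```py
        connection.response.create(
            response={
                "conversation": "none",
                "metadata": {"topic": "classification"},
                "instructions": "Classify the user's sentiment as positive or negative.",
            }
        )
        ```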
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
            )
        )

    def cancel(self, *, event_id: str | Omit = omit, response_id: str | Omit = omit) -> None:
        """Send this event to cancel an in-progress response.

        The server will respond
        with a `response.done` event with a status of `response.status=cancelled`. If
        there is no response to cancel, the server will respond with an error. It's safe
        to call `response.cancel` even if no response is in progress; an error will be
        returned and the session will remain unaffected.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
            )
        )


class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
    def clear(self, *, event_id: str | Omit = omit) -> None:
        """Send this event to clear the audio bytes in the buffer.

        The server will
        respond with an `input_audio_buffer.cleared` event.
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
        )

    def commit(self, *, event_id: str | Omit = omit) -> None:
        """
        Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.

        Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event.
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
        )

    def append(self, *, audio: str, event_id: str | Omit = omit) -> None:
        """Send this event to append audio bytes to the input audio buffer.

        The audio
        buffer is temporary storage you can write to and later commit. A "commit" will create a new
        user message item in the conversation history from the buffer content and clear the buffer.
        Input audio transcription (if enabled) will be generated when the buffer is committed.

        If VAD is enabled, the audio buffer is used to detect speech and the server will decide
        when to commit. When Server VAD is disabled, you must commit the audio buffer
        manually. Input audio noise reduction operates on writes to the audio buffer.

        The client may choose how much audio to place in each event, up to a maximum
        of 15 MiB; for example, streaming smaller chunks from the client may allow the
        VAD to be more responsive. Unlike most other client events, the server will
        not send a confirmation response to this event.
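
        For example, a sketch that streams base64-encoded chunks and commits them manually, as
        you would with Server VAD disabled (`chunks` is assumed to already hold audio in the
        session's input format):

        ```py
        import base64

        for chunk in chunks:
            connection.input_audio_buffer.append(audio=base64.b64encode(chunk).decode("ascii"))
        connection.input_audio_buffer.commit()
        ```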
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
            )
        )


class RealtimeConversationResource(BaseRealtimeConnectionResource):
    @cached_property
    def item(self) -> RealtimeConversationItemResource:
        return RealtimeConversationItemResource(self._connection)


class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
    def delete(self, *, item_id: str, event_id: str | Omit = omit) -> None:
        """Send this event when you want to remove any item from the conversation
        history.

        The server will respond with a `conversation.item.deleted` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
            )
        )

    def create(
        self, *, item: ConversationItemParam, event_id: str | Omit = omit, previous_item_id: str | Omit = omit
    ) -> None:
        """
        Add a new Item to the Conversation's context, including messages, function
        calls, and function call responses. This event can be used both to populate a
        "history" of the conversation and to add new items mid-stream, but has the
        current limitation that it cannot populate assistant audio messages.

        If successful, the server will respond with a `conversation.item.created`
        event, otherwise an `error` event will be sent.
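
        For example, a sketch that seeds the conversation with a user message (the item shape
        is illustrative):

        ```py
        connection.conversation.item.create(
            item={
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "Hello!"}],
            }
        )
        ```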
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.create",
                        "item": item,
                        "event_id": event_id,
                        "previous_item_id": previous_item_id,
                    }
                ),
            )
        )

    def truncate(self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | Omit = omit) -> None:
        """Send this event to truncate a previous assistant message’s audio.

        The server
        will produce audio faster than realtime, so this event is useful when the user
        interrupts to truncate audio that has already been sent to the client but not
        yet played. This will synchronize the server's understanding of the audio with
        the client's playback.

        Truncating audio will delete the server-side text transcript to ensure there
        is no text in the context that hasn't been heard by the user.

        If successful, the server will respond with a `conversation.item.truncated`
        event.
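
        For example, a sketch that truncates an interrupted assistant message to what the user
        has actually heard (`last_assistant_item_id` and `elapsed_playback_ms` are assumed to be
        tracked by your playback code):

        ```py
        connection.conversation.item.truncate(
            item_id=last_assistant_item_id,
            content_index=0,
            audio_end_ms=elapsed_playback_ms,
        )
        ```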
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.truncate",
                        "audio_end_ms": audio_end_ms,
                        "content_index": content_index,
                        "item_id": item_id,
                        "event_id": event_id,
                    }
                ),
            )
        )

    def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
        """
        Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
        The server will respond with a `conversation.item.retrieved` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
            )
        )


class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
    def clear(self, *, event_id: str | Omit = omit) -> None:
        """**WebRTC/SIP Only:** Emit to cut off the current audio response.

        This will trigger the server to
        stop generating audio and emit an `output_audio_buffer.cleared` event. This
        event should be preceded by a `response.cancel` client event to stop the
        generation of the current response.
        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
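
        For example, a sketch of the cancel-then-clear sequence when the user interrupts:

        ```py
        connection.response.cancel()
        connection.output_audio_buffer.clear()
        ```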
        """
        self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
        )


class BaseAsyncRealtimeConnectionResource:
    def __init__(self, connection: AsyncRealtimeConnection) -> None:
        self._connection = connection


class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
    async def update(self, *, session: session_update_event_param.Session, event_id: str | Omit = omit) -> None:
        """
        Send this event to update the session’s configuration.
        The client may send this event at any time to update any field
        except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.

        When the server receives a `session.update`, it will respond
        with a `session.updated` event showing the full, effective configuration.
        Only the fields that are present in the `session.update` are updated. To clear a field like
        `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array.
        To clear a field like `turn_detection`, pass `null`.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
            )
        )


class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
    async def create(
        self, *, event_id: str | Omit = omit, response: RealtimeResponseCreateParamsParam | Omit = omit
    ) -> None:
        """
        This event instructs the server to create a Response, which means triggering
        model inference. When in Server VAD mode, the server will create Responses
        automatically.

        A Response will include at least one Item, and may have two, in which case
        the second will be a function call. These Items will be appended to the
        conversation history by default.

        The server will respond with a `response.created` event, events for Items
        and content created, and finally a `response.done` event to indicate the
        Response is complete.

        The `response.create` event includes inference configuration like
        `instructions` and `tools`. If these are set, they will override the Session's
        configuration for this Response only.

        Responses can be created out-of-band of the default Conversation, meaning that they can
        have arbitrary input, and it's possible to disable writing the output to the Conversation.
        Only one Response can write to the default Conversation at a time, but otherwise multiple
        Responses can be created in parallel. The `metadata` field is a good way to disambiguate
        multiple simultaneous Responses.

        Clients can set `conversation` to `none` to create a Response that does not write to the default
        Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting
        raw Items and references to existing Items.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
            )
        )

    async def cancel(self, *, event_id: str | Omit = omit, response_id: str | Omit = omit) -> None:
        """Send this event to cancel an in-progress response.

        The server will respond
        with a `response.done` event with a status of `response.status=cancelled`. If
        there is no response to cancel, the server will respond with an error. It's safe
        to call `response.cancel` even if no response is in progress; an error will be
        returned and the session will remain unaffected.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
            )
        )


class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
    async def clear(self, *, event_id: str | Omit = omit) -> None:
        """Send this event to clear the audio bytes in the buffer.

        The server will
        respond with an `input_audio_buffer.cleared` event.
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
        )

    async def commit(self, *, event_id: str | Omit = omit) -> None:
        """
        Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.

        Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event.
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
        )

    async def append(self, *, audio: str, event_id: str | Omit = omit) -> None:
        """Send this event to append audio bytes to the input audio buffer.

        The audio
        buffer is temporary storage you can write to and later commit. A "commit" will create a new
        user message item in the conversation history from the buffer content and clear the buffer.
        Input audio transcription (if enabled) will be generated when the buffer is committed.

        If VAD is enabled, the audio buffer is used to detect speech and the server will decide
        when to commit. When Server VAD is disabled, you must commit the audio buffer
        manually. Input audio noise reduction operates on writes to the audio buffer.

        The client may choose how much audio to place in each event, up to a maximum
        of 15 MiB; for example, streaming smaller chunks from the client may allow the
        VAD to be more responsive. Unlike most other client events, the server will
        not send a confirmation response to this event.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
            )
        )


class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
    @cached_property
    def item(self) -> AsyncRealtimeConversationItemResource:
        return AsyncRealtimeConversationItemResource(self._connection)


class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
    async def delete(self, *, item_id: str, event_id: str | Omit = omit) -> None:
        """Send this event when you want to remove any item from the conversation
        history.

        The server will respond with a `conversation.item.deleted` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
            )
        )

    async def create(
        self, *, item: ConversationItemParam, event_id: str | Omit = omit, previous_item_id: str | Omit = omit
    ) -> None:
        """
        Add a new Item to the Conversation's context, including messages, function
        calls, and function call responses. This event can be used both to populate a
        "history" of the conversation and to add new items mid-stream, but has the
        current limitation that it cannot populate assistant audio messages.

        If successful, the server will respond with a `conversation.item.created`
        event, otherwise an `error` event will be sent.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.create",
                        "item": item,
                        "event_id": event_id,
                        "previous_item_id": previous_item_id,
                    }
                ),
            )
        )

    async def truncate(
        self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | Omit = omit
    ) -> None:
        """Send this event to truncate a previous assistant message’s audio.

        The server
        will produce audio faster than realtime, so this event is useful when the user
        interrupts to truncate audio that has already been sent to the client but not
        yet played. This will synchronize the server's understanding of the audio with
        the client's playback.

        Truncating audio will delete the server-side text transcript to ensure there
        is no text in the context that hasn't been heard by the user.

        If successful, the server will respond with a `conversation.item.truncated`
        event.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given(
                    {
                        "type": "conversation.item.truncate",
                        "audio_end_ms": audio_end_ms,
                        "content_index": content_index,
                        "item_id": item_id,
                        "event_id": event_id,
                    }
                ),
            )
        )

    async def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
        """
        Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
        The server will respond with a `conversation.item.retrieved` event,
        unless the item does not exist in the conversation history, in which case the
        server will respond with an error.
        """
        await self._connection.send(
            cast(
                RealtimeClientEventParam,
                strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
            )
        )


class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
    async def clear(self, *, event_id: str | Omit = omit) -> None:
        """**WebRTC/SIP Only:** Emit to cut off the current audio response.

        This will trigger the server to
        stop generating audio and emit an `output_audio_buffer.cleared` event. This
        event should be preceded by a `response.cancel` client event to stop the
        generation of the current response.
        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
        """
        await self._connection.send(
            cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
        )