main
1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
3from typing import List, Union, Optional
4from typing_extensions import Literal
5
6from ..._models import BaseModel
7from ..shared.metadata import Metadata
8from .conversation_item import ConversationItem
9from .realtime_audio_formats import RealtimeAudioFormats
10from .realtime_response_usage import RealtimeResponseUsage
11from .realtime_response_status import RealtimeResponseStatus
12
# Names re-exported by `from <this module> import *`.
__all__ = [
    "RealtimeResponse",
    "Audio",
    "AudioOutput",
]
14
15
class AudioOutput(BaseModel):
    """Output-audio settings: the audio format and the voice used for replies."""

    format: Optional[RealtimeAudioFormats] = None
    """Format of the audio the model produces."""

    voice: Optional[
        Union[
            str,
            Literal[
                "alloy",
                "ash",
                "ballad",
                "coral",
                "echo",
                "sage",
                "shimmer",
                "verse",
                "marin",
                "cedar",
            ],
        ]
    ] = None
    """Voice the model speaks with when responding.

    Once the model has responded with audio at least once, the voice cannot be
    changed for the rest of the session. The voices currently available are
    `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`,
    `marin`, and `cedar`; `marin` and `cedar` are recommended for best quality.
    """
30
31
class Audio(BaseModel):
    """Audio configuration container; currently carries only output settings."""

    output: Optional[AudioOutput] = None
    """Settings for the audio output (see `AudioOutput`: format and voice)."""
34
35
class RealtimeResponse(BaseModel):
    """A `realtime.response` object: identity, status, output items and usage.

    Every field is optional and defaults to `None`.
    """

    id: Optional[str] = None
    """Unique ID of the response; it will look like `resp_1234`."""

    audio: Optional[Audio] = None
    """Audio output configuration."""

    conversation_id: Optional[str] = None
    """
    The conversation this response is added to, as determined by the
    `conversation` field of the `response.create` event.

    - `auto`: the response is added to the default conversation and
      `conversation_id` is an id like `conv_1234`.
    - `none`: the response is not added to any conversation and
      `conversation_id` is `null`.

    Responses triggered automatically by VAD are added to the default
    conversation.
    """

    max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
    """
    The maximum number of output tokens for a single assistant response that was
    used in this response. The limit is inclusive of tool calls.
    """

    metadata: Optional[Metadata] = None
    """Set of 16 key-value pairs that can be attached to an object.

    Useful for storing additional information about the object in a structured
    format, and for querying for objects via API or the dashboard.

    Keys are strings of at most 64 characters; values are strings of at most
    512 characters.
    """

    object: Optional[Literal["realtime.response"]] = None
    """Object type; must be `realtime.response`."""

    output: Optional[List[ConversationItem]] = None
    """Output items generated by the response."""

    output_modalities: Optional[List[Literal["text", "audio"]]] = None
    """
    The modalities the model used to respond. Currently the only possible values
    are `["audio"]` and `["text"]`. Audio output always includes a text
    transcript; setting the output to mode `text` disables audio output from the
    model.
    """

    status: Optional[
        Literal[
            "completed",
            "cancelled",
            "failed",
            "incomplete",
            "in_progress",
        ]
    ] = None
    """
    Status of the response: one of `completed`, `cancelled`, `failed`,
    `incomplete`, or `in_progress`.
    """

    status_details: Optional[RealtimeResponseStatus] = None
    """Further details about the status."""

    usage: Optional[RealtimeResponseUsage] = None
    """Usage statistics for the response; these correspond to billing.

    A Realtime API session maintains a conversation context and appends new
    Items to the Conversation, so output from previous turns (text and audio
    tokens) becomes the input for later turns.
    """