# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import List
from typing_extensions import Literal

import httpx

from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ...._utils import maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ...._base_client import make_request_options
from ....types.beta.realtime import transcription_session_create_params
from ....types.beta.realtime.transcription_session import TranscriptionSession

__all__ = ["TranscriptionSessions", "AsyncTranscriptionSessions"]


class TranscriptionSessions(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> TranscriptionSessionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
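
        A minimal usage sketch (assumes a configured `client = OpenAI()`; the
        raw response exposes HTTP headers while `.parse()` still returns the
        typed object):

            response = client.beta.realtime.transcription_sessions.with_raw_response.create()
            print(response.headers)  # raw HTTP headers
            session = response.parse()  # parsed TranscriptionSession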
31 """
32 return TranscriptionSessionsWithRawResponse(self)
33
34 @cached_property
35 def with_streaming_response(self) -> TranscriptionSessionsWithStreamingResponse:
36 """
37 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
38
39 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
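
        A minimal usage sketch (assumes a configured `client = OpenAI()`). The
        wrapper returns a context manager, so the body is only read on demand:

            with client.beta.realtime.transcription_sessions.with_streaming_response.create() as response:
                session = response.parse()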
40 """
41 return TranscriptionSessionsWithStreamingResponse(self)
42
43 def create(
44 self,
45 *,
46 client_secret: transcription_session_create_params.ClientSecret | NotGiven = NOT_GIVEN,
47 include: List[str] | NotGiven = NOT_GIVEN,
48 input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
49 input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
50 | NotGiven = NOT_GIVEN,
51 input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
52 modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
53 turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
54 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
55 # The extra values given here take precedence over values defined on the client or passed to this method.
56 extra_headers: Headers | None = None,
57 extra_query: Query | None = None,
58 extra_body: Body | None = None,
59 timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
60 ) -> TranscriptionSession:
61 """
62 Create an ephemeral API token for use in client-side applications with the
63 Realtime API specifically for realtime transcriptions. Can be configured with
64 the same session parameters as the `transcription_session.update` client event.
65
66 It responds with a session object, plus a `client_secret` key which contains a
67 usable ephemeral API token that can be used to authenticate browser clients for
68 the Realtime API.
69
70 Args:
71 client_secret: Configuration options for the generated client secret.
72
          include:
              The set of items to include in the transcription. Currently available items are:

              - `item.input_audio_transcription.logprobs`

          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
              (mono), and little-endian byte order.

          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
              off. Noise reduction filters audio added to the input audio buffer before it is
              sent to VAD and the model. Filtering the audio can improve VAD and turn
              detection accuracy (reducing false positives) and model performance by improving
              perception of the input audio.

          input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
              language and prompt for transcription; these offer additional guidance to the
              transcription service.

          modalities: The set of modalities the model can respond with. To disable audio, set this to
              ["text"].

          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
              set to `null` to turn off, in which case the client must manually trigger model
              response. Server VAD means that the model will detect the start and end of
              speech based on audio volume and respond at the end of user speech. Semantic VAD
              is more advanced and uses a turn detection model (in conjunction with VAD) to
              semantically estimate whether the user has finished speaking, then dynamically
              sets a timeout based on this probability. For example, if user audio trails off
              with "uhhm", the model will score a low probability of turn end and wait longer
              for the user to continue speaking. This can be useful for more natural
              conversations, but may have a higher latency.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
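
        A minimal usage sketch (assumes a configured `client = OpenAI()`; the
        parameter values shown are illustrative, not required):

            session = client.beta.realtime.transcription_sessions.create(
                input_audio_format="pcm16",
                turn_detection={"type": "server_vad"},
            )
            token = session.client_secret.value  # ephemeral token for the browser client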
113 """
114 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
115 return self._post(
116 "/realtime/transcription_sessions",
117 body=maybe_transform(
118 {
119 "client_secret": client_secret,
120 "include": include,
121 "input_audio_format": input_audio_format,
122 "input_audio_noise_reduction": input_audio_noise_reduction,
123 "input_audio_transcription": input_audio_transcription,
124 "modalities": modalities,
125 "turn_detection": turn_detection,
126 },
127 transcription_session_create_params.TranscriptionSessionCreateParams,
128 ),
129 options=make_request_options(
130 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
131 ),
132 cast_to=TranscriptionSession,
133 )
134
135
136class AsyncTranscriptionSessions(AsyncAPIResource):
137 @cached_property
138 def with_raw_response(self) -> AsyncTranscriptionSessionsWithRawResponse:
139 """
140 This property can be used as a prefix for any HTTP method call to return
141 the raw response object instead of the parsed content.
142
143 For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
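
        A minimal usage sketch (assumes a configured `client = AsyncOpenAI()`):

            response = await client.beta.realtime.transcription_sessions.with_raw_response.create()
            print(response.headers)  # raw HTTP headers
            session = response.parse()  # parsed TranscriptionSession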
144 """
145 return AsyncTranscriptionSessionsWithRawResponse(self)
146
147 @cached_property
148 def with_streaming_response(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
149 """
150 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
151
152 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
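
        A minimal usage sketch (assumes a configured `client = AsyncOpenAI()`).
        The wrapper returns an async context manager, so the body is only read
        on demand:

            async with client.beta.realtime.transcription_sessions.with_streaming_response.create() as response:
                session = await response.parse()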
153 """
154 return AsyncTranscriptionSessionsWithStreamingResponse(self)
155
156 async def create(
157 self,
158 *,
159 client_secret: transcription_session_create_params.ClientSecret | NotGiven = NOT_GIVEN,
160 include: List[str] | NotGiven = NOT_GIVEN,
161 input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
162 input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
163 | NotGiven = NOT_GIVEN,
164 input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
165 modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
166 turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
167 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
168 # The extra values given here take precedence over values defined on the client or passed to this method.
169 extra_headers: Headers | None = None,
170 extra_query: Query | None = None,
171 extra_body: Body | None = None,
172 timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
173 ) -> TranscriptionSession:
174 """
175 Create an ephemeral API token for use in client-side applications with the
176 Realtime API specifically for realtime transcriptions. Can be configured with
177 the same session parameters as the `transcription_session.update` client event.
178
179 It responds with a session object, plus a `client_secret` key which contains a
180 usable ephemeral API token that can be used to authenticate browser clients for
181 the Realtime API.
182
183 Args:
184 client_secret: Configuration options for the generated client secret.
185
          include:
              The set of items to include in the transcription. Currently available items are:

              - `item.input_audio_transcription.logprobs`

          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
              (mono), and little-endian byte order.

          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
              off. Noise reduction filters audio added to the input audio buffer before it is
              sent to VAD and the model. Filtering the audio can improve VAD and turn
              detection accuracy (reducing false positives) and model performance by improving
              perception of the input audio.

          input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
              language and prompt for transcription; these offer additional guidance to the
              transcription service.

          modalities: The set of modalities the model can respond with. To disable audio, set this to
              ["text"].

          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
              set to `null` to turn off, in which case the client must manually trigger model
              response. Server VAD means that the model will detect the start and end of
              speech based on audio volume and respond at the end of user speech. Semantic VAD
              is more advanced and uses a turn detection model (in conjunction with VAD) to
              semantically estimate whether the user has finished speaking, then dynamically
              sets a timeout based on this probability. For example, if user audio trails off
              with "uhhm", the model will score a low probability of turn end and wait longer
              for the user to continue speaking. This can be useful for more natural
              conversations, but may have a higher latency.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
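
        A minimal usage sketch (assumes a configured `client = AsyncOpenAI()`;
        the parameter values shown are illustrative, not required):

            session = await client.beta.realtime.transcription_sessions.create(
                input_audio_format="pcm16",
                turn_detection={"type": "server_vad"},
            )
            token = session.client_secret.value  # ephemeral token for the browser client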
226 """
227 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
228 return await self._post(
229 "/realtime/transcription_sessions",
230 body=await async_maybe_transform(
231 {
232 "client_secret": client_secret,
233 "include": include,
234 "input_audio_format": input_audio_format,
235 "input_audio_noise_reduction": input_audio_noise_reduction,
236 "input_audio_transcription": input_audio_transcription,
237 "modalities": modalities,
238 "turn_detection": turn_detection,
239 },
240 transcription_session_create_params.TranscriptionSessionCreateParams,
241 ),
242 options=make_request_options(
243 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
244 ),
245 cast_to=TranscriptionSession,
246 )
247
248
249class TranscriptionSessionsWithRawResponse:
250 def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
251 self._transcription_sessions = transcription_sessions
252
253 self.create = _legacy_response.to_raw_response_wrapper(
254 transcription_sessions.create,
255 )
256
257
258class AsyncTranscriptionSessionsWithRawResponse:
259 def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
260 self._transcription_sessions = transcription_sessions
261
262 self.create = _legacy_response.async_to_raw_response_wrapper(
263 transcription_sessions.create,
264 )
265
266
267class TranscriptionSessionsWithStreamingResponse:
268 def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
269 self._transcription_sessions = transcription_sessions
270
271 self.create = to_streamed_response_wrapper(
272 transcription_sessions.create,
273 )
274
275
276class AsyncTranscriptionSessionsWithStreamingResponse:
277 def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
278 self._transcription_sessions = transcription_sessions
279
280 self.create = async_to_streamed_response_wrapper(
281 transcription_sessions.create,
282 )