Commit 847ff0b8
Changed files (29)
src
openai
resources
realtime
types
realtime
src/openai/resources/realtime/realtime.py
@@ -32,7 +32,7 @@ from .client_secrets import (
ClientSecretsWithStreamingResponse,
AsyncClientSecretsWithStreamingResponse,
)
-from ...types.realtime import session_update_event_param, transcription_session_update_param
+from ...types.realtime import session_update_event_param
from ...types.websocket_connection_options import WebsocketConnectionOptions
from ...types.realtime.realtime_client_event import RealtimeClientEvent
from ...types.realtime.realtime_server_event import RealtimeServerEvent
@@ -199,7 +199,6 @@ class AsyncRealtimeConnection:
input_audio_buffer: AsyncRealtimeInputAudioBufferResource
conversation: AsyncRealtimeConversationResource
output_audio_buffer: AsyncRealtimeOutputAudioBufferResource
- transcription_session: AsyncRealtimeTranscriptionSessionResource
_connection: AsyncWebsocketConnection
@@ -211,7 +210,6 @@ class AsyncRealtimeConnection:
self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
self.conversation = AsyncRealtimeConversationResource(self)
self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)
- self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)
async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
"""
@@ -381,7 +379,6 @@ class RealtimeConnection:
input_audio_buffer: RealtimeInputAudioBufferResource
conversation: RealtimeConversationResource
output_audio_buffer: RealtimeOutputAudioBufferResource
- transcription_session: RealtimeTranscriptionSessionResource
_connection: WebsocketConnection
@@ -393,7 +390,6 @@ class RealtimeConnection:
self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
self.conversation = RealtimeConversationResource(self)
self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)
- self.transcription_session = RealtimeTranscriptionSessionResource(self)
def __iter__(self) -> Iterator[RealtimeServerEvent]:
"""
@@ -565,8 +561,7 @@ class RealtimeSessionResource(BaseRealtimeConnectionResource):
"""
Send this event to update the session’s configuration.
The client may send this event at any time to update any field
- except for `voice` and `model`. `voice` can be updated only if there have been no other
- audio outputs yet.
+ except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.
When the server receives a `session.update`, it will respond
with a `session.updated` event showing the full, effective configuration.
@@ -800,19 +795,6 @@ class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
)
-class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
- def update(
- self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
- ) -> None:
- """Send this event to update a transcription session."""
- self._connection.send(
- cast(
- RealtimeClientEventParam,
- strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
- )
- )
-
-
class BaseAsyncRealtimeConnectionResource:
def __init__(self, connection: AsyncRealtimeConnection) -> None:
self._connection = connection
@@ -825,8 +807,7 @@ class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
"""
Send this event to update the session’s configuration.
The client may send this event at any time to update any field
- except for `voice` and `model`. `voice` can be updated only if there have been no other
- audio outputs yet.
+ except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.
When the server receives a `session.update`, it will respond
with a `session.updated` event showing the full, effective configuration.
@@ -1058,16 +1039,3 @@ class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource
await self._connection.send(
cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
)
-
-
-class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
- async def update(
- self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
- ) -> None:
- """Send this event to update a transcription session."""
- await self._connection.send(
- cast(
- RealtimeClientEventParam,
- strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
- )
- )
src/openai/types/realtime/__init__.py
@@ -2,8 +2,6 @@
from __future__ import annotations
-from .models import Models as Models
-from .models_param import ModelsParam as ModelsParam
from .realtime_error import RealtimeError as RealtimeError
from .conversation_item import ConversationItem as ConversationItem
from .realtime_response import RealtimeResponse as RealtimeResponse
@@ -25,6 +23,7 @@ from .session_created_event import SessionCreatedEvent as SessionCreatedEvent
from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent
from .conversation_item_done import ConversationItemDone as ConversationItemDone
from .realtime_audio_formats import RealtimeAudioFormats as RealtimeAudioFormats
+from .realtime_function_tool import RealtimeFunctionTool as RealtimeFunctionTool
from .realtime_mcp_tool_call import RealtimeMcpToolCall as RealtimeMcpToolCall
from .realtime_mcphttp_error import RealtimeMcphttpError as RealtimeMcphttpError
from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent
@@ -60,15 +59,14 @@ from .response_create_event_param import ResponseCreateEventParam as ResponseCre
from .response_mcp_call_completed import ResponseMcpCallCompleted as ResponseMcpCallCompleted
from .realtime_audio_config_output import RealtimeAudioConfigOutput as RealtimeAudioConfigOutput
from .realtime_audio_formats_param import RealtimeAudioFormatsParam as RealtimeAudioFormatsParam
+from .realtime_function_tool_param import RealtimeFunctionToolParam as RealtimeFunctionToolParam
from .realtime_mcp_tool_call_param import RealtimeMcpToolCallParam as RealtimeMcpToolCallParam
from .realtime_mcphttp_error_param import RealtimeMcphttpErrorParam as RealtimeMcphttpErrorParam
-from .transcription_session_update import TranscriptionSessionUpdate as TranscriptionSessionUpdate
from .client_secret_create_response import ClientSecretCreateResponse as ClientSecretCreateResponse
from .realtime_mcp_approval_request import RealtimeMcpApprovalRequest as RealtimeMcpApprovalRequest
from .realtime_mcp_list_tools_param import RealtimeMcpListToolsParam as RealtimeMcpListToolsParam
from .realtime_tracing_config_param import RealtimeTracingConfigParam as RealtimeTracingConfigParam
from .response_mcp_call_in_progress import ResponseMcpCallInProgress as ResponseMcpCallInProgress
-from .transcription_session_created import TranscriptionSessionCreated as TranscriptionSessionCreated
from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent
from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent
from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent
@@ -100,11 +98,9 @@ from .response_content_part_added_event import ResponseContentPartAddedEvent as
from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta as ResponseMcpCallArgumentsDelta
from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent
from .realtime_audio_config_output_param import RealtimeAudioConfigOutputParam as RealtimeAudioConfigOutputParam
-from .transcription_session_update_param import TranscriptionSessionUpdateParam as TranscriptionSessionUpdateParam
from .realtime_audio_input_turn_detection import RealtimeAudioInputTurnDetection as RealtimeAudioInputTurnDetection
from .realtime_mcp_approval_request_param import RealtimeMcpApprovalRequestParam as RealtimeMcpApprovalRequestParam
from .realtime_truncation_retention_ratio import RealtimeTruncationRetentionRatio as RealtimeTruncationRetentionRatio
-from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent as TranscriptionSessionUpdatedEvent
from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam
from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam
from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam
@@ -181,9 +177,6 @@ from .realtime_conversation_item_assistant_message import (
from .realtime_response_usage_output_token_details import (
RealtimeResponseUsageOutputTokenDetails as RealtimeResponseUsageOutputTokenDetails,
)
-from .realtime_transcription_session_client_secret import (
- RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret,
-)
from .response_function_call_arguments_delta_event import (
ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent,
)
@@ -229,9 +222,6 @@ from .conversation_item_input_audio_transcription_delta_event import (
from .conversation_item_input_audio_transcription_failed_event import (
ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent,
)
-from .realtime_transcription_session_input_audio_transcription import (
- RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription,
-)
from .realtime_transcription_session_audio_input_turn_detection import (
RealtimeTranscriptionSessionAudioInputTurnDetection as RealtimeTranscriptionSessionAudioInputTurnDetection,
)
src/openai/types/realtime/client_secret_create_response.py
@@ -1,15 +1,18 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Union
-from typing_extensions import TypeAlias
+from typing_extensions import Annotated, TypeAlias
+from ..._utils import PropertyInfo
from ..._models import BaseModel
from .realtime_session_create_response import RealtimeSessionCreateResponse
from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse
__all__ = ["ClientSecretCreateResponse", "Session"]
-Session: TypeAlias = Union[RealtimeSessionCreateResponse, RealtimeTranscriptionSessionCreateResponse]
+Session: TypeAlias = Annotated[
+ Union[RealtimeSessionCreateResponse, RealtimeTranscriptionSessionCreateResponse], PropertyInfo(discriminator="type")
+]
class ClientSecretCreateResponse(BaseModel):
src/openai/types/realtime/realtime_audio_input_turn_detection.py
@@ -27,7 +27,7 @@ class RealtimeAudioInputTurnDetection(BaseModel):
idle_timeout_ms: Optional[int] = None
"""
Optional idle timeout after which turn detection will auto-timeout when no
- additional audio is received.
+ additional audio is received and emits a `timeout_triggered` event.
"""
interrupt_response: Optional[bool] = None
src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
@@ -27,7 +27,7 @@ class RealtimeAudioInputTurnDetectionParam(TypedDict, total=False):
idle_timeout_ms: Optional[int]
"""
Optional idle timeout after which turn detection will auto-timeout when no
- additional audio is received.
+ additional audio is received and emits a `timeout_triggered` event.
"""
interrupt_response: bool
src/openai/types/realtime/realtime_client_event.py
@@ -7,7 +7,6 @@ from ..._utils import PropertyInfo
from .session_update_event import SessionUpdateEvent
from .response_cancel_event import ResponseCancelEvent
from .response_create_event import ResponseCreateEvent
-from .transcription_session_update import TranscriptionSessionUpdate
from .conversation_item_create_event import ConversationItemCreateEvent
from .conversation_item_delete_event import ConversationItemDeleteEvent
from .input_audio_buffer_clear_event import InputAudioBufferClearEvent
@@ -32,7 +31,6 @@ RealtimeClientEvent: TypeAlias = Annotated[
ResponseCancelEvent,
ResponseCreateEvent,
SessionUpdateEvent,
- TranscriptionSessionUpdate,
],
PropertyInfo(discriminator="type"),
]
src/openai/types/realtime/realtime_client_event_param.py
@@ -8,7 +8,6 @@ from typing_extensions import TypeAlias
from .session_update_event_param import SessionUpdateEventParam
from .response_cancel_event_param import ResponseCancelEventParam
from .response_create_event_param import ResponseCreateEventParam
-from .transcription_session_update_param import TranscriptionSessionUpdateParam
from .conversation_item_create_event_param import ConversationItemCreateEventParam
from .conversation_item_delete_event_param import ConversationItemDeleteEventParam
from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam
@@ -32,5 +31,4 @@ RealtimeClientEventParam: TypeAlias = Union[
ResponseCancelEventParam,
ResponseCreateEventParam,
SessionUpdateEventParam,
- TranscriptionSessionUpdateParam,
]
src/openai/types/realtime/models.py → src/openai/types/realtime/realtime_function_tool.py
@@ -5,10 +5,10 @@ from typing_extensions import Literal
from ..._models import BaseModel
-__all__ = ["Models"]
+__all__ = ["RealtimeFunctionTool"]
-class Models(BaseModel):
+class RealtimeFunctionTool(BaseModel):
description: Optional[str] = None
"""
The description of the function, including guidance on when and how to call it,
src/openai/types/realtime/models_param.py → src/openai/types/realtime/realtime_function_tool_param.py
@@ -4,10 +4,10 @@ from __future__ import annotations
from typing_extensions import Literal, TypedDict
-__all__ = ["ModelsParam"]
+__all__ = ["RealtimeFunctionToolParam"]
-class ModelsParam(TypedDict, total=False):
+class RealtimeFunctionToolParam(TypedDict, total=False):
description: str
"""
The description of the function, including guidance on when and how to call it,
src/openai/types/realtime/realtime_response_create_params.py
@@ -3,10 +3,10 @@
from typing import List, Union, Optional
from typing_extensions import Literal, TypeAlias
-from .models import Models
from ..._models import BaseModel
from ..shared.metadata import Metadata
from .conversation_item import ConversationItem
+from .realtime_function_tool import RealtimeFunctionTool
from ..responses.response_prompt import ResponsePrompt
from ..responses.tool_choice_mcp import ToolChoiceMcp
from ..responses.tool_choice_options import ToolChoiceOptions
@@ -18,7 +18,7 @@ __all__ = ["RealtimeResponseCreateParams", "ToolChoice", "Tool"]
ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunction, ToolChoiceMcp]
-Tool: TypeAlias = Union[Models, RealtimeResponseCreateMcpTool]
+Tool: TypeAlias = Union[RealtimeFunctionTool, RealtimeResponseCreateMcpTool]
class RealtimeResponseCreateParams(BaseModel):
src/openai/types/realtime/realtime_response_create_params_param.py
@@ -5,9 +5,9 @@ from __future__ import annotations
from typing import List, Union, Iterable, Optional
from typing_extensions import Literal, TypeAlias, TypedDict
-from .models_param import ModelsParam
from ..shared_params.metadata import Metadata
from .conversation_item_param import ConversationItemParam
+from .realtime_function_tool_param import RealtimeFunctionToolParam
from ..responses.tool_choice_options import ToolChoiceOptions
from ..responses.response_prompt_param import ResponsePromptParam
from ..responses.tool_choice_mcp_param import ToolChoiceMcpParam
@@ -19,7 +19,7 @@ __all__ = ["RealtimeResponseCreateParamsParam", "ToolChoice", "Tool"]
ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunctionParam, ToolChoiceMcpParam]
-Tool: TypeAlias = Union[ModelsParam, RealtimeResponseCreateMcpToolParam]
+Tool: TypeAlias = Union[RealtimeFunctionToolParam, RealtimeResponseCreateMcpToolParam]
class RealtimeResponseCreateParamsParam(TypedDict, total=False):
src/openai/types/realtime/realtime_server_event.py
@@ -25,7 +25,6 @@ from .mcp_list_tools_in_progress import McpListToolsInProgress
from .response_audio_delta_event import ResponseAudioDeltaEvent
from .response_mcp_call_completed import ResponseMcpCallCompleted
from .response_mcp_call_in_progress import ResponseMcpCallInProgress
-from .transcription_session_created import TranscriptionSessionCreated
from .conversation_item_created_event import ConversationItemCreatedEvent
from .conversation_item_deleted_event import ConversationItemDeletedEvent
from .response_output_item_done_event import ResponseOutputItemDoneEvent
@@ -37,7 +36,6 @@ from .conversation_item_truncated_event import ConversationItemTruncatedEvent
from .response_content_part_added_event import ResponseContentPartAddedEvent
from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta
from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent
-from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent
from .input_audio_buffer_timeout_triggered import InputAudioBufferTimeoutTriggered
from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent
from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent
@@ -137,8 +135,6 @@ RealtimeServerEvent: TypeAlias = Annotated[
ResponseTextDoneEvent,
SessionCreatedEvent,
SessionUpdatedEvent,
- TranscriptionSessionUpdatedEvent,
- TranscriptionSessionCreated,
OutputAudioBufferStarted,
OutputAudioBufferStopped,
OutputAudioBufferCleared,
src/openai/types/realtime/realtime_session_create_response.py
@@ -3,12 +3,12 @@
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, TypeAlias
-from .models import Models
from ..._models import BaseModel
from .audio_transcription import AudioTranscription
from .realtime_truncation import RealtimeTruncation
from .noise_reduction_type import NoiseReductionType
from .realtime_audio_formats import RealtimeAudioFormats
+from .realtime_function_tool import RealtimeFunctionTool
from ..responses.response_prompt import ResponsePrompt
from ..responses.tool_choice_mcp import ToolChoiceMcp
from ..responses.tool_choice_options import ToolChoiceOptions
@@ -64,7 +64,7 @@ class AudioInputTurnDetection(BaseModel):
idle_timeout_ms: Optional[int] = None
"""
Optional idle timeout after which turn detection will auto-timeout when no
- additional audio is received.
+ additional audio is received and emits a `timeout_triggered` event.
"""
interrupt_response: Optional[bool] = None
@@ -298,7 +298,7 @@ class ToolMcpTool(BaseModel):
"""
-Tool: TypeAlias = Union[Models, ToolMcpTool]
+Tool: TypeAlias = Union[RealtimeFunctionTool, ToolMcpTool]
class TracingTracingConfiguration(BaseModel):
@@ -325,12 +325,15 @@ Tracing: TypeAlias = Union[Literal["auto"], TracingTracingConfiguration, None]
class RealtimeSessionCreateResponse(BaseModel):
+ client_secret: RealtimeSessionClientSecret
+ """Ephemeral key returned by the API."""
+
+ type: Literal["realtime"]
+ """The type of session to create. Always `realtime` for the Realtime API."""
+
audio: Optional[Audio] = None
"""Configuration for input and output audio."""
- client_secret: Optional[RealtimeSessionClientSecret] = None
- """Ephemeral key returned by the API."""
-
include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
"""Additional fields to include in server outputs.
@@ -415,6 +418,3 @@ class RealtimeSessionCreateResponse(BaseModel):
Controls how the realtime conversation is truncated prior to model inference.
The default is `auto`.
"""
-
- type: Optional[Literal["realtime"]] = None
- """The type of session to create. Always `realtime` for the Realtime API."""
src/openai/types/realtime/realtime_tools_config_param.py
@@ -6,7 +6,7 @@ from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Required, TypeAlias, TypedDict
from ..._types import SequenceNotStr
-from .models_param import ModelsParam
+from .realtime_function_tool_param import RealtimeFunctionToolParam
__all__ = [
"RealtimeToolsConfigParam",
@@ -138,6 +138,6 @@ class Mcp(TypedDict, total=False):
"""
-RealtimeToolsConfigUnionParam: TypeAlias = Union[ModelsParam, Mcp]
+RealtimeToolsConfigUnionParam: TypeAlias = Union[RealtimeFunctionToolParam, Mcp]
RealtimeToolsConfigParam: TypeAlias = List[RealtimeToolsConfigUnionParam]
src/openai/types/realtime/realtime_tools_config_union.py
@@ -3,9 +3,9 @@
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
-from .models import Models
from ..._utils import PropertyInfo
from ..._models import BaseModel
+from .realtime_function_tool import RealtimeFunctionTool
__all__ = [
"RealtimeToolsConfigUnion",
@@ -138,4 +138,4 @@ class Mcp(BaseModel):
"""
-RealtimeToolsConfigUnion: TypeAlias = Annotated[Union[Models, Mcp], PropertyInfo(discriminator="type")]
+RealtimeToolsConfigUnion: TypeAlias = Annotated[Union[RealtimeFunctionTool, Mcp], PropertyInfo(discriminator="type")]
src/openai/types/realtime/realtime_tools_config_union_param.py
@@ -6,7 +6,7 @@ from typing import Dict, Union, Optional
from typing_extensions import Literal, Required, TypeAlias, TypedDict
from ..._types import SequenceNotStr
-from .models_param import ModelsParam
+from .realtime_function_tool_param import RealtimeFunctionToolParam
__all__ = [
"RealtimeToolsConfigUnionParam",
@@ -137,4 +137,4 @@ class Mcp(TypedDict, total=False):
"""
-RealtimeToolsConfigUnionParam: TypeAlias = Union[ModelsParam, Mcp]
+RealtimeToolsConfigUnionParam: TypeAlias = Union[RealtimeFunctionToolParam, Mcp]
src/openai/types/realtime/realtime_transcription_session_client_secret.py
@@ -1,20 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-
-__all__ = ["RealtimeTranscriptionSessionClientSecret"]
-
-
-class RealtimeTranscriptionSessionClientSecret(BaseModel):
- expires_at: int
- """Timestamp for when the token expires.
-
- Currently, all tokens expire after one minute.
- """
-
- value: str
- """
- Ephemeral key usable in client environments to authenticate connections to the
- Realtime API. Use this in client-side environments rather than a standard API
- token, which should only be used server-side.
- """
src/openai/types/realtime/realtime_transcription_session_create_response.py
@@ -4,33 +4,32 @@ from typing import List, Optional
from typing_extensions import Literal
from ..._models import BaseModel
-from .realtime_transcription_session_client_secret import RealtimeTranscriptionSessionClientSecret
+from .audio_transcription import AudioTranscription
+from .noise_reduction_type import NoiseReductionType
+from .realtime_audio_formats import RealtimeAudioFormats
from .realtime_transcription_session_turn_detection import RealtimeTranscriptionSessionTurnDetection
-from .realtime_transcription_session_input_audio_transcription import (
- RealtimeTranscriptionSessionInputAudioTranscription,
-)
-__all__ = ["RealtimeTranscriptionSessionCreateResponse"]
+__all__ = ["RealtimeTranscriptionSessionCreateResponse", "Audio", "AudioInput", "AudioInputNoiseReduction"]
-class RealtimeTranscriptionSessionCreateResponse(BaseModel):
- client_secret: RealtimeTranscriptionSessionClientSecret
- """Ephemeral key returned by the API.
+class AudioInputNoiseReduction(BaseModel):
+ type: Optional[NoiseReductionType] = None
+ """Type of noise reduction.
- Only present when the session is created on the server via REST API.
+ `near_field` is for close-talking microphones such as headphones, `far_field` is
+ for far-field microphones such as laptop or conference room microphones.
"""
- input_audio_format: Optional[str] = None
- """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
- input_audio_transcription: Optional[RealtimeTranscriptionSessionInputAudioTranscription] = None
- """Configuration of the transcription model."""
+class AudioInput(BaseModel):
+ format: Optional[RealtimeAudioFormats] = None
+ """The PCM audio format. Only a 24kHz sample rate is supported."""
- modalities: Optional[List[Literal["text", "audio"]]] = None
- """The set of modalities the model can respond with.
+ noise_reduction: Optional[AudioInputNoiseReduction] = None
+ """Configuration for input audio noise reduction."""
- To disable audio, set this to ["text"].
- """
+ transcription: Optional[AudioTranscription] = None
+ """Configuration of the transcription model."""
turn_detection: Optional[RealtimeTranscriptionSessionTurnDetection] = None
"""Configuration for turn detection.
@@ -39,3 +38,31 @@ class RealtimeTranscriptionSessionCreateResponse(BaseModel):
the start and end of speech based on audio volume and respond at the end of user
speech.
"""
+
+
+class Audio(BaseModel):
+ input: Optional[AudioInput] = None
+
+
+class RealtimeTranscriptionSessionCreateResponse(BaseModel):
+ id: str
+ """Unique identifier for the session that looks like `sess_1234567890abcdef`."""
+
+ object: str
+ """The object type. Always `realtime.transcription_session`."""
+
+ type: Literal["transcription"]
+ """The type of session. Always `transcription` for transcription sessions."""
+
+ audio: Optional[Audio] = None
+ """Configuration for input audio for the session."""
+
+ expires_at: Optional[int] = None
+ """Expiration timestamp for the session, in seconds since epoch."""
+
+ include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
+ """Additional fields to include in server outputs.
+
+ - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+ transcription.
+ """
src/openai/types/realtime/realtime_transcription_session_input_audio_transcription.py
@@ -1,36 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["RealtimeTranscriptionSessionInputAudioTranscription"]
-
-
-class RealtimeTranscriptionSessionInputAudioTranscription(BaseModel):
- language: Optional[str] = None
- """The language of the input audio.
-
- Supplying the input language in
- [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
- format will improve accuracy and latency.
- """
-
- model: Optional[Literal["whisper-1", "gpt-4o-transcribe-latest", "gpt-4o-mini-transcribe", "gpt-4o-transcribe"]] = (
- None
- )
- """The model to use for transcription.
-
- Current options are `whisper-1`, `gpt-4o-transcribe-latest`,
- `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
- """
-
- prompt: Optional[str] = None
- """
- An optional text to guide the model's style or continue a previous audio
- segment. For `whisper-1`, the
- [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
- For `gpt-4o-transcribe` models, the prompt is a free text string, for example
- "expect words related to technology".
- """
src/openai/types/realtime/transcription_session_created.py
@@ -1,24 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse
-
-__all__ = ["TranscriptionSessionCreated"]
-
-
-class TranscriptionSessionCreated(BaseModel):
- event_id: str
- """The unique ID of the server event."""
-
- session: RealtimeTranscriptionSessionCreateResponse
- """A new Realtime transcription session configuration.
-
- When a session is created on the server via REST API, the session object also
- contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
- not present when a session is updated via the WebSocket API.
- """
-
- type: Literal["transcription_session.created"]
- """The event type, must be `transcription_session.created`."""
src/openai/types/realtime/transcription_session_update.py
@@ -1,98 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .audio_transcription import AudioTranscription
-from .noise_reduction_type import NoiseReductionType
-
-__all__ = ["TranscriptionSessionUpdate", "Session", "SessionInputAudioNoiseReduction", "SessionTurnDetection"]
-
-
-class SessionInputAudioNoiseReduction(BaseModel):
- type: Optional[NoiseReductionType] = None
- """Type of noise reduction.
-
- `near_field` is for close-talking microphones such as headphones, `far_field` is
- for far-field microphones such as laptop or conference room microphones.
- """
-
-
-class SessionTurnDetection(BaseModel):
- prefix_padding_ms: Optional[int] = None
- """Amount of audio to include before the VAD detected speech (in milliseconds).
-
- Defaults to 300ms.
- """
-
- silence_duration_ms: Optional[int] = None
- """Duration of silence to detect speech stop (in milliseconds).
-
- Defaults to 500ms. With shorter values the model will respond more quickly, but
- may jump in on short pauses from the user.
- """
-
- threshold: Optional[float] = None
- """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
-
- A higher threshold will require louder audio to activate the model, and thus
- might perform better in noisy environments.
- """
-
- type: Optional[Literal["server_vad"]] = None
- """Type of turn detection.
-
- Only `server_vad` is currently supported for transcription sessions.
- """
-
-
-class Session(BaseModel):
- include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
- """The set of items to include in the transcription.
-
- Current available items are: `item.input_audio_transcription.logprobs`
- """
-
- input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
- """The format of input audio.
-
- Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
- be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
- byte order.
- """
-
- input_audio_noise_reduction: Optional[SessionInputAudioNoiseReduction] = None
- """Configuration for input audio noise reduction.
-
- This can be set to `null` to turn off. Noise reduction filters audio added to
- the input audio buffer before it is sent to VAD and the model. Filtering the
- audio can improve VAD and turn detection accuracy (reducing false positives) and
- model performance by improving perception of the input audio.
- """
-
- input_audio_transcription: Optional[AudioTranscription] = None
- """Configuration for input audio transcription.
-
- The client can optionally set the language and prompt for transcription, these
- offer additional guidance to the transcription service.
- """
-
- turn_detection: Optional[SessionTurnDetection] = None
- """Configuration for turn detection.
-
- Can be set to `null` to turn off. Server VAD means that the model will detect
- the start and end of speech based on audio volume and respond at the end of user
- speech.
- """
-
-
-class TranscriptionSessionUpdate(BaseModel):
- session: Session
- """Realtime transcription session object configuration."""
-
- type: Literal["transcription_session.update"]
- """The event type, must be `transcription_session.update`."""
-
- event_id: Optional[str] = None
- """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/transcription_session_update_param.py
@@ -1,99 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List
-from typing_extensions import Literal, Required, TypedDict
-
-from .noise_reduction_type import NoiseReductionType
-from .audio_transcription_param import AudioTranscriptionParam
-
-__all__ = ["TranscriptionSessionUpdateParam", "Session", "SessionInputAudioNoiseReduction", "SessionTurnDetection"]
-
-
-class SessionInputAudioNoiseReduction(TypedDict, total=False):
- type: NoiseReductionType
- """Type of noise reduction.
-
- `near_field` is for close-talking microphones such as headphones, `far_field` is
- for far-field microphones such as laptop or conference room microphones.
- """
-
-
-class SessionTurnDetection(TypedDict, total=False):
- prefix_padding_ms: int
- """Amount of audio to include before the VAD detected speech (in milliseconds).
-
- Defaults to 300ms.
- """
-
- silence_duration_ms: int
- """Duration of silence to detect speech stop (in milliseconds).
-
- Defaults to 500ms. With shorter values the model will respond more quickly, but
- may jump in on short pauses from the user.
- """
-
- threshold: float
- """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
-
- A higher threshold will require louder audio to activate the model, and thus
- might perform better in noisy environments.
- """
-
- type: Literal["server_vad"]
- """Type of turn detection.
-
- Only `server_vad` is currently supported for transcription sessions.
- """
-
-
-class Session(TypedDict, total=False):
- include: List[Literal["item.input_audio_transcription.logprobs"]]
- """The set of items to include in the transcription.
-
- Current available items are: `item.input_audio_transcription.logprobs`
- """
-
- input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
- """The format of input audio.
-
- Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
- be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
- byte order.
- """
-
- input_audio_noise_reduction: SessionInputAudioNoiseReduction
- """Configuration for input audio noise reduction.
-
- This can be set to `null` to turn off. Noise reduction filters audio added to
- the input audio buffer before it is sent to VAD and the model. Filtering the
- audio can improve VAD and turn detection accuracy (reducing false positives) and
- model performance by improving perception of the input audio.
- """
-
- input_audio_transcription: AudioTranscriptionParam
- """Configuration for input audio transcription.
-
- The client can optionally set the language and prompt for transcription, these
- offer additional guidance to the transcription service.
- """
-
- turn_detection: SessionTurnDetection
- """Configuration for turn detection.
-
- Can be set to `null` to turn off. Server VAD means that the model will detect
- the start and end of speech based on audio volume and respond at the end of user
- speech.
- """
-
-
-class TranscriptionSessionUpdateParam(TypedDict, total=False):
- session: Required[Session]
- """Realtime transcription session object configuration."""
-
- type: Required[Literal["transcription_session.update"]]
- """The event type, must be `transcription_session.update`."""
-
- event_id: str
- """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/transcription_session_updated_event.py
@@ -1,24 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse
-
-__all__ = ["TranscriptionSessionUpdatedEvent"]
-
-
-class TranscriptionSessionUpdatedEvent(BaseModel):
- event_id: str
- """The unique ID of the server event."""
-
- session: RealtimeTranscriptionSessionCreateResponse
- """A new Realtime transcription session configuration.
-
- When a session is created on the server via REST API, the session object also
- contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
- not present when a session is updated via the WebSocket API.
- """
-
- type: Literal["transcription_session.updated"]
- """The event type, must be `transcription_session.updated`."""
src/openai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "openai"
-__version__ = "1.107.0" # x-release-please-version
+__version__ = "1.107.1" # x-release-please-version
.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.107.0"
+ ".": "1.107.1"
}
\ No newline at end of file
.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 118
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-7807ec6037efcee1af7decbfd3974a42b761fb6c6a71b4050fe43484d7fcbac4.yml
-openapi_spec_hash: da6851e3891ad2659a50ed6a736fd32a
-config_hash: 74d955cdc2377213f5268ea309090f6c
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml
+openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132
+config_hash: 930dac3aa861344867e4ac84f037b5df
api.md
@@ -892,7 +892,6 @@ from openai.types.realtime import (
McpListToolsCompleted,
McpListToolsFailed,
McpListToolsInProgress,
- Models,
NoiseReductionType,
OutputAudioBufferClearEvent,
RateLimitsUpdatedEvent,
@@ -909,6 +908,7 @@ from openai.types.realtime import (
RealtimeConversationItemUserMessage,
RealtimeError,
RealtimeErrorEvent,
+ RealtimeFunctionTool,
RealtimeMcpApprovalRequest,
RealtimeMcpApprovalResponse,
RealtimeMcpListTools,
@@ -961,7 +961,6 @@ from openai.types.realtime import (
SessionCreatedEvent,
SessionUpdateEvent,
SessionUpdatedEvent,
- TranscriptionSessionCreated,
TranscriptionSessionUpdate,
TranscriptionSessionUpdatedEvent,
)
@@ -975,9 +974,7 @@ Types:
from openai.types.realtime import (
RealtimeSessionClientSecret,
RealtimeSessionCreateResponse,
- RealtimeTranscriptionSessionClientSecret,
RealtimeTranscriptionSessionCreateResponse,
- RealtimeTranscriptionSessionInputAudioTranscription,
RealtimeTranscriptionSessionTurnDetection,
ClientSecretCreateResponse,
)
CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog
+## 1.107.1 (2025-09-10)
+
+Full Changelog: [v1.107.0...v1.107.1](https://github.com/openai/openai-python/compare/v1.107.0...v1.107.1)
+
+### Chores
+
+* **api:** fix realtime GA types ([570fc5a](https://github.com/openai/openai-python/commit/570fc5a28ada665fd658b24675361680cfeb086f))
+
## 1.107.0 (2025-09-08)
Full Changelog: [v1.106.1...v1.107.0](https://github.com/openai/openai-python/compare/v1.106.1...v1.107.0)
pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "openai"
-version = "1.107.0"
+version = "1.107.1"
description = "The official Python library for the openai API"
dynamic = ["readme"]
license = "Apache-2.0"