Commit 847ff0b8

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-09-10 23:03:41
release: 1.107.1 (#2619) tag: v1.107.1
* chore(api): fix realtime GA types

* release: 1.107.1

---------

Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
1 parent 0296375
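At a glance, this commit removes the standalone `transcription_session` connection resource and the `transcription_session.update` client event from the GA realtime surface. A minimal migration sketch, assuming the GA `session.update` union accepts a `type: "transcription"` session payload, and using `gpt-realtime` and `gpt-4o-transcribe` as placeholder model names not taken from this diff:

```python
from openai import OpenAI

client = OpenAI()

with client.realtime.connect(model="gpt-realtime") as conn:
    # Before 1.107.1: conn.transcription_session.update(session={...})
    # After: configure transcription via the unified session.update event.
    conn.session.update(
        session={
            "type": "transcription",
            "audio": {"input": {"transcription": {"model": "gpt-4o-transcribe"}}},
        }
    )
```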
src/openai/resources/realtime/realtime.py
@@ -32,7 +32,7 @@ from .client_secrets import (
     ClientSecretsWithStreamingResponse,
     AsyncClientSecretsWithStreamingResponse,
 )
-from ...types.realtime import session_update_event_param, transcription_session_update_param
+from ...types.realtime import session_update_event_param
 from ...types.websocket_connection_options import WebsocketConnectionOptions
 from ...types.realtime.realtime_client_event import RealtimeClientEvent
 from ...types.realtime.realtime_server_event import RealtimeServerEvent
@@ -199,7 +199,6 @@ class AsyncRealtimeConnection:
     input_audio_buffer: AsyncRealtimeInputAudioBufferResource
     conversation: AsyncRealtimeConversationResource
     output_audio_buffer: AsyncRealtimeOutputAudioBufferResource
-    transcription_session: AsyncRealtimeTranscriptionSessionResource
 
     _connection: AsyncWebsocketConnection
 
@@ -211,7 +210,6 @@ class AsyncRealtimeConnection:
         self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
         self.conversation = AsyncRealtimeConversationResource(self)
         self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)
-        self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)
 
     async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
         """
@@ -381,7 +379,6 @@ class RealtimeConnection:
     input_audio_buffer: RealtimeInputAudioBufferResource
     conversation: RealtimeConversationResource
     output_audio_buffer: RealtimeOutputAudioBufferResource
-    transcription_session: RealtimeTranscriptionSessionResource
 
     _connection: WebsocketConnection
 
@@ -393,7 +390,6 @@ class RealtimeConnection:
         self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
         self.conversation = RealtimeConversationResource(self)
         self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)
-        self.transcription_session = RealtimeTranscriptionSessionResource(self)
 
     def __iter__(self) -> Iterator[RealtimeServerEvent]:
         """
@@ -565,8 +561,7 @@ class RealtimeSessionResource(BaseRealtimeConnectionResource):
         """
         Send this event to update the session’s configuration.
         The client may send this event at any time to update any field
-        except for `voice` and `model`. `voice` can be updated only if there have been no other
-        audio outputs yet.
+        except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.
 
         When the server receives a `session.update`, it will respond
         with a `session.updated` event showing the full, effective configuration.
@@ -800,19 +795,6 @@ class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
         )
 
 
-class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
-    def update(
-        self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
-    ) -> None:
-        """Send this event to update a transcription session."""
-        self._connection.send(
-            cast(
-                RealtimeClientEventParam,
-                strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
-            )
-        )
-
-
 class BaseAsyncRealtimeConnectionResource:
     def __init__(self, connection: AsyncRealtimeConnection) -> None:
         self._connection = connection
@@ -825,8 +807,7 @@ class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
         """
         Send this event to update the session’s configuration.
         The client may send this event at any time to update any field
-        except for `voice` and `model`. `voice` can be updated only if there have been no other
-        audio outputs yet.
+        except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.
 
         When the server receives a `session.update`, it will respond
         with a `session.updated` event showing the full, effective configuration.
@@ -1058,16 +1039,3 @@ class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource
         await self._connection.send(
             cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
         )
-
-
-class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
-    async def update(
-        self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
-    ) -> None:
-        """Send this event to update a transcription session."""
-        await self._connection.send(
-            cast(
-                RealtimeClientEventParam,
-                strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
-            )
-        )
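The async connection loses the same resource, leaving `session`, `response`, `input_audio_buffer`, `conversation`, and `output_audio_buffer`. A short sketch of driving `session.update` over `AsyncRealtimeConnection`; the `type: "realtime"` payload and the `output_modalities` field are assumptions about the GA session shape, not shown in this diff:

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    async with client.realtime.connect(model="gpt-realtime") as conn:
        # Request text-only output, then wait for the server's acknowledgement.
        await conn.session.update(
            session={"type": "realtime", "output_modalities": ["text"]}
        )
        async for event in conn:
            if event.type == "session.updated":
                break


asyncio.run(main())
```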
src/openai/types/realtime/__init__.py
@@ -2,8 +2,6 @@
 
 from __future__ import annotations
 
-from .models import Models as Models
-from .models_param import ModelsParam as ModelsParam
 from .realtime_error import RealtimeError as RealtimeError
 from .conversation_item import ConversationItem as ConversationItem
 from .realtime_response import RealtimeResponse as RealtimeResponse
@@ -25,6 +23,7 @@ from .session_created_event import SessionCreatedEvent as SessionCreatedEvent
 from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent
 from .conversation_item_done import ConversationItemDone as ConversationItemDone
 from .realtime_audio_formats import RealtimeAudioFormats as RealtimeAudioFormats
+from .realtime_function_tool import RealtimeFunctionTool as RealtimeFunctionTool
 from .realtime_mcp_tool_call import RealtimeMcpToolCall as RealtimeMcpToolCall
 from .realtime_mcphttp_error import RealtimeMcphttpError as RealtimeMcphttpError
 from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent
@@ -60,15 +59,14 @@ from .response_create_event_param import ResponseCreateEventParam as ResponseCre
 from .response_mcp_call_completed import ResponseMcpCallCompleted as ResponseMcpCallCompleted
 from .realtime_audio_config_output import RealtimeAudioConfigOutput as RealtimeAudioConfigOutput
 from .realtime_audio_formats_param import RealtimeAudioFormatsParam as RealtimeAudioFormatsParam
+from .realtime_function_tool_param import RealtimeFunctionToolParam as RealtimeFunctionToolParam
 from .realtime_mcp_tool_call_param import RealtimeMcpToolCallParam as RealtimeMcpToolCallParam
 from .realtime_mcphttp_error_param import RealtimeMcphttpErrorParam as RealtimeMcphttpErrorParam
-from .transcription_session_update import TranscriptionSessionUpdate as TranscriptionSessionUpdate
 from .client_secret_create_response import ClientSecretCreateResponse as ClientSecretCreateResponse
 from .realtime_mcp_approval_request import RealtimeMcpApprovalRequest as RealtimeMcpApprovalRequest
 from .realtime_mcp_list_tools_param import RealtimeMcpListToolsParam as RealtimeMcpListToolsParam
 from .realtime_tracing_config_param import RealtimeTracingConfigParam as RealtimeTracingConfigParam
 from .response_mcp_call_in_progress import ResponseMcpCallInProgress as ResponseMcpCallInProgress
-from .transcription_session_created import TranscriptionSessionCreated as TranscriptionSessionCreated
 from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent
 from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent
 from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent
@@ -100,11 +98,9 @@ from .response_content_part_added_event import ResponseContentPartAddedEvent as
 from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta as ResponseMcpCallArgumentsDelta
 from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent
 from .realtime_audio_config_output_param import RealtimeAudioConfigOutputParam as RealtimeAudioConfigOutputParam
-from .transcription_session_update_param import TranscriptionSessionUpdateParam as TranscriptionSessionUpdateParam
 from .realtime_audio_input_turn_detection import RealtimeAudioInputTurnDetection as RealtimeAudioInputTurnDetection
 from .realtime_mcp_approval_request_param import RealtimeMcpApprovalRequestParam as RealtimeMcpApprovalRequestParam
 from .realtime_truncation_retention_ratio import RealtimeTruncationRetentionRatio as RealtimeTruncationRetentionRatio
-from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent as TranscriptionSessionUpdatedEvent
 from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam
 from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam
 from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam
@@ -181,9 +177,6 @@ from .realtime_conversation_item_assistant_message import (
 from .realtime_response_usage_output_token_details import (
     RealtimeResponseUsageOutputTokenDetails as RealtimeResponseUsageOutputTokenDetails,
 )
-from .realtime_transcription_session_client_secret import (
-    RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret,
-)
 from .response_function_call_arguments_delta_event import (
     ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent,
 )
@@ -229,9 +222,6 @@ from .conversation_item_input_audio_transcription_delta_event import (
 from .conversation_item_input_audio_transcription_failed_event import (
     ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent,
 )
-from .realtime_transcription_session_input_audio_transcription import (
-    RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription,
-)
 from .realtime_transcription_session_audio_input_turn_detection import (
     RealtimeTranscriptionSessionAudioInputTurnDetection as RealtimeTranscriptionSessionAudioInputTurnDetection,
 )
src/openai/types/realtime/client_secret_create_response.py
@@ -1,15 +1,18 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 from typing import Union
-from typing_extensions import TypeAlias
+from typing_extensions import Annotated, TypeAlias
 
+from ..._utils import PropertyInfo
 from ..._models import BaseModel
 from .realtime_session_create_response import RealtimeSessionCreateResponse
 from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse
 
 __all__ = ["ClientSecretCreateResponse", "Session"]
 
-Session: TypeAlias = Union[RealtimeSessionCreateResponse, RealtimeTranscriptionSessionCreateResponse]
+Session: TypeAlias = Annotated[
+    Union[RealtimeSessionCreateResponse, RealtimeTranscriptionSessionCreateResponse], PropertyInfo(discriminator="type")
+]
 
 
 class ClientSecretCreateResponse(BaseModel):
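The `Annotated[..., PropertyInfo(discriminator="type")]` wrapper turns `Session` into a tagged union: the model layer dispatches on the `type` field rather than trying each member in order, which is also why `type` becomes a required literal on both session response models later in this commit. A hypothetical sketch of the resulting narrowing:

```python
from openai import OpenAI

client = OpenAI()

# The minimal session payload here is illustrative.
secret = client.realtime.client_secrets.create(session={"type": "realtime"})
session = secret.session
if session.type == "realtime":
    # Discriminator narrows to RealtimeSessionCreateResponse.
    print(session.client_secret.value)
else:
    # ...and to RealtimeTranscriptionSessionCreateResponse here.
    print(session.id)
```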
src/openai/types/realtime/realtime_audio_input_turn_detection.py
@@ -27,7 +27,7 @@ class RealtimeAudioInputTurnDetection(BaseModel):
     idle_timeout_ms: Optional[int] = None
     """
     Optional idle timeout after which turn detection will auto-timeout when no
-    additional audio is received.
+    additional audio is received and emits a `timeout_triggered` event.
     """
 
     interrupt_response: Optional[bool] = None
src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
@@ -27,7 +27,7 @@ class RealtimeAudioInputTurnDetectionParam(TypedDict, total=False):
     idle_timeout_ms: Optional[int]
     """
     Optional idle timeout after which turn detection will auto-timeout when no
-    additional audio is received.
+    additional audio is received and emits a `timeout_triggered` event.
     """
 
     interrupt_response: bool
src/openai/types/realtime/realtime_client_event.py
@@ -7,7 +7,6 @@ from ..._utils import PropertyInfo
 from .session_update_event import SessionUpdateEvent
 from .response_cancel_event import ResponseCancelEvent
 from .response_create_event import ResponseCreateEvent
-from .transcription_session_update import TranscriptionSessionUpdate
 from .conversation_item_create_event import ConversationItemCreateEvent
 from .conversation_item_delete_event import ConversationItemDeleteEvent
 from .input_audio_buffer_clear_event import InputAudioBufferClearEvent
@@ -32,7 +31,6 @@ RealtimeClientEvent: TypeAlias = Annotated[
         ResponseCancelEvent,
         ResponseCreateEvent,
         SessionUpdateEvent,
-        TranscriptionSessionUpdate,
     ],
     PropertyInfo(discriminator="type"),
 ]
src/openai/types/realtime/realtime_client_event_param.py
@@ -8,7 +8,6 @@ from typing_extensions import TypeAlias
 from .session_update_event_param import SessionUpdateEventParam
 from .response_cancel_event_param import ResponseCancelEventParam
 from .response_create_event_param import ResponseCreateEventParam
-from .transcription_session_update_param import TranscriptionSessionUpdateParam
 from .conversation_item_create_event_param import ConversationItemCreateEventParam
 from .conversation_item_delete_event_param import ConversationItemDeleteEventParam
 from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam
@@ -32,5 +31,4 @@ RealtimeClientEventParam: TypeAlias = Union[
     ResponseCancelEventParam,
     ResponseCreateEventParam,
     SessionUpdateEventParam,
-    TranscriptionSessionUpdateParam,
 ]
src/openai/types/realtime/models.py → src/openai/types/realtime/realtime_function_tool.py
@@ -5,10 +5,10 @@ from typing_extensions import Literal
 
 from ..._models import BaseModel
 
-__all__ = ["Models"]
+__all__ = ["RealtimeFunctionTool"]
 
 
-class Models(BaseModel):
+class RealtimeFunctionTool(BaseModel):
     description: Optional[str] = None
     """
     The description of the function, including guidance on when and how to call it,
src/openai/types/realtime/models_param.py → src/openai/types/realtime/realtime_function_tool_param.py
@@ -4,10 +4,10 @@ from __future__ import annotations
 
 from typing_extensions import Literal, TypedDict
 
-__all__ = ["ModelsParam"]
+__all__ = ["RealtimeFunctionToolParam"]
 
 
-class ModelsParam(TypedDict, total=False):
+class RealtimeFunctionToolParam(TypedDict, total=False):
     description: str
     """
     The description of the function, including guidance on when and how to call it,
src/openai/types/realtime/realtime_response_create_params.py
@@ -3,10 +3,10 @@
 from typing import List, Union, Optional
 from typing_extensions import Literal, TypeAlias
 
-from .models import Models
 from ..._models import BaseModel
 from ..shared.metadata import Metadata
 from .conversation_item import ConversationItem
+from .realtime_function_tool import RealtimeFunctionTool
 from ..responses.response_prompt import ResponsePrompt
 from ..responses.tool_choice_mcp import ToolChoiceMcp
 from ..responses.tool_choice_options import ToolChoiceOptions
@@ -18,7 +18,7 @@ __all__ = ["RealtimeResponseCreateParams", "ToolChoice", "Tool"]
 
 ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunction, ToolChoiceMcp]
 
-Tool: TypeAlias = Union[Models, RealtimeResponseCreateMcpTool]
+Tool: TypeAlias = Union[RealtimeFunctionTool, RealtimeResponseCreateMcpTool]
 
 
 class RealtimeResponseCreateParams(BaseModel):
src/openai/types/realtime/realtime_response_create_params_param.py
@@ -5,9 +5,9 @@ from __future__ import annotations
 from typing import List, Union, Iterable, Optional
 from typing_extensions import Literal, TypeAlias, TypedDict
 
-from .models_param import ModelsParam
 from ..shared_params.metadata import Metadata
 from .conversation_item_param import ConversationItemParam
+from .realtime_function_tool_param import RealtimeFunctionToolParam
 from ..responses.tool_choice_options import ToolChoiceOptions
 from ..responses.response_prompt_param import ResponsePromptParam
 from ..responses.tool_choice_mcp_param import ToolChoiceMcpParam
@@ -19,7 +19,7 @@ __all__ = ["RealtimeResponseCreateParamsParam", "ToolChoice", "Tool"]
 
 ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunctionParam, ToolChoiceMcpParam]
 
-Tool: TypeAlias = Union[ModelsParam, RealtimeResponseCreateMcpToolParam]
+Tool: TypeAlias = Union[RealtimeFunctionToolParam, RealtimeResponseCreateMcpToolParam]
 
 
 class RealtimeResponseCreateParamsParam(TypedDict, total=False):
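`Models`/`ModelsParam` were misleading generated names for the realtime function-tool shape; the rename to `RealtimeFunctionTool`/`RealtimeFunctionToolParam` says what the type actually is. An illustrative tool literal, with the field set inferred from the renamed model rather than spelled out in this diff:

```python
from openai.types.realtime import RealtimeFunctionToolParam

# A function tool as it would appear in a tools list or response.create payload.
weather_tool: RealtimeFunctionToolParam = {
    "type": "function",
    "name": "get_weather",
    "description": "Look up the current weather for a city.",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}
```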
src/openai/types/realtime/realtime_server_event.py
@@ -25,7 +25,6 @@ from .mcp_list_tools_in_progress import McpListToolsInProgress
 from .response_audio_delta_event import ResponseAudioDeltaEvent
 from .response_mcp_call_completed import ResponseMcpCallCompleted
 from .response_mcp_call_in_progress import ResponseMcpCallInProgress
-from .transcription_session_created import TranscriptionSessionCreated
 from .conversation_item_created_event import ConversationItemCreatedEvent
 from .conversation_item_deleted_event import ConversationItemDeletedEvent
 from .response_output_item_done_event import ResponseOutputItemDoneEvent
@@ -37,7 +36,6 @@ from .conversation_item_truncated_event import ConversationItemTruncatedEvent
 from .response_content_part_added_event import ResponseContentPartAddedEvent
 from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta
 from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent
-from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent
 from .input_audio_buffer_timeout_triggered import InputAudioBufferTimeoutTriggered
 from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent
 from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent
@@ -137,8 +135,6 @@ RealtimeServerEvent: TypeAlias = Annotated[
         ResponseTextDoneEvent,
         SessionCreatedEvent,
         SessionUpdatedEvent,
-        TranscriptionSessionUpdatedEvent,
-        TranscriptionSessionCreated,
         OutputAudioBufferStarted,
         OutputAudioBufferStopped,
         OutputAudioBufferCleared,
src/openai/types/realtime/realtime_session_create_response.py
@@ -3,12 +3,12 @@
 from typing import Dict, List, Union, Optional
 from typing_extensions import Literal, TypeAlias
 
-from .models import Models
 from ..._models import BaseModel
 from .audio_transcription import AudioTranscription
 from .realtime_truncation import RealtimeTruncation
 from .noise_reduction_type import NoiseReductionType
 from .realtime_audio_formats import RealtimeAudioFormats
+from .realtime_function_tool import RealtimeFunctionTool
 from ..responses.response_prompt import ResponsePrompt
 from ..responses.tool_choice_mcp import ToolChoiceMcp
 from ..responses.tool_choice_options import ToolChoiceOptions
@@ -64,7 +64,7 @@ class AudioInputTurnDetection(BaseModel):
     idle_timeout_ms: Optional[int] = None
     """
     Optional idle timeout after which turn detection will auto-timeout when no
-    additional audio is received.
+    additional audio is received and emits a `timeout_triggered` event.
     """
 
     interrupt_response: Optional[bool] = None
@@ -298,7 +298,7 @@ class ToolMcpTool(BaseModel):
     """
 
 
-Tool: TypeAlias = Union[Models, ToolMcpTool]
+Tool: TypeAlias = Union[RealtimeFunctionTool, ToolMcpTool]
 
 
 class TracingTracingConfiguration(BaseModel):
@@ -325,12 +325,15 @@ Tracing: TypeAlias = Union[Literal["auto"], TracingTracingConfiguration, None]
 
 
 class RealtimeSessionCreateResponse(BaseModel):
+    client_secret: RealtimeSessionClientSecret
+    """Ephemeral key returned by the API."""
+
+    type: Literal["realtime"]
+    """The type of session to create. Always `realtime` for the Realtime API."""
+
     audio: Optional[Audio] = None
     """Configuration for input and output audio."""
 
-    client_secret: Optional[RealtimeSessionClientSecret] = None
-    """Ephemeral key returned by the API."""
-
     include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
     """Additional fields to include in server outputs.
 
@@ -415,6 +418,3 @@ class RealtimeSessionCreateResponse(BaseModel):
     Controls how the realtime conversation is truncated prior to model inference.
     The default is `auto`.
     """
-
-    type: Optional[Literal["realtime"]] = None
-    """The type of session to create. Always `realtime` for the Realtime API."""
src/openai/types/realtime/realtime_tools_config_param.py
@@ -6,7 +6,7 @@ from typing import Dict, List, Union, Optional
 from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from ..._types import SequenceNotStr
-from .models_param import ModelsParam
+from .realtime_function_tool_param import RealtimeFunctionToolParam
 
 __all__ = [
     "RealtimeToolsConfigParam",
@@ -138,6 +138,6 @@ class Mcp(TypedDict, total=False):
     """
 
 
-RealtimeToolsConfigUnionParam: TypeAlias = Union[ModelsParam, Mcp]
+RealtimeToolsConfigUnionParam: TypeAlias = Union[RealtimeFunctionToolParam, Mcp]
 
 RealtimeToolsConfigParam: TypeAlias = List[RealtimeToolsConfigUnionParam]
src/openai/types/realtime/realtime_tools_config_union.py
@@ -3,9 +3,9 @@
 from typing import Dict, List, Union, Optional
 from typing_extensions import Literal, Annotated, TypeAlias
 
-from .models import Models
 from ..._utils import PropertyInfo
 from ..._models import BaseModel
+from .realtime_function_tool import RealtimeFunctionTool
 
 __all__ = [
     "RealtimeToolsConfigUnion",
@@ -138,4 +138,4 @@ class Mcp(BaseModel):
     """
 
 
-RealtimeToolsConfigUnion: TypeAlias = Annotated[Union[Models, Mcp], PropertyInfo(discriminator="type")]
+RealtimeToolsConfigUnion: TypeAlias = Annotated[Union[RealtimeFunctionTool, Mcp], PropertyInfo(discriminator="type")]
src/openai/types/realtime/realtime_tools_config_union_param.py
@@ -6,7 +6,7 @@ from typing import Dict, Union, Optional
 from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from ..._types import SequenceNotStr
-from .models_param import ModelsParam
+from .realtime_function_tool_param import RealtimeFunctionToolParam
 
 __all__ = [
     "RealtimeToolsConfigUnionParam",
@@ -137,4 +137,4 @@ class Mcp(TypedDict, total=False):
     """
 
 
-RealtimeToolsConfigUnionParam: TypeAlias = Union[ModelsParam, Mcp]
+RealtimeToolsConfigUnionParam: TypeAlias = Union[RealtimeFunctionToolParam, Mcp]
src/openai/types/realtime/realtime_transcription_session_client_secret.py
@@ -1,20 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-
-__all__ = ["RealtimeTranscriptionSessionClientSecret"]
-
-
-class RealtimeTranscriptionSessionClientSecret(BaseModel):
-    expires_at: int
-    """Timestamp for when the token expires.
-
-    Currently, all tokens expire after one minute.
-    """
-
-    value: str
-    """
-    Ephemeral key usable in client environments to authenticate connections to the
-    Realtime API. Use this in client-side environments rather than a standard API
-    token, which should only be used server-side.
-    """
src/openai/types/realtime/realtime_transcription_session_create_response.py
@@ -4,33 +4,32 @@ from typing import List, Optional
 from typing_extensions import Literal
 
 from ..._models import BaseModel
-from .realtime_transcription_session_client_secret import RealtimeTranscriptionSessionClientSecret
+from .audio_transcription import AudioTranscription
+from .noise_reduction_type import NoiseReductionType
+from .realtime_audio_formats import RealtimeAudioFormats
 from .realtime_transcription_session_turn_detection import RealtimeTranscriptionSessionTurnDetection
-from .realtime_transcription_session_input_audio_transcription import (
-    RealtimeTranscriptionSessionInputAudioTranscription,
-)
 
-__all__ = ["RealtimeTranscriptionSessionCreateResponse"]
+__all__ = ["RealtimeTranscriptionSessionCreateResponse", "Audio", "AudioInput", "AudioInputNoiseReduction"]
 
 
-class RealtimeTranscriptionSessionCreateResponse(BaseModel):
-    client_secret: RealtimeTranscriptionSessionClientSecret
-    """Ephemeral key returned by the API.
+class AudioInputNoiseReduction(BaseModel):
+    type: Optional[NoiseReductionType] = None
+    """Type of noise reduction.
 
-    Only present when the session is created on the server via REST API.
+    `near_field` is for close-talking microphones such as headphones, `far_field` is
+    for far-field microphones such as laptop or conference room microphones.
     """
 
-    input_audio_format: Optional[str] = None
-    """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
 
-    input_audio_transcription: Optional[RealtimeTranscriptionSessionInputAudioTranscription] = None
-    """Configuration of the transcription model."""
+class AudioInput(BaseModel):
+    format: Optional[RealtimeAudioFormats] = None
+    """The PCM audio format. Only a 24kHz sample rate is supported."""
 
-    modalities: Optional[List[Literal["text", "audio"]]] = None
-    """The set of modalities the model can respond with.
+    noise_reduction: Optional[AudioInputNoiseReduction] = None
+    """Configuration for input audio noise reduction."""
 
-    To disable audio, set this to ["text"].
-    """
+    transcription: Optional[AudioTranscription] = None
+    """Configuration of the transcription model."""
 
     turn_detection: Optional[RealtimeTranscriptionSessionTurnDetection] = None
     """Configuration for turn detection.
@@ -39,3 +38,31 @@ class RealtimeTranscriptionSessionCreateResponse(BaseModel):
     the start and end of speech based on audio volume and respond at the end of user
     speech.
     """
+
+
+class Audio(BaseModel):
+    input: Optional[AudioInput] = None
+
+
+class RealtimeTranscriptionSessionCreateResponse(BaseModel):
+    id: str
+    """Unique identifier for the session that looks like `sess_1234567890abcdef`."""
+
+    object: str
+    """The object type. Always `realtime.transcription_session`."""
+
+    type: Literal["transcription"]
+    """The type of session. Always `transcription` for transcription sessions."""
+
+    audio: Optional[Audio] = None
+    """Configuration for input audio for the session."""
+
+    expires_at: Optional[int] = None
+    """Expiration timestamp for the session, in seconds since epoch."""
+
+    include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
+    """Additional fields to include in server outputs.
+
+    - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+      transcription.
+    """
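The transcription response is reshaped to match the GA session objects: the flat `input_audio_format`, `input_audio_transcription`, and `modalities` fields give way to a nested `audio.input` block, plus required `id`, `object`, and `type` fields. A hedged accessor for the new shape, guarding each Optional level:

```python
from typing import Optional

from openai.types.realtime import RealtimeTranscriptionSessionCreateResponse


def transcription_model(resp: RealtimeTranscriptionSessionCreateResponse) -> Optional[str]:
    # audio, audio.input, and audio.input.transcription are all Optional.
    if resp.audio and resp.audio.input and resp.audio.input.transcription:
        return resp.audio.input.transcription.model
    return None
```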
src/openai/types/realtime/realtime_transcription_session_input_audio_transcription.py
@@ -1,36 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["RealtimeTranscriptionSessionInputAudioTranscription"]
-
-
-class RealtimeTranscriptionSessionInputAudioTranscription(BaseModel):
-    language: Optional[str] = None
-    """The language of the input audio.
-
-    Supplying the input language in
-    [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
-    format will improve accuracy and latency.
-    """
-
-    model: Optional[Literal["whisper-1", "gpt-4o-transcribe-latest", "gpt-4o-mini-transcribe", "gpt-4o-transcribe"]] = (
-        None
-    )
-    """The model to use for transcription.
-
-    Current options are `whisper-1`, `gpt-4o-transcribe-latest`,
-    `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
-    """
-
-    prompt: Optional[str] = None
-    """
-    An optional text to guide the model's style or continue a previous audio
-    segment. For `whisper-1`, the
-    [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
-    For `gpt-4o-transcribe` models, the prompt is a free text string, for example
-    "expect words related to technology".
-    """
src/openai/types/realtime/transcription_session_created.py
@@ -1,24 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse
-
-__all__ = ["TranscriptionSessionCreated"]
-
-
-class TranscriptionSessionCreated(BaseModel):
-    event_id: str
-    """The unique ID of the server event."""
-
-    session: RealtimeTranscriptionSessionCreateResponse
-    """A new Realtime transcription session configuration.
-
-    When a session is created on the server via REST API, the session object also
-    contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
-    not present when a session is updated via the WebSocket API.
-    """
-
-    type: Literal["transcription_session.created"]
-    """The event type, must be `transcription_session.created`."""
src/openai/types/realtime/transcription_session_update.py
@@ -1,98 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .audio_transcription import AudioTranscription
-from .noise_reduction_type import NoiseReductionType
-
-__all__ = ["TranscriptionSessionUpdate", "Session", "SessionInputAudioNoiseReduction", "SessionTurnDetection"]
-
-
-class SessionInputAudioNoiseReduction(BaseModel):
-    type: Optional[NoiseReductionType] = None
-    """Type of noise reduction.
-
-    `near_field` is for close-talking microphones such as headphones, `far_field` is
-    for far-field microphones such as laptop or conference room microphones.
-    """
-
-
-class SessionTurnDetection(BaseModel):
-    prefix_padding_ms: Optional[int] = None
-    """Amount of audio to include before the VAD detected speech (in milliseconds).
-
-    Defaults to 300ms.
-    """
-
-    silence_duration_ms: Optional[int] = None
-    """Duration of silence to detect speech stop (in milliseconds).
-
-    Defaults to 500ms. With shorter values the model will respond more quickly, but
-    may jump in on short pauses from the user.
-    """
-
-    threshold: Optional[float] = None
-    """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
-
-    A higher threshold will require louder audio to activate the model, and thus
-    might perform better in noisy environments.
-    """
-
-    type: Optional[Literal["server_vad"]] = None
-    """Type of turn detection.
-
-    Only `server_vad` is currently supported for transcription sessions.
-    """
-
-
-class Session(BaseModel):
-    include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None
-    """The set of items to include in the transcription.
-
-    Current available items are: `item.input_audio_transcription.logprobs`
-    """
-
-    input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
-    """The format of input audio.
-
-    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
-    be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
-    byte order.
-    """
-
-    input_audio_noise_reduction: Optional[SessionInputAudioNoiseReduction] = None
-    """Configuration for input audio noise reduction.
-
-    This can be set to `null` to turn off. Noise reduction filters audio added to
-    the input audio buffer before it is sent to VAD and the model. Filtering the
-    audio can improve VAD and turn detection accuracy (reducing false positives) and
-    model performance by improving perception of the input audio.
-    """
-
-    input_audio_transcription: Optional[AudioTranscription] = None
-    """Configuration for input audio transcription.
-
-    The client can optionally set the language and prompt for transcription, these
-    offer additional guidance to the transcription service.
-    """
-
-    turn_detection: Optional[SessionTurnDetection] = None
-    """Configuration for turn detection.
-
-    Can be set to `null` to turn off. Server VAD means that the model will detect
-    the start and end of speech based on audio volume and respond at the end of user
-    speech.
-    """
-
-
-class TranscriptionSessionUpdate(BaseModel):
-    session: Session
-    """Realtime transcription session object configuration."""
-
-    type: Literal["transcription_session.update"]
-    """The event type, must be `transcription_session.update`."""
-
-    event_id: Optional[str] = None
-    """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/transcription_session_update_param.py
@@ -1,99 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List
-from typing_extensions import Literal, Required, TypedDict
-
-from .noise_reduction_type import NoiseReductionType
-from .audio_transcription_param import AudioTranscriptionParam
-
-__all__ = ["TranscriptionSessionUpdateParam", "Session", "SessionInputAudioNoiseReduction", "SessionTurnDetection"]
-
-
-class SessionInputAudioNoiseReduction(TypedDict, total=False):
-    type: NoiseReductionType
-    """Type of noise reduction.
-
-    `near_field` is for close-talking microphones such as headphones, `far_field` is
-    for far-field microphones such as laptop or conference room microphones.
-    """
-
-
-class SessionTurnDetection(TypedDict, total=False):
-    prefix_padding_ms: int
-    """Amount of audio to include before the VAD detected speech (in milliseconds).
-
-    Defaults to 300ms.
-    """
-
-    silence_duration_ms: int
-    """Duration of silence to detect speech stop (in milliseconds).
-
-    Defaults to 500ms. With shorter values the model will respond more quickly, but
-    may jump in on short pauses from the user.
-    """
-
-    threshold: float
-    """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
-
-    A higher threshold will require louder audio to activate the model, and thus
-    might perform better in noisy environments.
-    """
-
-    type: Literal["server_vad"]
-    """Type of turn detection.
-
-    Only `server_vad` is currently supported for transcription sessions.
-    """
-
-
-class Session(TypedDict, total=False):
-    include: List[Literal["item.input_audio_transcription.logprobs"]]
-    """The set of items to include in the transcription.
-
-    Current available items are: `item.input_audio_transcription.logprobs`
-    """
-
-    input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
-    """The format of input audio.
-
-    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
-    be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
-    byte order.
-    """
-
-    input_audio_noise_reduction: SessionInputAudioNoiseReduction
-    """Configuration for input audio noise reduction.
-
-    This can be set to `null` to turn off. Noise reduction filters audio added to
-    the input audio buffer before it is sent to VAD and the model. Filtering the
-    audio can improve VAD and turn detection accuracy (reducing false positives) and
-    model performance by improving perception of the input audio.
-    """
-
-    input_audio_transcription: AudioTranscriptionParam
-    """Configuration for input audio transcription.
-
-    The client can optionally set the language and prompt for transcription, these
-    offer additional guidance to the transcription service.
-    """
-
-    turn_detection: SessionTurnDetection
-    """Configuration for turn detection.
-
-    Can be set to `null` to turn off. Server VAD means that the model will detect
-    the start and end of speech based on audio volume and respond at the end of user
-    speech.
-    """
-
-
-class TranscriptionSessionUpdateParam(TypedDict, total=False):
-    session: Required[Session]
-    """Realtime transcription session object configuration."""
-
-    type: Required[Literal["transcription_session.update"]]
-    """The event type, must be `transcription_session.update`."""
-
-    event_id: str
-    """Optional client-generated ID used to identify this event."""
src/openai/types/realtime/transcription_session_updated_event.py
@@ -1,24 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse
-
-__all__ = ["TranscriptionSessionUpdatedEvent"]
-
-
-class TranscriptionSessionUpdatedEvent(BaseModel):
-    event_id: str
-    """The unique ID of the server event."""
-
-    session: RealtimeTranscriptionSessionCreateResponse
-    """A new Realtime transcription session configuration.
-
-    When a session is created on the server via REST API, the session object also
-    contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
-    not present when a session is updated via the WebSocket API.
-    """
-
-    type: Literal["transcription_session.updated"]
-    """The event type, must be `transcription_session.updated`."""
src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "openai"
-__version__ = "1.107.0"  # x-release-please-version
+__version__ = "1.107.1"  # x-release-please-version
.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.107.0"
+  ".": "1.107.1"
 }
\ No newline at end of file
.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 118
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-7807ec6037efcee1af7decbfd3974a42b761fb6c6a71b4050fe43484d7fcbac4.yml
-openapi_spec_hash: da6851e3891ad2659a50ed6a736fd32a
-config_hash: 74d955cdc2377213f5268ea309090f6c
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml
+openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132
+config_hash: 930dac3aa861344867e4ac84f037b5df
api.md
@@ -892,7 +892,6 @@ from openai.types.realtime import (
     McpListToolsCompleted,
     McpListToolsFailed,
     McpListToolsInProgress,
-    Models,
     NoiseReductionType,
     OutputAudioBufferClearEvent,
     RateLimitsUpdatedEvent,
@@ -909,6 +908,7 @@ from openai.types.realtime import (
     RealtimeConversationItemUserMessage,
     RealtimeError,
     RealtimeErrorEvent,
+    RealtimeFunctionTool,
     RealtimeMcpApprovalRequest,
     RealtimeMcpApprovalResponse,
     RealtimeMcpListTools,
@@ -961,7 +961,6 @@ from openai.types.realtime import (
     SessionCreatedEvent,
     SessionUpdateEvent,
     SessionUpdatedEvent,
-    TranscriptionSessionCreated,
     TranscriptionSessionUpdate,
     TranscriptionSessionUpdatedEvent,
 )
@@ -975,9 +974,7 @@ Types:
 from openai.types.realtime import (
     RealtimeSessionClientSecret,
     RealtimeSessionCreateResponse,
-    RealtimeTranscriptionSessionClientSecret,
     RealtimeTranscriptionSessionCreateResponse,
-    RealtimeTranscriptionSessionInputAudioTranscription,
     RealtimeTranscriptionSessionTurnDetection,
     ClientSecretCreateResponse,
 )
CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 1.107.1 (2025-09-10)
+
+Full Changelog: [v1.107.0...v1.107.1](https://github.com/openai/openai-python/compare/v1.107.0...v1.107.1)
+
+### Chores
+
+* **api:** fix realtime GA types ([570fc5a](https://github.com/openai/openai-python/commit/570fc5a28ada665fd658b24675361680cfeb086f))
+
 ## 1.107.0 (2025-09-08)
 
 Full Changelog: [v1.106.1...v1.107.0](https://github.com/openai/openai-python/compare/v1.106.1...v1.107.0)
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.107.0"
+version = "1.107.1"
 description = "The official Python library for the openai API"
 dynamic = ["readme"]
 license = "Apache-2.0"