Commit 4756247c
Changed files (21)
src/
  openai/
    resources/
      responses/
    types/
      realtime/
      responses/
tests/
  api_resources/
    realtime/
src/openai/resources/responses/responses.py
@@ -288,10 +288,10 @@ class Responses(SyncAPIResource):
truncation: The truncation strategy to use for the model response.
- - `auto`: If the context of this response and previous ones exceeds the model's
- context window size, the model will truncate the response to fit the context
- window by dropping input items in the middle of the conversation.
- - `disabled` (default): If a model response will exceed the context window size
+ - `auto`: If the input to this Response exceeds the model's context window size,
+ the model will truncate the response to fit the context window by dropping
+ items from the beginning of the conversation.
+ - `disabled` (default): If the input size will exceed the context window size
for a model, the request will fail with a 400 error.
user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
@@ -527,10 +527,10 @@ class Responses(SyncAPIResource):
truncation: The truncation strategy to use for the model response.
- - `auto`: If the context of this response and previous ones exceeds the model's
- context window size, the model will truncate the response to fit the context
- window by dropping input items in the middle of the conversation.
- - `disabled` (default): If a model response will exceed the context window size
+ - `auto`: If the input to this Response exceeds the model's context window size,
+ the model will truncate the response to fit the context window by dropping
+ items from the beginning of the conversation.
+ - `disabled` (default): If the input size will exceed the context window size
for a model, the request will fail with a 400 error.
user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
@@ -766,10 +766,10 @@ class Responses(SyncAPIResource):
truncation: The truncation strategy to use for the model response.
- - `auto`: If the context of this response and previous ones exceeds the model's
- context window size, the model will truncate the response to fit the context
- window by dropping input items in the middle of the conversation.
- - `disabled` (default): If a model response will exceed the context window size
+ - `auto`: If the input to this Response exceeds the model's context window size,
+ the model will truncate the response to fit the context window by dropping
+ items from the beginning of the conversation.
+ - `disabled` (default): If the input size will exceed the context window size
for a model, the request will fail with a 400 error.
user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
@@ -1719,10 +1719,10 @@ class AsyncResponses(AsyncAPIResource):
truncation: The truncation strategy to use for the model response.
- - `auto`: If the context of this response and previous ones exceeds the model's
- context window size, the model will truncate the response to fit the context
- window by dropping input items in the middle of the conversation.
- - `disabled` (default): If a model response will exceed the context window size
+ - `auto`: If the input to this Response exceeds the model's context window size,
+ the model will truncate the response to fit the context window by dropping
+ items from the beginning of the conversation.
+ - `disabled` (default): If the input size will exceed the context window size
for a model, the request will fail with a 400 error.
user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
@@ -1958,10 +1958,10 @@ class AsyncResponses(AsyncAPIResource):
truncation: The truncation strategy to use for the model response.
- - `auto`: If the context of this response and previous ones exceeds the model's
- context window size, the model will truncate the response to fit the context
- window by dropping input items in the middle of the conversation.
- - `disabled` (default): If a model response will exceed the context window size
+ - `auto`: If the input to this Response exceeds the model's context window size,
+ the model will truncate the response to fit the context window by dropping
+ items from the beginning of the conversation.
+ - `disabled` (default): If the input size will exceed the context window size
for a model, the request will fail with a 400 error.
user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
@@ -2197,10 +2197,10 @@ class AsyncResponses(AsyncAPIResource):
truncation: The truncation strategy to use for the model response.
- - `auto`: If the context of this response and previous ones exceeds the model's
- context window size, the model will truncate the response to fit the context
- window by dropping input items in the middle of the conversation.
- - `disabled` (default): If a model response will exceed the context window size
+ - `auto`: If the input to this Response exceeds the model's context window size,
+ the model will truncate the response to fit the context window by dropping
+ items from the beginning of the conversation.
+ - `disabled` (default): If the input size will exceed the context window size
for a model, the request will fail with a 400 error.
user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
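Note: the revised `truncation` docs describe runtime behavior, not a code change; the call site is unchanged. A minimal sketch of opting into automatic truncation (the model name and input are illustrative):

from openai import OpenAI

client = OpenAI()

# With truncation="auto", input that exceeds the model's context window is
# trimmed by dropping items from the beginning of the conversation (per the
# updated wording above); with the default "disabled", the request instead
# fails with a 400 error.
response = client.responses.create(
    model="gpt-4.1",  # illustrative
    input="Summarize our conversation so far.",
    truncation="auto",
)
print(response.output_text)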
src/openai/types/realtime/input_audio_buffer_timeout_triggered.py
@@ -9,10 +9,16 @@ __all__ = ["InputAudioBufferTimeoutTriggered"]
class InputAudioBufferTimeoutTriggered(BaseModel):
audio_end_ms: int
- """Millisecond offset where speech ended within the buffered audio."""
+ """
+ Millisecond offset of audio written to the input audio buffer at the time the
+ timeout was triggered.
+ """
audio_start_ms: int
- """Millisecond offset where speech started within the buffered audio."""
+ """
+ Millisecond offset of audio written to the input audio buffer that was after the
+ playback time of the last model response.
+ """
event_id: str
"""The unique ID of the server event."""
src/openai/types/realtime/realtime_audio_config_input.py
@@ -49,8 +49,11 @@ class RealtimeAudioConfigInput(BaseModel):
"""Configuration for turn detection, ether Server VAD or Semantic VAD.
This can be set to `null` to turn off, in which case the client must manually
- trigger model response. Server VAD means that the model will detect the start
- and end of speech based on audio volume and respond at the end of user speech.
+ trigger model response.
+
+ Server VAD means that the model will detect the start and end of speech based on
+ audio volume and respond at the end of user speech.
+
Semantic VAD is more advanced and uses a turn detection model (in conjunction
with VAD) to semantically estimate whether the user has finished speaking, then
dynamically sets a timeout based on this probability. For example, if user audio
src/openai/types/realtime/realtime_audio_config_input_param.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+from typing import Optional
from typing_extensions import TypedDict
from .noise_reduction_type import NoiseReductionType
@@ -46,12 +47,15 @@ class RealtimeAudioConfigInputParam(TypedDict, total=False):
transcription, these offer additional guidance to the transcription service.
"""
- turn_detection: RealtimeAudioInputTurnDetectionParam
+ turn_detection: Optional[RealtimeAudioInputTurnDetectionParam]
"""Configuration for turn detection, ether Server VAD or Semantic VAD.
This can be set to `null` to turn off, in which case the client must manually
- trigger model response. Server VAD means that the model will detect the start
- and end of speech based on audio volume and respond at the end of user speech.
+ trigger model response.
+
+ Server VAD means that the model will detect the start and end of speech based on
+ audio volume and respond at the end of user speech.
+
Semantic VAD is more advanced and uses a turn detection model (in conjunction
with VAD) to semantically estimate whether the user has finished speaking, then
dynamically sets a timeout based on this probability. For example, if user audio
src/openai/types/realtime/realtime_audio_input_turn_detection.py
@@ -1,33 +1,38 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Optional
-from typing_extensions import Literal
+from typing import Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+from ..._utils import PropertyInfo
from ..._models import BaseModel
-__all__ = ["RealtimeAudioInputTurnDetection"]
+__all__ = ["RealtimeAudioInputTurnDetection", "ServerVad", "SemanticVad"]
-class RealtimeAudioInputTurnDetection(BaseModel):
+class ServerVad(BaseModel):
+ type: Literal["server_vad"]
+ """Type of turn detection, `server_vad` to turn on simple Server VAD."""
+
create_response: Optional[bool] = None
"""
Whether or not to automatically generate a response when a VAD stop event
occurs.
"""
- eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
- """Used only for `semantic_vad` mode.
+ idle_timeout_ms: Optional[int] = None
+ """Optional timeout after which a model response will be triggered automatically.
- The eagerness of the model to respond. `low` will wait longer for the user to
- continue speaking, `high` will respond more quickly. `auto` is the default and
- is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
- 4s, and 2s respectively.
- """
+ This is useful for situations in which a long pause from the user is unexpected,
+ such as a phone call. The model will effectively prompt the user to continue the
+ conversation based on the current context.
- idle_timeout_ms: Optional[int] = None
- """
- Optional idle timeout after which turn detection will auto-timeout when no
- additional audio is received and emits a `timeout_triggered` event.
+ The timeout value will be applied after the last model response's audio has
+ finished playing, i.e. it's set to the `response.done` time plus audio playback
+ duration.
+
+ An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+ Response) will be emitted when the timeout is reached. Idle timeout is currently
+ only supported for `server_vad` mode.
"""
interrupt_response: Optional[bool] = None
@@ -60,5 +65,34 @@ class RealtimeAudioInputTurnDetection(BaseModel):
perform better in noisy environments.
"""
- type: Optional[Literal["server_vad", "semantic_vad"]] = None
- """Type of turn detection."""
+
+class SemanticVad(BaseModel):
+ type: Literal["semantic_vad"]
+ """Type of turn detection, `semantic_vad` to turn on Semantic VAD."""
+
+ create_response: Optional[bool] = None
+ """
+ Whether or not to automatically generate a response when a VAD stop event
+ occurs.
+ """
+
+ eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
+ """Used only for `semantic_vad` mode.
+
+ The eagerness of the model to respond. `low` will wait longer for the user to
+ continue speaking, `high` will respond more quickly. `auto` is the default and
+ is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
+ 4s, and 2s respectively.
+ """
+
+ interrupt_response: Optional[bool] = None
+ """
+ Whether or not to automatically interrupt any ongoing response with output to
+ the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ occurs.
+ """
+
+
+RealtimeAudioInputTurnDetection: TypeAlias = Annotated[
+ Union[ServerVad, SemanticVad, None], PropertyInfo(discriminator="type")
+]
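Because the turn-detection type is now a discriminated union rather than one catch-all model, downstream code can narrow on `type`. A minimal sketch (assuming a payload already parsed into these models):

from typing import Optional, Union

from openai.types.realtime.realtime_audio_input_turn_detection import (
    SemanticVad,
    ServerVad,
)

def describe(detection: Optional[Union[ServerVad, SemanticVad]]) -> str:
    # The required `type` field is the discriminator, so isinstance checks
    # narrow cleanly once Pydantic has parsed the payload.
    if detection is None:
        return "turn detection disabled"
    if isinstance(detection, ServerVad):
        return f"server VAD, idle_timeout_ms={detection.idle_timeout_ms}"
    return f"semantic VAD, eagerness={detection.eagerness}"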
src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
@@ -2,32 +2,36 @@
from __future__ import annotations
-from typing import Optional
-from typing_extensions import Literal, TypedDict
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
-__all__ = ["RealtimeAudioInputTurnDetectionParam"]
+__all__ = ["RealtimeAudioInputTurnDetectionParam", "ServerVad", "SemanticVad"]
-class RealtimeAudioInputTurnDetectionParam(TypedDict, total=False):
+class ServerVad(TypedDict, total=False):
+ type: Required[Literal["server_vad"]]
+ """Type of turn detection, `server_vad` to turn on simple Server VAD."""
+
create_response: bool
"""
Whether or not to automatically generate a response when a VAD stop event
occurs.
"""
- eagerness: Literal["low", "medium", "high", "auto"]
- """Used only for `semantic_vad` mode.
+ idle_timeout_ms: Optional[int]
+ """Optional timeout after which a model response will be triggered automatically.
- The eagerness of the model to respond. `low` will wait longer for the user to
- continue speaking, `high` will respond more quickly. `auto` is the default and
- is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
- 4s, and 2s respectively.
- """
+ This is useful for situations in which a long pause from the user is unexpected,
+ such as a phone call. The model will effectively prompt the user to continue the
+ conversation based on the current context.
- idle_timeout_ms: Optional[int]
- """
- Optional idle timeout after which turn detection will auto-timeout when no
- additional audio is received and emits a `timeout_triggered` event.
+ The timeout value will be applied after the last model response's audio has
+ finished playing, i.e. it's set to the `response.done` time plus audio playback
+ duration.
+
+ An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+ Response) will be emitted when the timeout is reached. Idle timeout is currently
+ only supported for `server_vad` mode.
"""
interrupt_response: bool
@@ -60,5 +64,32 @@ class RealtimeAudioInputTurnDetectionParam(TypedDict, total=False):
perform better in noisy environments.
"""
- type: Literal["server_vad", "semantic_vad"]
- """Type of turn detection."""
+
+class SemanticVad(TypedDict, total=False):
+ type: Required[Literal["semantic_vad"]]
+ """Type of turn detection, `semantic_vad` to turn on Semantic VAD."""
+
+ create_response: bool
+ """
+ Whether or not to automatically generate a response when a VAD stop event
+ occurs.
+ """
+
+ eagerness: Literal["low", "medium", "high", "auto"]
+ """Used only for `semantic_vad` mode.
+
+ The eagerness of the model to respond. `low` will wait longer for the user to
+ continue speaking, `high` will respond more quickly. `auto` is the default and
+ is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
+ 4s, and 2s respectively.
+ """
+
+ interrupt_response: bool
+ """
+ Whether or not to automatically interrupt any ongoing response with output to
+ the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ occurs.
+ """
+
+
+RealtimeAudioInputTurnDetectionParam: TypeAlias = Union[ServerVad, SemanticVad]
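On the request side the same split applies, and `type` is now Required. A sketch of a `server_vad` payload under the new param type (the 5000ms value mirrors the updated test fixture below):

from openai.types.realtime.realtime_audio_input_turn_detection_param import ServerVad

turn_detection: ServerVad = {
    "type": "server_vad",  # now Required; the optional catch-all field is gone
    "create_response": True,
    "idle_timeout_ms": 5000,  # ms of post-playback silence before auto-response
}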
src/openai/types/realtime/realtime_session_create_response.py
@@ -1,8 +1,9 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
-from typing_extensions import Literal, TypeAlias
+from typing_extensions import Literal, Annotated, TypeAlias
+from ..._utils import PropertyInfo
from ..._models import BaseModel
from .audio_transcription import AudioTranscription
from .realtime_truncation import RealtimeTruncation
@@ -21,6 +22,8 @@ __all__ = [
"AudioInput",
"AudioInputNoiseReduction",
"AudioInputTurnDetection",
+ "AudioInputTurnDetectionServerVad",
+ "AudioInputTurnDetectionSemanticVad",
"AudioOutput",
"ToolChoice",
"Tool",
@@ -45,26 +48,30 @@ class AudioInputNoiseReduction(BaseModel):
"""
-class AudioInputTurnDetection(BaseModel):
+class AudioInputTurnDetectionServerVad(BaseModel):
+ type: Literal["server_vad"]
+ """Type of turn detection, `server_vad` to turn on simple Server VAD."""
+
create_response: Optional[bool] = None
"""
Whether or not to automatically generate a response when a VAD stop event
occurs.
"""
- eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
- """Used only for `semantic_vad` mode.
+ idle_timeout_ms: Optional[int] = None
+ """Optional timeout after which a model response will be triggered automatically.
- The eagerness of the model to respond. `low` will wait longer for the user to
- continue speaking, `high` will respond more quickly. `auto` is the default and
- is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
- 4s, and 2s respectively.
- """
+ This is useful for situations in which a long pause from the user is unexpected,
+ such as a phone call. The model will effectively prompt the user to continue the
+ conversation based on the current context.
- idle_timeout_ms: Optional[int] = None
- """
- Optional idle timeout after which turn detection will auto-timeout when no
- additional audio is received and emits a `timeout_triggered` event.
+ The timeout value will be applied after the last model response's audio has
+ finished playing, i.e. it's set to the `response.done` time plus audio playback
+ duration.
+
+ An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+ Response) will be emitted when the timeout is reached. Idle timeout is currently
+ only supported for `server_vad` mode.
"""
interrupt_response: Optional[bool] = None
@@ -97,8 +104,38 @@ class AudioInputTurnDetection(BaseModel):
perform better in noisy environments.
"""
- type: Optional[Literal["server_vad", "semantic_vad"]] = None
- """Type of turn detection."""
+
+class AudioInputTurnDetectionSemanticVad(BaseModel):
+ type: Literal["semantic_vad"]
+ """Type of turn detection, `semantic_vad` to turn on Semantic VAD."""
+
+ create_response: Optional[bool] = None
+ """
+ Whether or not to automatically generate a response when a VAD stop event
+ occurs.
+ """
+
+ eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
+ """Used only for `semantic_vad` mode.
+
+ The eagerness of the model to respond. `low` will wait longer for the user to
+ continue speaking, `high` will respond more quickly. `auto` is the default and
+ is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
+ 4s, and 2s respectively.
+ """
+
+ interrupt_response: Optional[bool] = None
+ """
+ Whether or not to automatically interrupt any ongoing response with output to
+ the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ occurs.
+ """
+
+
+AudioInputTurnDetection: TypeAlias = Annotated[
+ Union[AudioInputTurnDetectionServerVad, AudioInputTurnDetectionSemanticVad, None],
+ PropertyInfo(discriminator="type"),
+]
class AudioInput(BaseModel):
@@ -130,8 +167,11 @@ class AudioInput(BaseModel):
"""Configuration for turn detection, ether Server VAD or Semantic VAD.
This can be set to `null` to turn off, in which case the client must manually
- trigger model response. Server VAD means that the model will detect the start
- and end of speech based on audio volume and respond at the end of user speech.
+ trigger model response.
+
+ Server VAD means that the model will detect the start and end of speech based on
+ audio volume and respond at the end of user speech.
+
Semantic VAD is more advanced and uses a turn detection model (in conjunction
with VAD) to semantically estimate whether the user has finished speaking, then
dynamically sets a timeout based on this probability. For example, if user audio
src/openai/types/realtime/realtime_transcription_session_audio_input.py
@@ -51,8 +51,11 @@ class RealtimeTranscriptionSessionAudioInput(BaseModel):
"""Configuration for turn detection, ether Server VAD or Semantic VAD.
This can be set to `null` to turn off, in which case the client must manually
- trigger model response. Server VAD means that the model will detect the start
- and end of speech based on audio volume and respond at the end of user speech.
+ trigger model response.
+
+ Server VAD means that the model will detect the start and end of speech based on
+ audio volume and respond at the end of user speech.
+
Semantic VAD is more advanced and uses a turn detection model (in conjunction
with VAD) to semantically estimate whether the user has finished speaking, then
dynamically sets a timeout based on this probability. For example, if user audio
src/openai/types/realtime/realtime_transcription_session_audio_input_param.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+from typing import Optional
from typing_extensions import TypedDict
from .noise_reduction_type import NoiseReductionType
@@ -48,12 +49,15 @@ class RealtimeTranscriptionSessionAudioInputParam(TypedDict, total=False):
transcription, these offer additional guidance to the transcription service.
"""
- turn_detection: RealtimeTranscriptionSessionAudioInputTurnDetectionParam
+ turn_detection: Optional[RealtimeTranscriptionSessionAudioInputTurnDetectionParam]
"""Configuration for turn detection, ether Server VAD or Semantic VAD.
This can be set to `null` to turn off, in which case the client must manually
- trigger model response. Server VAD means that the model will detect the start
- and end of speech based on audio volume and respond at the end of user speech.
+ trigger model response.
+
+ Server VAD means that the model will detect the start and end of speech based on
+ audio volume and respond at the end of user speech.
+
Semantic VAD is more advanced and uses a turn detection model (in conjunction
with VAD) to semantically estimate whether the user has finished speaking, then
dynamically sets a timeout based on this probability. For example, if user audio
src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
@@ -1,32 +1,38 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Optional
-from typing_extensions import Literal
+from typing import Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+from ..._utils import PropertyInfo
from ..._models import BaseModel
-__all__ = ["RealtimeTranscriptionSessionAudioInputTurnDetection"]
+__all__ = ["RealtimeTranscriptionSessionAudioInputTurnDetection", "ServerVad", "SemanticVad"]
-class RealtimeTranscriptionSessionAudioInputTurnDetection(BaseModel):
+class ServerVad(BaseModel):
+ type: Literal["server_vad"]
+ """Type of turn detection, `server_vad` to turn on simple Server VAD."""
+
create_response: Optional[bool] = None
"""
Whether or not to automatically generate a response when a VAD stop event
occurs.
"""
- eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
- """Used only for `semantic_vad` mode.
+ idle_timeout_ms: Optional[int] = None
+ """Optional timeout after which a model response will be triggered automatically.
- The eagerness of the model to respond. `low` will wait longer for the user to
- continue speaking, `high` will respond more quickly. `auto` is the default and
- is equivalent to `medium`.
- """
+ This is useful for situations in which a long pause from the user is unexpected,
+ such as a phone call. The model will effectively prompt the user to continue the
+ conversation based on the current context.
- idle_timeout_ms: Optional[int] = None
- """
- Optional idle timeout after which turn detection will auto-timeout when no
- additional audio is received.
+ The timeout value will be applied after the last model response's audio has
+ finished playing, i.e. it's set to the `response.done` time plus audio playback
+ duration.
+
+ An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+ Response) will be emitted when the timeout is reached. Idle timeout is currently
+ only supported for `server_vad` mode.
"""
interrupt_response: Optional[bool] = None
@@ -59,5 +65,34 @@ class RealtimeTranscriptionSessionAudioInputTurnDetection(BaseModel):
perform better in noisy environments.
"""
- type: Optional[Literal["server_vad", "semantic_vad"]] = None
- """Type of turn detection."""
+
+class SemanticVad(BaseModel):
+ type: Literal["semantic_vad"]
+ """Type of turn detection, `semantic_vad` to turn on Semantic VAD."""
+
+ create_response: Optional[bool] = None
+ """
+ Whether or not to automatically generate a response when a VAD stop event
+ occurs.
+ """
+
+ eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
+ """Used only for `semantic_vad` mode.
+
+ The eagerness of the model to respond. `low` will wait longer for the user to
+ continue speaking, `high` will respond more quickly. `auto` is the default and
+ is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
+ 4s, and 2s respectively.
+ """
+
+ interrupt_response: Optional[bool] = None
+ """
+ Whether or not to automatically interrupt any ongoing response with output to
+ the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ occurs.
+ """
+
+
+RealtimeTranscriptionSessionAudioInputTurnDetection: TypeAlias = Annotated[
+ Union[ServerVad, SemanticVad, None], PropertyInfo(discriminator="type")
+]
src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py
@@ -2,31 +2,36 @@
from __future__ import annotations
-from typing import Optional
-from typing_extensions import Literal, TypedDict
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
-__all__ = ["RealtimeTranscriptionSessionAudioInputTurnDetectionParam"]
+__all__ = ["RealtimeTranscriptionSessionAudioInputTurnDetectionParam", "ServerVad", "SemanticVad"]
-class RealtimeTranscriptionSessionAudioInputTurnDetectionParam(TypedDict, total=False):
+class ServerVad(TypedDict, total=False):
+ type: Required[Literal["server_vad"]]
+ """Type of turn detection, `server_vad` to turn on simple Server VAD."""
+
create_response: bool
"""
Whether or not to automatically generate a response when a VAD stop event
occurs.
"""
- eagerness: Literal["low", "medium", "high", "auto"]
- """Used only for `semantic_vad` mode.
+ idle_timeout_ms: Optional[int]
+ """Optional timeout after which a model response will be triggered automatically.
- The eagerness of the model to respond. `low` will wait longer for the user to
- continue speaking, `high` will respond more quickly. `auto` is the default and
- is equivalent to `medium`.
- """
+ This is useful for situations in which a long pause from the user is unexpected,
+ such as a phone call. The model will effectively prompt the user to continue the
+ conversation based on the current context.
- idle_timeout_ms: Optional[int]
- """
- Optional idle timeout after which turn detection will auto-timeout when no
- additional audio is received.
+ The timeout value will be applied after the last model response's audio has
+ finished playing, i.e. it's set to the `response.done` time plus audio playback
+ duration.
+
+ An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+ Response) will be emitted when the timeout is reached. Idle timeout is currently
+ only supported for `server_vad` mode.
"""
interrupt_response: bool
@@ -59,5 +64,32 @@ class RealtimeTranscriptionSessionAudioInputTurnDetectionParam(TypedDict, total=
perform better in noisy environments.
"""
- type: Literal["server_vad", "semantic_vad"]
- """Type of turn detection."""
+
+class SemanticVad(TypedDict, total=False):
+ type: Required[Literal["semantic_vad"]]
+ """Type of turn detection, `semantic_vad` to turn on Semantic VAD."""
+
+ create_response: bool
+ """
+ Whether or not to automatically generate a response when a VAD stop event
+ occurs.
+ """
+
+ eagerness: Literal["low", "medium", "high", "auto"]
+ """Used only for `semantic_vad` mode.
+
+ The eagerness of the model to respond. `low` will wait longer for the user to
+ continue speaking, `high` will respond more quickly. `auto` is the default and
+ is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
+ 4s, and 2s respectively.
+ """
+
+ interrupt_response: bool
+ """
+ Whether or not to automatically interrupt any ongoing response with output to
+ the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ occurs.
+ """
+
+
+RealtimeTranscriptionSessionAudioInputTurnDetectionParam: TypeAlias = Union[ServerVad, SemanticVad]
src/openai/types/responses/response.py
@@ -252,10 +252,10 @@ class Response(BaseModel):
truncation: Optional[Literal["auto", "disabled"]] = None
"""The truncation strategy to use for the model response.
- - `auto`: If the context of this response and previous ones exceeds the model's
- context window size, the model will truncate the response to fit the context
- window by dropping input items in the middle of the conversation.
- - `disabled` (default): If a model response will exceed the context window size
+ - `auto`: If the input to this Response exceeds the model's context window size,
+ the model will truncate the response to fit the context window by dropping
+ items from the beginning of the conversation.
+ - `disabled` (default): If the input size will exceed the context window size
for a model, the request will fail with a 400 error.
"""
src/openai/types/responses/response_create_params.py
@@ -252,10 +252,10 @@ class ResponseCreateParamsBase(TypedDict, total=False):
truncation: Optional[Literal["auto", "disabled"]]
"""The truncation strategy to use for the model response.
- - `auto`: If the context of this response and previous ones exceeds the model's
- context window size, the model will truncate the response to fit the context
- window by dropping input items in the middle of the conversation.
- - `disabled` (default): If a model response will exceed the context window size
+ - `auto`: If the input to this Response exceeds the model's context window size,
+ the model will truncate the response to fit the context window by dropping
+ items from the beginning of the conversation.
+ - `disabled` (default): If the input size will exceed the context window size
for a model, the request will fail with a 400 error.
"""
src/openai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "openai"
-__version__ = "1.107.1" # x-release-please-version
+__version__ = "1.107.2" # x-release-please-version
tests/api_resources/realtime/test_client_secrets.py
@@ -44,14 +44,13 @@ class TestClientSecrets:
"prompt": "prompt",
},
"turn_detection": {
+ "type": "server_vad",
"create_response": True,
- "eagerness": "low",
- "idle_timeout_ms": 0,
+ "idle_timeout_ms": 5000,
"interrupt_response": True,
"prefix_padding_ms": 0,
"silence_duration_ms": 0,
"threshold": 0,
- "type": "server_vad",
},
},
"output": {
@@ -141,14 +140,13 @@ class TestAsyncClientSecrets:
"prompt": "prompt",
},
"turn_detection": {
+ "type": "server_vad",
"create_response": True,
- "eagerness": "low",
- "idle_timeout_ms": 0,
+ "idle_timeout_ms": 5000,
"interrupt_response": True,
"prefix_padding_ms": 0,
"silence_duration_ms": 0,
"threshold": 0,
- "type": "server_vad",
},
},
"output": {
tests/test_client.py
@@ -6,13 +6,10 @@ import gc
import os
import sys
import json
-import time
import asyncio
import inspect
-import subprocess
import tracemalloc
from typing import Any, Union, Protocol, cast
-from textwrap import dedent
from unittest import mock
from typing_extensions import Literal
@@ -23,6 +20,7 @@ from pydantic import ValidationError
from openai import OpenAI, AsyncOpenAI, APIResponseValidationError
from openai._types import Omit
+from openai._utils import asyncify
from openai._models import BaseModel, FinalRequestOptions
from openai._streaming import Stream, AsyncStream
from openai._exceptions import OpenAIError, APIStatusError, APITimeoutError, APIResponseValidationError
@@ -30,8 +28,10 @@ from openai._base_client import (
DEFAULT_TIMEOUT,
HTTPX_DEFAULT_TIMEOUT,
BaseClient,
+ OtherPlatform,
DefaultHttpxClient,
DefaultAsyncHttpxClient,
+ get_platform,
make_request_options,
)
@@ -1857,50 +1857,9 @@ class TestAsyncOpenAI:
assert response.retries_taken == failures_before_success
assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
- def test_get_platform(self) -> None:
- # A previous implementation of asyncify could leave threads unterminated when
- # used with nest_asyncio.
- #
- # Since nest_asyncio.apply() is global and cannot be un-applied, this
- # test is run in a separate process to avoid affecting other tests.
- test_code = dedent("""
- import asyncio
- import nest_asyncio
- import threading
-
- from openai._utils import asyncify
- from openai._base_client import get_platform
-
- async def test_main() -> None:
- result = await asyncify(get_platform)()
- print(result)
- for thread in threading.enumerate():
- print(thread.name)
-
- nest_asyncio.apply()
- asyncio.run(test_main())
- """)
- with subprocess.Popen(
- [sys.executable, "-c", test_code],
- text=True,
- ) as process:
- timeout = 10 # seconds
-
- start_time = time.monotonic()
- while True:
- return_code = process.poll()
- if return_code is not None:
- if return_code != 0:
- raise AssertionError("calling get_platform using asyncify resulted in a non-zero exit code")
-
- # success
- break
-
- if time.monotonic() - start_time > timeout:
- process.kill()
- raise AssertionError("calling get_platform using asyncify resulted in a hung process")
-
- time.sleep(0.1)
+ async def test_get_platform(self) -> None:
+ platform = await asyncify(get_platform)()
+ assert isinstance(platform, (str, OtherPlatform))
async def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> None:
# Test that the proxy environment variables are set correctly
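For reference, `asyncify` wraps a synchronous callable so it runs off the event loop, which is all the simplified test above needs to assert; a minimal standalone sketch of the same pattern (the wrapped function is illustrative):

import asyncio

from openai._utils import asyncify

def blocking_work() -> str:
    return "done"  # stands in for any sync callable such as get_platform

async def main() -> None:
    # asyncify returns an async callable that executes the sync function
    # on a worker thread instead of blocking the running loop.
    result = await asyncify(blocking_work)()
    print(result)

asyncio.run(main())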
.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.107.1"
+ ".": "1.107.2"
}
\ No newline at end of file
.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 118
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml
-openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-94b1e3cb0bdc616ff0c2f267c33dadd95f133b1f64e647aab6c64afb292b2793.yml
+openapi_spec_hash: 2395319ac9befd59b6536ae7f9564a05
config_hash: 930dac3aa861344867e4ac84f037b5df
CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog
+## 1.107.2 (2025-09-12)
+
+Full Changelog: [v1.107.1...v1.107.2](https://github.com/openai/openai-python/compare/v1.107.1...v1.107.2)
+
+### Chores
+
+* **api:** Minor docs and type updates for realtime ([ab6a10d](https://github.com/openai/openai-python/commit/ab6a10da4ed7e6386695b6f5f29149d4870f85c9))
+* **tests:** simplify `get_platform` test ([01f03e0](https://github.com/openai/openai-python/commit/01f03e0ad1f9ab3f2ed8b7c13d652263c6d06378))
+
## 1.107.1 (2025-09-10)
Full Changelog: [v1.107.0...v1.107.1](https://github.com/openai/openai-python/compare/v1.107.0...v1.107.1)
pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "openai"
-version = "1.107.1"
+version = "1.107.2"
description = "The official Python library for the openai API"
dynamic = ["readme"]
license = "Apache-2.0"
requirements-dev.lock
@@ -70,7 +70,7 @@ filelock==3.12.4
frozenlist==1.7.0
# via aiohttp
# via aiosignal
-griffe==1.14.0
+griffe==1.13.0
h11==0.16.0
# via httpcore
httpcore==1.0.9
@@ -108,7 +108,6 @@ multidict==6.5.0
mypy==1.14.1
mypy-extensions==1.0.0
# via mypy
-nest-asyncio==1.6.0
nodeenv==1.8.0
# via pyright
nox==2023.4.22