Commit 28d60d9e

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-05-16 05:34:07
feat(api): responses x eval api
Parent: 1701df1
src/openai/resources/audio/transcriptions.py
@@ -57,6 +57,7 @@ class Transcriptions(SyncAPIResource):
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
@@ -118,6 +119,7 @@ class Transcriptions(SyncAPIResource):
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: Literal[True],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -152,6 +154,11 @@ class Transcriptions(SyncAPIResource):
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -200,6 +207,7 @@ class Transcriptions(SyncAPIResource):
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: bool,
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -234,6 +242,11 @@ class Transcriptions(SyncAPIResource):
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -281,6 +294,7 @@ class Transcriptions(SyncAPIResource):
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -299,6 +313,7 @@ class Transcriptions(SyncAPIResource):
             {
                 "file": file,
                 "model": model,
+                "chunking_strategy": chunking_strategy,
                 "include": include,
                 "language": language,
                 "prompt": prompt,
@@ -357,6 +372,8 @@ class AsyncTranscriptions(AsyncAPIResource):
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -369,7 +386,68 @@ class AsyncTranscriptions(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Transcription: ...
+    ) -> TranscriptionCreateResponse:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. The options are `gpt-4o-transcribe`,
+              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
+              Whisper V2 model).
+
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
+          include: Additional information to include in the transcription response. `logprobs` will
+              return the log probabilities of the tokens in the response to understand the
+              model's confidence in the transcription. `logprobs` only works with
+              response_format set to `json` and only with the models `gpt-4o-transcribe` and
+              `gpt-4o-mini-transcribe`.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: Optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
+              the only supported format is `json`.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
+              for more information.
+
+              Note: Streaming is not supported for the `whisper-1` model and will be ignored.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set to `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word` or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
 
     @overload
     async def create(
@@ -418,6 +496,7 @@ class AsyncTranscriptions(AsyncAPIResource):
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: Literal[True],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -452,6 +531,11 @@ class AsyncTranscriptions(AsyncAPIResource):
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -500,6 +584,7 @@ class AsyncTranscriptions(AsyncAPIResource):
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: bool,
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -534,6 +619,11 @@ class AsyncTranscriptions(AsyncAPIResource):
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -581,6 +671,7 @@ class AsyncTranscriptions(AsyncAPIResource):
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -599,6 +690,7 @@ class AsyncTranscriptions(AsyncAPIResource):
             {
                 "file": file,
                 "model": model,
+                "chunking_strategy": chunking_strategy,
                 "include": include,
                 "language": language,
                 "prompt": prompt,
src/openai/resources/embeddings.py
@@ -66,11 +66,12 @@ class Embeddings(SyncAPIResource):
           input: Input text to embed, encoded as a string or array of tokens. To embed multiple
               inputs in a single request, pass an array of strings or array of token arrays.
               The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              all embedding models), cannot be an empty string, and any array must be 2048
               dimensions or less.
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens. Some models may also impose a limit on total number of
-              tokens summed across inputs.
+              for counting tokens. In addition to the per-input token limit, all embedding
+              models enforce a maximum of 300,000 tokens summed across all inputs in a single
+              request.
 
           model: ID of the model to use. You can use the
               [List models](https://platform.openai.com/docs/api-reference/models/list) API to
@@ -181,11 +182,12 @@ class AsyncEmbeddings(AsyncAPIResource):
           input: Input text to embed, encoded as a string or array of tokens. To embed multiple
               inputs in a single request, pass an array of strings or array of token arrays.
               The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              all embedding models), cannot be an empty string, and any array must be 2048
               dimensions or less.
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens. Some models may also impose a limit on total number of
-              tokens summed across inputs.
+              for counting tokens. In addition to the per-input token limit, all embedding
+              models enforce a maximum of 300,000 tokens summed across all inputs in a single
+              request.
 
           model: ID of the model to use. You can use the
               [List models](https://platform.openai.com/docs/api-reference/models/list) API to
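
Usage note: a sketch of checking both embedding limits client-side with tiktoken before sending the request; the model name and sample inputs are assumptions for illustration.

    import tiktoken

    enc = tiktoken.encoding_for_model("text-embedding-3-small")
    inputs = ["first document", "second document"]

    per_input = [len(enc.encode(text)) for text in inputs]
    # 8192 tokens per input, and at most 300,000 tokens summed
    # across all inputs in a single request.
    assert max(per_input) <= 8192, "an input exceeds the per-input token limit"
    assert sum(per_input) <= 300_000, "request exceeds the 300k aggregate limit"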
src/openai/types/audio/transcription_create_params.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from typing import List, Union, Optional
-from typing_extensions import Literal, Required, TypedDict
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from ..._types import FileTypes
 from ..audio_model import AudioModel
@@ -12,6 +12,8 @@ from ..audio_response_format import AudioResponseFormat
 
 __all__ = [
     "TranscriptionCreateParamsBase",
+    "ChunkingStrategy",
+    "ChunkingStrategyVadConfig",
     "TranscriptionCreateParamsNonStreaming",
     "TranscriptionCreateParamsStreaming",
 ]
@@ -31,6 +33,15 @@ class TranscriptionCreateParamsBase(TypedDict, total=False):
     (which is powered by our open source Whisper V2 model).
     """
 
+    chunking_strategy: Optional[ChunkingStrategy]
+    """Controls how the audio is cut into chunks.
+
+    When set to `"auto"`, the server first normalizes loudness and then uses voice
+    activity detection (VAD) to choose boundaries. A `server_vad` object can be
+    provided to tweak VAD detection parameters manually. If unset, the audio is
+    transcribed as a single block.
+    """
+
     include: List[TranscriptionInclude]
     """Additional information to include in the transcription response.
 
@@ -82,6 +93,31 @@ class TranscriptionCreateParamsBase(TypedDict, total=False):
     """
 
 
+class ChunkingStrategyVadConfig(TypedDict, total=False):
+    type: Required[Literal["server_vad"]]
+    """Must be set to `server_vad` to enable manual chunking using server side VAD."""
+
+    prefix_padding_ms: int
+    """Amount of audio to include before the VAD detected speech (in milliseconds)."""
+
+    silence_duration_ms: int
+    """
+    Duration of silence to detect speech stop (in milliseconds). With shorter values
+    the model will respond more quickly, but may jump in on short pauses from the
+    user.
+    """
+
+    threshold: float
+    """Sensitivity threshold (0.0 to 1.0) for voice activity detection.
+
+    A higher threshold will require louder audio to activate the model, and thus
+    might perform better in noisy environments.
+    """
+
+
+ChunkingStrategy: TypeAlias = Union[Literal["auto"], ChunkingStrategyVadConfig]
+
+
 class TranscriptionCreateParamsNonStreaming(TranscriptionCreateParamsBase, total=False):
     stream: Optional[Literal[False]]
     """
src/openai/types/evals/__init__.py
@@ -11,12 +11,16 @@ from .run_create_response import RunCreateResponse as RunCreateResponse
 from .run_delete_response import RunDeleteResponse as RunDeleteResponse
 from .run_retrieve_response import RunRetrieveResponse as RunRetrieveResponse
 from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource as CreateEvalJSONLRunDataSource
+from .create_eval_responses_run_data_source import CreateEvalResponsesRunDataSource as CreateEvalResponsesRunDataSource
 from .create_eval_completions_run_data_source import (
     CreateEvalCompletionsRunDataSource as CreateEvalCompletionsRunDataSource,
 )
 from .create_eval_jsonl_run_data_source_param import (
     CreateEvalJSONLRunDataSourceParam as CreateEvalJSONLRunDataSourceParam,
 )
+from .create_eval_responses_run_data_source_param import (
+    CreateEvalResponsesRunDataSourceParam as CreateEvalResponsesRunDataSourceParam,
+)
 from .create_eval_completions_run_data_source_param import (
     CreateEvalCompletionsRunDataSourceParam as CreateEvalCompletionsRunDataSourceParam,
 )
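
Usage note: the new re-exports make both the response model and the request param type importable from the package namespace, mirroring the existing JSONL and completions data sources.

    from openai.types.evals import (
        CreateEvalResponsesRunDataSource,
        CreateEvalResponsesRunDataSourceParam,
    )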
src/openai/types/evals/create_eval_responses_run_data_source.py
@@ -0,0 +1,206 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from ..shared.reasoning_effort import ReasoningEffort
+from ..responses.response_input_text import ResponseInputText
+
+__all__ = [
+    "CreateEvalResponsesRunDataSource",
+    "Source",
+    "SourceFileContent",
+    "SourceFileContentContent",
+    "SourceFileID",
+    "SourceResponses",
+    "InputMessages",
+    "InputMessagesTemplate",
+    "InputMessagesTemplateTemplate",
+    "InputMessagesTemplateTemplateChatMessage",
+    "InputMessagesTemplateTemplateEvalItem",
+    "InputMessagesTemplateTemplateEvalItemContent",
+    "InputMessagesTemplateTemplateEvalItemContentOutputText",
+    "InputMessagesItemReference",
+    "SamplingParams",
+]
+
+
+class SourceFileContentContent(BaseModel):
+    item: Dict[str, object]
+
+    sample: Optional[Dict[str, object]] = None
+
+
+class SourceFileContent(BaseModel):
+    content: List[SourceFileContentContent]
+    """The content of the jsonl file."""
+
+    type: Literal["file_content"]
+    """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(BaseModel):
+    id: str
+    """The identifier of the file."""
+
+    type: Literal["file_id"]
+    """The type of jsonl source. Always `file_id`."""
+
+
+class SourceResponses(BaseModel):
+    type: Literal["responses"]
+    """The type of run data source. Always `responses`."""
+
+    created_after: Optional[int] = None
+    """Only include items created after this timestamp (inclusive).
+
+    This is a query parameter used to select responses.
+    """
+
+    created_before: Optional[int] = None
+    """Only include items created before this timestamp (inclusive).
+
+    This is a query parameter used to select responses.
+    """
+
+    has_tool_calls: Optional[bool] = None
+    """Whether the response has tool calls.
+
+    This is a query parameter used to select responses.
+    """
+
+    instructions_search: Optional[str] = None
+    """Optional string to search the 'instructions' field.
+
+    This is a query parameter used to select responses.
+    """
+
+    metadata: Optional[object] = None
+    """Metadata filter for the responses.
+
+    This is a query parameter used to select responses.
+    """
+
+    model: Optional[str] = None
+    """The name of the model to find responses for.
+
+    This is a query parameter used to select responses.
+    """
+
+    reasoning_effort: Optional[ReasoningEffort] = None
+    """Optional reasoning effort parameter.
+
+    This is a query parameter used to select responses.
+    """
+
+    temperature: Optional[float] = None
+    """Sampling temperature. This is a query parameter used to select responses."""
+
+    tools: Optional[List[str]] = None
+    """List of tool names. This is a query parameter used to select responses."""
+
+    top_p: Optional[float] = None
+    """Nucleus sampling parameter. This is a query parameter used to select responses."""
+
+    users: Optional[List[str]] = None
+    """List of user identifiers. This is a query parameter used to select responses."""
+
+
+Source: TypeAlias = Annotated[
+    Union[SourceFileContent, SourceFileID, SourceResponses], PropertyInfo(discriminator="type")
+]
+
+
+class InputMessagesTemplateTemplateChatMessage(BaseModel):
+    content: str
+    """The content of the message."""
+
+    role: str
+    """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class InputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel):
+    text: str
+    """The text output from the model."""
+
+    type: Literal["output_text"]
+    """The type of the output text. Always `output_text`."""
+
+
+InputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
+    str, ResponseInputText, InputMessagesTemplateTemplateEvalItemContentOutputText
+]
+
+
+class InputMessagesTemplateTemplateEvalItem(BaseModel):
+    content: InputMessagesTemplateTemplateEvalItemContent
+    """Text inputs to the model - can contain template strings."""
+
+    role: Literal["user", "assistant", "system", "developer"]
+    """The role of the message input.
+
+    One of `user`, `assistant`, `system`, or `developer`.
+    """
+
+    type: Optional[Literal["message"]] = None
+    """The type of the message input. Always `message`."""
+
+
+InputMessagesTemplateTemplate: TypeAlias = Union[
+    InputMessagesTemplateTemplateChatMessage, InputMessagesTemplateTemplateEvalItem
+]
+
+
+class InputMessagesTemplate(BaseModel):
+    template: List[InputMessagesTemplateTemplate]
+    """A list of chat messages forming the prompt or context.
+
+    May include variable references to the "item" namespace, e.g. {{item.name}}.
+    """
+
+    type: Literal["template"]
+    """The type of input messages. Always `template`."""
+
+
+class InputMessagesItemReference(BaseModel):
+    item_reference: str
+    """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+    type: Literal["item_reference"]
+    """The type of input messages. Always `item_reference`."""
+
+
+InputMessages: TypeAlias = Annotated[
+    Union[InputMessagesTemplate, InputMessagesItemReference], PropertyInfo(discriminator="type")
+]
+
+
+class SamplingParams(BaseModel):
+    max_completion_tokens: Optional[int] = None
+    """The maximum number of tokens in the generated output."""
+
+    seed: Optional[int] = None
+    """A seed value to initialize the randomness, during sampling."""
+
+    temperature: Optional[float] = None
+    """A higher temperature increases randomness in the outputs."""
+
+    top_p: Optional[float] = None
+    """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class CreateEvalResponsesRunDataSource(BaseModel):
+    source: Source
+    """A EvalResponsesSource object describing a run data source configuration."""
+
+    type: Literal["responses"]
+    """The type of run data source. Always `responses`."""
+
+    input_messages: Optional[InputMessages] = None
+
+    model: Optional[str] = None
+    """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+    sampling_params: Optional[SamplingParams] = None
src/openai/types/evals/create_eval_responses_run_data_source_param.py
@@ -0,0 +1,202 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..shared.reasoning_effort import ReasoningEffort
+from ..responses.response_input_text_param import ResponseInputTextParam
+
+__all__ = [
+    "CreateEvalResponsesRunDataSourceParam",
+    "Source",
+    "SourceFileContent",
+    "SourceFileContentContent",
+    "SourceFileID",
+    "SourceResponses",
+    "InputMessages",
+    "InputMessagesTemplate",
+    "InputMessagesTemplateTemplate",
+    "InputMessagesTemplateTemplateChatMessage",
+    "InputMessagesTemplateTemplateEvalItem",
+    "InputMessagesTemplateTemplateEvalItemContent",
+    "InputMessagesTemplateTemplateEvalItemContentOutputText",
+    "InputMessagesItemReference",
+    "SamplingParams",
+]
+
+
+class SourceFileContentContent(TypedDict, total=False):
+    item: Required[Dict[str, object]]
+
+    sample: Dict[str, object]
+
+
+class SourceFileContent(TypedDict, total=False):
+    content: Required[Iterable[SourceFileContentContent]]
+    """The content of the jsonl file."""
+
+    type: Required[Literal["file_content"]]
+    """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(TypedDict, total=False):
+    id: Required[str]
+    """The identifier of the file."""
+
+    type: Required[Literal["file_id"]]
+    """The type of jsonl source. Always `file_id`."""
+
+
+class SourceResponses(TypedDict, total=False):
+    type: Required[Literal["responses"]]
+    """The type of run data source. Always `responses`."""
+
+    created_after: Optional[int]
+    """Only include items created after this timestamp (inclusive).
+
+    This is a query parameter used to select responses.
+    """
+
+    created_before: Optional[int]
+    """Only include items created before this timestamp (inclusive).
+
+    This is a query parameter used to select responses.
+    """
+
+    has_tool_calls: Optional[bool]
+    """Whether the response has tool calls.
+
+    This is a query parameter used to select responses.
+    """
+
+    instructions_search: Optional[str]
+    """Optional string to search the 'instructions' field.
+
+    This is a query parameter used to select responses.
+    """
+
+    metadata: Optional[object]
+    """Metadata filter for the responses.
+
+    This is a query parameter used to select responses.
+    """
+
+    model: Optional[str]
+    """The name of the model to find responses for.
+
+    This is a query parameter used to select responses.
+    """
+
+    reasoning_effort: Optional[ReasoningEffort]
+    """Optional reasoning effort parameter.
+
+    This is a query parameter used to select responses.
+    """
+
+    temperature: Optional[float]
+    """Sampling temperature. This is a query parameter used to select responses."""
+
+    tools: Optional[List[str]]
+    """List of tool names. This is a query parameter used to select responses."""
+
+    top_p: Optional[float]
+    """Nucleus sampling parameter. This is a query parameter used to select responses."""
+
+    users: Optional[List[str]]
+    """List of user identifiers. This is a query parameter used to select responses."""
+
+
+Source: TypeAlias = Union[SourceFileContent, SourceFileID, SourceResponses]
+
+
+class InputMessagesTemplateTemplateChatMessage(TypedDict, total=False):
+    content: Required[str]
+    """The content of the message."""
+
+    role: Required[str]
+    """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class InputMessagesTemplateTemplateEvalItemContentOutputText(TypedDict, total=False):
+    text: Required[str]
+    """The text output from the model."""
+
+    type: Required[Literal["output_text"]]
+    """The type of the output text. Always `output_text`."""
+
+
+InputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
+    str, ResponseInputTextParam, InputMessagesTemplateTemplateEvalItemContentOutputText
+]
+
+
+class InputMessagesTemplateTemplateEvalItem(TypedDict, total=False):
+    content: Required[InputMessagesTemplateTemplateEvalItemContent]
+    """Text inputs to the model - can contain template strings."""
+
+    role: Required[Literal["user", "assistant", "system", "developer"]]
+    """The role of the message input.
+
+    One of `user`, `assistant`, `system`, or `developer`.
+    """
+
+    type: Literal["message"]
+    """The type of the message input. Always `message`."""
+
+
+InputMessagesTemplateTemplate: TypeAlias = Union[
+    InputMessagesTemplateTemplateChatMessage, InputMessagesTemplateTemplateEvalItem
+]
+
+
+class InputMessagesTemplate(TypedDict, total=False):
+    template: Required[Iterable[InputMessagesTemplateTemplate]]
+    """A list of chat messages forming the prompt or context.
+
+    May include variable references to the "item" namespace, e.g. {{item.name}}.
+    """
+
+    type: Required[Literal["template"]]
+    """The type of input messages. Always `template`."""
+
+
+class InputMessagesItemReference(TypedDict, total=False):
+    item_reference: Required[str]
+    """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+    type: Required[Literal["item_reference"]]
+    """The type of input messages. Always `item_reference`."""
+
+
+InputMessages: TypeAlias = Union[InputMessagesTemplate, InputMessagesItemReference]
+
+
+class SamplingParams(TypedDict, total=False):
+    max_completion_tokens: int
+    """The maximum number of tokens in the generated output."""
+
+    seed: int
+    """A seed value to initialize the randomness, during sampling."""
+
+    temperature: float
+    """A higher temperature increases randomness in the outputs."""
+
+    top_p: float
+    """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class CreateEvalResponsesRunDataSourceParam(TypedDict, total=False):
+    source: Required[Source]
+    """A EvalResponsesSource object describing a run data source configuration."""
+
+    type: Required[Literal["responses"]]
+    """The type of run data source. Always `responses`."""
+
+    input_messages: InputMessages
+
+    model: str
+    """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+    sampling_params: SamplingParams
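
Usage note: a sketch of passing the new param type to `client.evals.runs.create`, with the same `client` as above; the eval ID, model filter, and timestamp are placeholder assumptions.

    run = client.evals.runs.create(
        "eval_123",  # hypothetical eval ID
        name="responses-run",
        data_source={
            "type": "responses",
            "source": {
                "type": "responses",          # pull items from stored responses
                "model": "gpt-4o-mini",       # filter: responses from this model
                "created_after": 1715000000,  # filter: unix timestamp, inclusive
            },
        },
    )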
src/openai/types/evals/run_cancel_response.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Dict, List, Union, Optional
+from typing import List, Union, Optional
 from typing_extensions import Literal, Annotated, TypeAlias
 
 from pydantic import Field as FieldInfo
@@ -9,224 +9,14 @@ from ..._utils import PropertyInfo
 from ..._models import BaseModel
 from .eval_api_error import EvalAPIError
 from ..shared.metadata import Metadata
-from ..shared.reasoning_effort import ReasoningEffort
-from ..responses.response_input_text import ResponseInputText
 from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_responses_run_data_source import CreateEvalResponsesRunDataSource
 from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
 
-__all__ = [
-    "RunCancelResponse",
-    "DataSource",
-    "DataSourceCompletions",
-    "DataSourceCompletionsSource",
-    "DataSourceCompletionsSourceFileContent",
-    "DataSourceCompletionsSourceFileContentContent",
-    "DataSourceCompletionsSourceFileID",
-    "DataSourceCompletionsSourceResponses",
-    "DataSourceCompletionsInputMessages",
-    "DataSourceCompletionsInputMessagesTemplate",
-    "DataSourceCompletionsInputMessagesTemplateTemplate",
-    "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText",
-    "DataSourceCompletionsInputMessagesItemReference",
-    "DataSourceCompletionsSamplingParams",
-    "PerModelUsage",
-    "PerTestingCriteriaResult",
-    "ResultCounts",
-]
-
-
-class DataSourceCompletionsSourceFileContentContent(BaseModel):
-    item: Dict[str, object]
-
-    sample: Optional[Dict[str, object]] = None
-
-
-class DataSourceCompletionsSourceFileContent(BaseModel):
-    content: List[DataSourceCompletionsSourceFileContentContent]
-    """The content of the jsonl file."""
-
-    type: Literal["file_content"]
-    """The type of jsonl source. Always `file_content`."""
-
-
-class DataSourceCompletionsSourceFileID(BaseModel):
-    id: str
-    """The identifier of the file."""
-
-    type: Literal["file_id"]
-    """The type of jsonl source. Always `file_id`."""
-
-
-class DataSourceCompletionsSourceResponses(BaseModel):
-    type: Literal["responses"]
-    """The type of run data source. Always `responses`."""
-
-    allow_parallel_tool_calls: Optional[bool] = None
-    """Whether to allow parallel tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    created_after: Optional[int] = None
-    """Only include items created after this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    created_before: Optional[int] = None
-    """Only include items created before this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    has_tool_calls: Optional[bool] = None
-    """Whether the response has tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    instructions_search: Optional[str] = None
-    """Optional search string for instructions.
-
-    This is a query parameter used to select responses.
-    """
-
-    metadata: Optional[object] = None
-    """Metadata filter for the responses.
-
-    This is a query parameter used to select responses.
-    """
-
-    model: Optional[str] = None
-    """The name of the model to find responses for.
-
-    This is a query parameter used to select responses.
-    """
-
-    reasoning_effort: Optional[ReasoningEffort] = None
-    """Optional reasoning effort parameter.
-
-    This is a query parameter used to select responses.
-    """
-
-    temperature: Optional[float] = None
-    """Sampling temperature. This is a query parameter used to select responses."""
-
-    top_p: Optional[float] = None
-    """Nucleus sampling parameter. This is a query parameter used to select responses."""
-
-    users: Optional[List[str]] = None
-    """List of user identifiers. This is a query parameter used to select responses."""
-
-
-DataSourceCompletionsSource: TypeAlias = Annotated[
-    Union[
-        DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses
-    ],
-    PropertyInfo(discriminator="type"),
-]
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel):
-    content: str
-    """The content of the message."""
-
-    role: str
-    """The role of the message (e.g. "system", "assistant", "user")."""
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel):
-    text: str
-    """The text output from the model."""
-
-    type: Literal["output_text"]
-    """The type of the output text. Always `output_text`."""
-
-
-DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
-    str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText
-]
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel):
-    content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent
-    """Text inputs to the model - can contain template strings."""
-
-    role: Literal["user", "assistant", "system", "developer"]
-    """The role of the message input.
-
-    One of `user`, `assistant`, `system`, or `developer`.
-    """
-
-    type: Optional[Literal["message"]] = None
-    """The type of the message input. Always `message`."""
-
-
-DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[
-    DataSourceCompletionsInputMessagesTemplateTemplateChatMessage,
-    DataSourceCompletionsInputMessagesTemplateTemplateEvalItem,
-]
-
-
-class DataSourceCompletionsInputMessagesTemplate(BaseModel):
-    template: List[DataSourceCompletionsInputMessagesTemplateTemplate]
-    """A list of chat messages forming the prompt or context.
-
-    May include variable references to the "item" namespace, ie {{item.name}}.
-    """
-
-    type: Literal["template"]
-    """The type of input messages. Always `template`."""
-
-
-class DataSourceCompletionsInputMessagesItemReference(BaseModel):
-    item_reference: str
-    """A reference to a variable in the "item" namespace. Ie, "item.name" """
-
-    type: Literal["item_reference"]
-    """The type of input messages. Always `item_reference`."""
-
-
-DataSourceCompletionsInputMessages: TypeAlias = Annotated[
-    Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference],
-    PropertyInfo(discriminator="type"),
-]
-
-
-class DataSourceCompletionsSamplingParams(BaseModel):
-    max_completion_tokens: Optional[int] = None
-    """The maximum number of tokens in the generated output."""
-
-    seed: Optional[int] = None
-    """A seed value to initialize the randomness, during sampling."""
-
-    temperature: Optional[float] = None
-    """A higher temperature increases randomness in the outputs."""
-
-    top_p: Optional[float] = None
-    """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
-
-
-class DataSourceCompletions(BaseModel):
-    source: DataSourceCompletionsSource
-    """A EvalResponsesSource object describing a run data source configuration."""
-
-    type: Literal["completions"]
-    """The type of run data source. Always `completions`."""
-
-    input_messages: Optional[DataSourceCompletionsInputMessages] = None
-
-    model: Optional[str] = None
-    """The name of the model to use for generating completions (e.g. "o3-mini")."""
-
-    sampling_params: Optional[DataSourceCompletionsSamplingParams] = None
-
+__all__ = ["RunCancelResponse", "DataSource", "PerModelUsage", "PerTestingCriteriaResult", "ResultCounts"]
 
 DataSource: TypeAlias = Annotated[
-    Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions],
+    Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, CreateEvalResponsesRunDataSource],
     PropertyInfo(discriminator="type"),
 ]
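
Usage note: since `DataSource` keeps its `PropertyInfo(discriminator="type")` annotation, a fetched run deserializes straight to the shared model and can be narrowed with `isinstance`; the run and eval IDs below are placeholders.

    from openai.types.evals import CreateEvalResponsesRunDataSource

    run = client.evals.runs.retrieve("run_123", eval_id="eval_123")
    if isinstance(run.data_source, CreateEvalResponsesRunDataSource):
        # the `responses` variant introduced by this commit
        print(run.data_source.source.type)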
 
src/openai/types/evals/run_create_params.py
@@ -2,34 +2,15 @@
 
 from __future__ import annotations
 
-from typing import Dict, List, Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
+from typing import Union, Optional
+from typing_extensions import Required, TypeAlias, TypedDict
 
 from ..shared_params.metadata import Metadata
-from ..shared.reasoning_effort import ReasoningEffort
-from ..responses.response_input_text_param import ResponseInputTextParam
 from .create_eval_jsonl_run_data_source_param import CreateEvalJSONLRunDataSourceParam
+from .create_eval_responses_run_data_source_param import CreateEvalResponsesRunDataSourceParam
 from .create_eval_completions_run_data_source_param import CreateEvalCompletionsRunDataSourceParam
 
-__all__ = [
-    "RunCreateParams",
-    "DataSource",
-    "DataSourceCreateEvalResponsesRunDataSource",
-    "DataSourceCreateEvalResponsesRunDataSourceSource",
-    "DataSourceCreateEvalResponsesRunDataSourceSourceFileContent",
-    "DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent",
-    "DataSourceCreateEvalResponsesRunDataSourceSourceFileID",
-    "DataSourceCreateEvalResponsesRunDataSourceSourceResponses",
-    "DataSourceCreateEvalResponsesRunDataSourceInputMessages",
-    "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate",
-    "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate",
-    "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage",
-    "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem",
-    "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent",
-    "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText",
-    "DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference",
-    "DataSourceCreateEvalResponsesRunDataSourceSamplingParams",
-]
+__all__ = ["RunCreateParams", "DataSource"]
 
 
 class RunCreateParams(TypedDict, total=False):
@@ -50,198 +31,6 @@ class RunCreateParams(TypedDict, total=False):
     """The name of the run."""
 
 
-class DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent(TypedDict, total=False):
-    item: Required[Dict[str, object]]
-
-    sample: Dict[str, object]
-
-
-class DataSourceCreateEvalResponsesRunDataSourceSourceFileContent(TypedDict, total=False):
-    content: Required[Iterable[DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent]]
-    """The content of the jsonl file."""
-
-    type: Required[Literal["file_content"]]
-    """The type of jsonl source. Always `file_content`."""
-
-
-class DataSourceCreateEvalResponsesRunDataSourceSourceFileID(TypedDict, total=False):
-    id: Required[str]
-    """The identifier of the file."""
-
-    type: Required[Literal["file_id"]]
-    """The type of jsonl source. Always `file_id`."""
-
-
-class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total=False):
-    type: Required[Literal["responses"]]
-    """The type of run data source. Always `responses`."""
-
-    allow_parallel_tool_calls: Optional[bool]
-    """Whether to allow parallel tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    created_after: Optional[int]
-    """Only include items created after this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    created_before: Optional[int]
-    """Only include items created before this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    has_tool_calls: Optional[bool]
-    """Whether the response has tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    instructions_search: Optional[str]
-    """Optional search string for instructions.
-
-    This is a query parameter used to select responses.
-    """
-
-    metadata: Optional[object]
-    """Metadata filter for the responses.
-
-    This is a query parameter used to select responses.
-    """
-
-    model: Optional[str]
-    """The name of the model to find responses for.
-
-    This is a query parameter used to select responses.
-    """
-
-    reasoning_effort: Optional[ReasoningEffort]
-    """Optional reasoning effort parameter.
-
-    This is a query parameter used to select responses.
-    """
-
-    temperature: Optional[float]
-    """Sampling temperature. This is a query parameter used to select responses."""
-
-    top_p: Optional[float]
-    """Nucleus sampling parameter. This is a query parameter used to select responses."""
-
-    users: Optional[List[str]]
-    """List of user identifiers. This is a query parameter used to select responses."""
-
-
-DataSourceCreateEvalResponsesRunDataSourceSource: TypeAlias = Union[
-    DataSourceCreateEvalResponsesRunDataSourceSourceFileContent,
-    DataSourceCreateEvalResponsesRunDataSourceSourceFileID,
-    DataSourceCreateEvalResponsesRunDataSourceSourceResponses,
-]
-
-
-class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage(TypedDict, total=False):
-    content: Required[str]
-    """The content of the message."""
-
-    role: Required[str]
-    """The role of the message (e.g. "system", "assistant", "user")."""
-
-
-class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText(
-    TypedDict, total=False
-):
-    text: Required[str]
-    """The text output from the model."""
-
-    type: Required[Literal["output_text"]]
-    """The type of the output text. Always `output_text`."""
-
-
-DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
-    str,
-    ResponseInputTextParam,
-    DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText,
-]
-
-
-class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem(TypedDict, total=False):
-    content: Required[DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent]
-    """Text inputs to the model - can contain template strings."""
-
-    role: Required[Literal["user", "assistant", "system", "developer"]]
-    """The role of the message input.
-
-    One of `user`, `assistant`, `system`, or `developer`.
-    """
-
-    type: Literal["message"]
-    """The type of the message input. Always `message`."""
-
-
-DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate: TypeAlias = Union[
-    DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage,
-    DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem,
-]
-
-
-class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate(TypedDict, total=False):
-    template: Required[Iterable[DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate]]
-    """A list of chat messages forming the prompt or context.
-
-    May include variable references to the "item" namespace, ie {{item.name}}.
-    """
-
-    type: Required[Literal["template"]]
-    """The type of input messages. Always `template`."""
-
-
-class DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference(TypedDict, total=False):
-    item_reference: Required[str]
-    """A reference to a variable in the "item" namespace. Ie, "item.name" """
-
-    type: Required[Literal["item_reference"]]
-    """The type of input messages. Always `item_reference`."""
-
-
-DataSourceCreateEvalResponsesRunDataSourceInputMessages: TypeAlias = Union[
-    DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate,
-    DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference,
-]
-
-
-class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=False):
-    max_completion_tokens: int
-    """The maximum number of tokens in the generated output."""
-
-    seed: int
-    """A seed value to initialize the randomness, during sampling."""
-
-    temperature: float
-    """A higher temperature increases randomness in the outputs."""
-
-    top_p: float
-    """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
-
-
-class DataSourceCreateEvalResponsesRunDataSource(TypedDict, total=False):
-    source: Required[DataSourceCreateEvalResponsesRunDataSourceSource]
-    """A EvalResponsesSource object describing a run data source configuration."""
-
-    type: Required[Literal["completions"]]
-    """The type of run data source. Always `completions`."""
-
-    input_messages: DataSourceCreateEvalResponsesRunDataSourceInputMessages
-
-    model: str
-    """The name of the model to use for generating completions (e.g. "o3-mini")."""
-
-    sampling_params: DataSourceCreateEvalResponsesRunDataSourceSamplingParams
-
-
 DataSource: TypeAlias = Union[
-    CreateEvalJSONLRunDataSourceParam,
-    CreateEvalCompletionsRunDataSourceParam,
-    DataSourceCreateEvalResponsesRunDataSource,
+    CreateEvalJSONLRunDataSourceParam, CreateEvalCompletionsRunDataSourceParam, CreateEvalResponsesRunDataSourceParam
 ]
src/openai/types/evals/run_create_response.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Dict, List, Union, Optional
+from typing import List, Union, Optional
 from typing_extensions import Literal, Annotated, TypeAlias
 
 from pydantic import Field as FieldInfo
@@ -9,224 +9,14 @@ from ..._utils import PropertyInfo
 from ..._models import BaseModel
 from .eval_api_error import EvalAPIError
 from ..shared.metadata import Metadata
-from ..shared.reasoning_effort import ReasoningEffort
-from ..responses.response_input_text import ResponseInputText
 from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_responses_run_data_source import CreateEvalResponsesRunDataSource
 from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
 
-__all__ = [
-    "RunCreateResponse",
-    "DataSource",
-    "DataSourceCompletions",
-    "DataSourceCompletionsSource",
-    "DataSourceCompletionsSourceFileContent",
-    "DataSourceCompletionsSourceFileContentContent",
-    "DataSourceCompletionsSourceFileID",
-    "DataSourceCompletionsSourceResponses",
-    "DataSourceCompletionsInputMessages",
-    "DataSourceCompletionsInputMessagesTemplate",
-    "DataSourceCompletionsInputMessagesTemplateTemplate",
-    "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText",
-    "DataSourceCompletionsInputMessagesItemReference",
-    "DataSourceCompletionsSamplingParams",
-    "PerModelUsage",
-    "PerTestingCriteriaResult",
-    "ResultCounts",
-]
-
-
-class DataSourceCompletionsSourceFileContentContent(BaseModel):
-    item: Dict[str, object]
-
-    sample: Optional[Dict[str, object]] = None
-
-
-class DataSourceCompletionsSourceFileContent(BaseModel):
-    content: List[DataSourceCompletionsSourceFileContentContent]
-    """The content of the jsonl file."""
-
-    type: Literal["file_content"]
-    """The type of jsonl source. Always `file_content`."""
-
-
-class DataSourceCompletionsSourceFileID(BaseModel):
-    id: str
-    """The identifier of the file."""
-
-    type: Literal["file_id"]
-    """The type of jsonl source. Always `file_id`."""
-
-
-class DataSourceCompletionsSourceResponses(BaseModel):
-    type: Literal["responses"]
-    """The type of run data source. Always `responses`."""
-
-    allow_parallel_tool_calls: Optional[bool] = None
-    """Whether to allow parallel tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    created_after: Optional[int] = None
-    """Only include items created after this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    created_before: Optional[int] = None
-    """Only include items created before this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    has_tool_calls: Optional[bool] = None
-    """Whether the response has tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    instructions_search: Optional[str] = None
-    """Optional search string for instructions.
-
-    This is a query parameter used to select responses.
-    """
-
-    metadata: Optional[object] = None
-    """Metadata filter for the responses.
-
-    This is a query parameter used to select responses.
-    """
-
-    model: Optional[str] = None
-    """The name of the model to find responses for.
-
-    This is a query parameter used to select responses.
-    """
-
-    reasoning_effort: Optional[ReasoningEffort] = None
-    """Optional reasoning effort parameter.
-
-    This is a query parameter used to select responses.
-    """
-
-    temperature: Optional[float] = None
-    """Sampling temperature. This is a query parameter used to select responses."""
-
-    top_p: Optional[float] = None
-    """Nucleus sampling parameter. This is a query parameter used to select responses."""
-
-    users: Optional[List[str]] = None
-    """List of user identifiers. This is a query parameter used to select responses."""
-
-
-DataSourceCompletionsSource: TypeAlias = Annotated[
-    Union[
-        DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses
-    ],
-    PropertyInfo(discriminator="type"),
-]
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel):
-    content: str
-    """The content of the message."""
-
-    role: str
-    """The role of the message (e.g. "system", "assistant", "user")."""
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel):
-    text: str
-    """The text output from the model."""
-
-    type: Literal["output_text"]
-    """The type of the output text. Always `output_text`."""
-
-
-DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
-    str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText
-]
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel):
-    content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent
-    """Text inputs to the model - can contain template strings."""
-
-    role: Literal["user", "assistant", "system", "developer"]
-    """The role of the message input.
-
-    One of `user`, `assistant`, `system`, or `developer`.
-    """
-
-    type: Optional[Literal["message"]] = None
-    """The type of the message input. Always `message`."""
-
-
-DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[
-    DataSourceCompletionsInputMessagesTemplateTemplateChatMessage,
-    DataSourceCompletionsInputMessagesTemplateTemplateEvalItem,
-]
-
-
-class DataSourceCompletionsInputMessagesTemplate(BaseModel):
-    template: List[DataSourceCompletionsInputMessagesTemplateTemplate]
-    """A list of chat messages forming the prompt or context.
-
-    May include variable references to the "item" namespace, ie {{item.name}}.
-    """
-
-    type: Literal["template"]
-    """The type of input messages. Always `template`."""
-
-
-class DataSourceCompletionsInputMessagesItemReference(BaseModel):
-    item_reference: str
-    """A reference to a variable in the "item" namespace. Ie, "item.name" """
-
-    type: Literal["item_reference"]
-    """The type of input messages. Always `item_reference`."""
-
-
-DataSourceCompletionsInputMessages: TypeAlias = Annotated[
-    Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference],
-    PropertyInfo(discriminator="type"),
-]
-
-
-class DataSourceCompletionsSamplingParams(BaseModel):
-    max_completion_tokens: Optional[int] = None
-    """The maximum number of tokens in the generated output."""
-
-    seed: Optional[int] = None
-    """A seed value to initialize the randomness, during sampling."""
-
-    temperature: Optional[float] = None
-    """A higher temperature increases randomness in the outputs."""
-
-    top_p: Optional[float] = None
-    """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
-
-
-class DataSourceCompletions(BaseModel):
-    source: DataSourceCompletionsSource
-    """A EvalResponsesSource object describing a run data source configuration."""
-
-    type: Literal["completions"]
-    """The type of run data source. Always `completions`."""
-
-    input_messages: Optional[DataSourceCompletionsInputMessages] = None
-
-    model: Optional[str] = None
-    """The name of the model to use for generating completions (e.g. "o3-mini")."""
-
-    sampling_params: Optional[DataSourceCompletionsSamplingParams] = None
-
+__all__ = ["RunCreateResponse", "DataSource", "PerModelUsage", "PerTestingCriteriaResult", "ResultCounts"]
 
 DataSource: TypeAlias = Annotated[
-    Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions],
+    Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, CreateEvalResponsesRunDataSource],
     PropertyInfo(discriminator="type"),
 ]
 
src/openai/types/evals/run_list_response.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Dict, List, Union, Optional
+from typing import List, Union, Optional
 from typing_extensions import Literal, Annotated, TypeAlias
 
 from pydantic import Field as FieldInfo
@@ -9,224 +9,14 @@ from ..._utils import PropertyInfo
 from ..._models import BaseModel
 from .eval_api_error import EvalAPIError
 from ..shared.metadata import Metadata
-from ..shared.reasoning_effort import ReasoningEffort
-from ..responses.response_input_text import ResponseInputText
 from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_responses_run_data_source import CreateEvalResponsesRunDataSource
 from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
 
-__all__ = [
-    "RunListResponse",
-    "DataSource",
-    "DataSourceCompletions",
-    "DataSourceCompletionsSource",
-    "DataSourceCompletionsSourceFileContent",
-    "DataSourceCompletionsSourceFileContentContent",
-    "DataSourceCompletionsSourceFileID",
-    "DataSourceCompletionsSourceResponses",
-    "DataSourceCompletionsInputMessages",
-    "DataSourceCompletionsInputMessagesTemplate",
-    "DataSourceCompletionsInputMessagesTemplateTemplate",
-    "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText",
-    "DataSourceCompletionsInputMessagesItemReference",
-    "DataSourceCompletionsSamplingParams",
-    "PerModelUsage",
-    "PerTestingCriteriaResult",
-    "ResultCounts",
-]
-
-
-class DataSourceCompletionsSourceFileContentContent(BaseModel):
-    item: Dict[str, object]
-
-    sample: Optional[Dict[str, object]] = None
-
-
-class DataSourceCompletionsSourceFileContent(BaseModel):
-    content: List[DataSourceCompletionsSourceFileContentContent]
-    """The content of the jsonl file."""
-
-    type: Literal["file_content"]
-    """The type of jsonl source. Always `file_content`."""
-
-
-class DataSourceCompletionsSourceFileID(BaseModel):
-    id: str
-    """The identifier of the file."""
-
-    type: Literal["file_id"]
-    """The type of jsonl source. Always `file_id`."""
-
-
-class DataSourceCompletionsSourceResponses(BaseModel):
-    type: Literal["responses"]
-    """The type of run data source. Always `responses`."""
-
-    allow_parallel_tool_calls: Optional[bool] = None
-    """Whether to allow parallel tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    created_after: Optional[int] = None
-    """Only include items created after this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    created_before: Optional[int] = None
-    """Only include items created before this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    has_tool_calls: Optional[bool] = None
-    """Whether the response has tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    instructions_search: Optional[str] = None
-    """Optional search string for instructions.
-
-    This is a query parameter used to select responses.
-    """
-
-    metadata: Optional[object] = None
-    """Metadata filter for the responses.
-
-    This is a query parameter used to select responses.
-    """
-
-    model: Optional[str] = None
-    """The name of the model to find responses for.
-
-    This is a query parameter used to select responses.
-    """
-
-    reasoning_effort: Optional[ReasoningEffort] = None
-    """Optional reasoning effort parameter.
-
-    This is a query parameter used to select responses.
-    """
-
-    temperature: Optional[float] = None
-    """Sampling temperature. This is a query parameter used to select responses."""
-
-    top_p: Optional[float] = None
-    """Nucleus sampling parameter. This is a query parameter used to select responses."""
-
-    users: Optional[List[str]] = None
-    """List of user identifiers. This is a query parameter used to select responses."""
-
-
-DataSourceCompletionsSource: TypeAlias = Annotated[
-    Union[
-        DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses
-    ],
-    PropertyInfo(discriminator="type"),
-]
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel):
-    content: str
-    """The content of the message."""
-
-    role: str
-    """The role of the message (e.g. "system", "assistant", "user")."""
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel):
-    text: str
-    """The text output from the model."""
-
-    type: Literal["output_text"]
-    """The type of the output text. Always `output_text`."""
-
-
-DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
-    str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText
-]
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel):
-    content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent
-    """Text inputs to the model - can contain template strings."""
-
-    role: Literal["user", "assistant", "system", "developer"]
-    """The role of the message input.
-
-    One of `user`, `assistant`, `system`, or `developer`.
-    """
-
-    type: Optional[Literal["message"]] = None
-    """The type of the message input. Always `message`."""
-
-
-DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[
-    DataSourceCompletionsInputMessagesTemplateTemplateChatMessage,
-    DataSourceCompletionsInputMessagesTemplateTemplateEvalItem,
-]
-
-
-class DataSourceCompletionsInputMessagesTemplate(BaseModel):
-    template: List[DataSourceCompletionsInputMessagesTemplateTemplate]
-    """A list of chat messages forming the prompt or context.
-
-    May include variable references to the "item" namespace, ie {{item.name}}.
-    """
-
-    type: Literal["template"]
-    """The type of input messages. Always `template`."""
-
-
-class DataSourceCompletionsInputMessagesItemReference(BaseModel):
-    item_reference: str
-    """A reference to a variable in the "item" namespace. Ie, "item.name" """
-
-    type: Literal["item_reference"]
-    """The type of input messages. Always `item_reference`."""
-
-
-DataSourceCompletionsInputMessages: TypeAlias = Annotated[
-    Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference],
-    PropertyInfo(discriminator="type"),
-]
-
-
-class DataSourceCompletionsSamplingParams(BaseModel):
-    max_completion_tokens: Optional[int] = None
-    """The maximum number of tokens in the generated output."""
-
-    seed: Optional[int] = None
-    """A seed value to initialize the randomness, during sampling."""
-
-    temperature: Optional[float] = None
-    """A higher temperature increases randomness in the outputs."""
-
-    top_p: Optional[float] = None
-    """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
-
-
-class DataSourceCompletions(BaseModel):
-    source: DataSourceCompletionsSource
-    """A EvalResponsesSource object describing a run data source configuration."""
-
-    type: Literal["completions"]
-    """The type of run data source. Always `completions`."""
-
-    input_messages: Optional[DataSourceCompletionsInputMessages] = None
-
-    model: Optional[str] = None
-    """The name of the model to use for generating completions (e.g. "o3-mini")."""
-
-    sampling_params: Optional[DataSourceCompletionsSamplingParams] = None
-
+__all__ = ["RunListResponse", "DataSource", "PerModelUsage", "PerTestingCriteriaResult", "ResultCounts"]
 
 DataSource: TypeAlias = Annotated[
-    Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions],
+    Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, CreateEvalResponsesRunDataSource],
     PropertyInfo(discriminator="type"),
 ]
 
src/openai/types/evals/run_retrieve_response.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Dict, List, Union, Optional
+from typing import List, Union, Optional
 from typing_extensions import Literal, Annotated, TypeAlias
 
 from pydantic import Field as FieldInfo
@@ -9,224 +9,14 @@ from ..._utils import PropertyInfo
 from ..._models import BaseModel
 from .eval_api_error import EvalAPIError
 from ..shared.metadata import Metadata
-from ..shared.reasoning_effort import ReasoningEffort
-from ..responses.response_input_text import ResponseInputText
 from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_responses_run_data_source import CreateEvalResponsesRunDataSource
 from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
 
-__all__ = [
-    "RunRetrieveResponse",
-    "DataSource",
-    "DataSourceCompletions",
-    "DataSourceCompletionsSource",
-    "DataSourceCompletionsSourceFileContent",
-    "DataSourceCompletionsSourceFileContentContent",
-    "DataSourceCompletionsSourceFileID",
-    "DataSourceCompletionsSourceResponses",
-    "DataSourceCompletionsInputMessages",
-    "DataSourceCompletionsInputMessagesTemplate",
-    "DataSourceCompletionsInputMessagesTemplateTemplate",
-    "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent",
-    "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText",
-    "DataSourceCompletionsInputMessagesItemReference",
-    "DataSourceCompletionsSamplingParams",
-    "PerModelUsage",
-    "PerTestingCriteriaResult",
-    "ResultCounts",
-]
-
-
-class DataSourceCompletionsSourceFileContentContent(BaseModel):
-    item: Dict[str, object]
-
-    sample: Optional[Dict[str, object]] = None
-
-
-class DataSourceCompletionsSourceFileContent(BaseModel):
-    content: List[DataSourceCompletionsSourceFileContentContent]
-    """The content of the jsonl file."""
-
-    type: Literal["file_content"]
-    """The type of jsonl source. Always `file_content`."""
-
-
-class DataSourceCompletionsSourceFileID(BaseModel):
-    id: str
-    """The identifier of the file."""
-
-    type: Literal["file_id"]
-    """The type of jsonl source. Always `file_id`."""
-
-
-class DataSourceCompletionsSourceResponses(BaseModel):
-    type: Literal["responses"]
-    """The type of run data source. Always `responses`."""
-
-    allow_parallel_tool_calls: Optional[bool] = None
-    """Whether to allow parallel tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    created_after: Optional[int] = None
-    """Only include items created after this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    created_before: Optional[int] = None
-    """Only include items created before this timestamp (inclusive).
-
-    This is a query parameter used to select responses.
-    """
-
-    has_tool_calls: Optional[bool] = None
-    """Whether the response has tool calls.
-
-    This is a query parameter used to select responses.
-    """
-
-    instructions_search: Optional[str] = None
-    """Optional search string for instructions.
-
-    This is a query parameter used to select responses.
-    """
-
-    metadata: Optional[object] = None
-    """Metadata filter for the responses.
-
-    This is a query parameter used to select responses.
-    """
-
-    model: Optional[str] = None
-    """The name of the model to find responses for.
-
-    This is a query parameter used to select responses.
-    """
-
-    reasoning_effort: Optional[ReasoningEffort] = None
-    """Optional reasoning effort parameter.
-
-    This is a query parameter used to select responses.
-    """
-
-    temperature: Optional[float] = None
-    """Sampling temperature. This is a query parameter used to select responses."""
-
-    top_p: Optional[float] = None
-    """Nucleus sampling parameter. This is a query parameter used to select responses."""
-
-    users: Optional[List[str]] = None
-    """List of user identifiers. This is a query parameter used to select responses."""
-
-
-DataSourceCompletionsSource: TypeAlias = Annotated[
-    Union[
-        DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses
-    ],
-    PropertyInfo(discriminator="type"),
-]
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel):
-    content: str
-    """The content of the message."""
-
-    role: str
-    """The role of the message (e.g. "system", "assistant", "user")."""
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel):
-    text: str
-    """The text output from the model."""
-
-    type: Literal["output_text"]
-    """The type of the output text. Always `output_text`."""
-
-
-DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
-    str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText
-]
-
-
-class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel):
-    content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent
-    """Text inputs to the model - can contain template strings."""
-
-    role: Literal["user", "assistant", "system", "developer"]
-    """The role of the message input.
-
-    One of `user`, `assistant`, `system`, or `developer`.
-    """
-
-    type: Optional[Literal["message"]] = None
-    """The type of the message input. Always `message`."""
-
-
-DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[
-    DataSourceCompletionsInputMessagesTemplateTemplateChatMessage,
-    DataSourceCompletionsInputMessagesTemplateTemplateEvalItem,
-]
-
-
-class DataSourceCompletionsInputMessagesTemplate(BaseModel):
-    template: List[DataSourceCompletionsInputMessagesTemplateTemplate]
-    """A list of chat messages forming the prompt or context.
-
-    May include variable references to the "item" namespace, ie {{item.name}}.
-    """
-
-    type: Literal["template"]
-    """The type of input messages. Always `template`."""
-
-
-class DataSourceCompletionsInputMessagesItemReference(BaseModel):
-    item_reference: str
-    """A reference to a variable in the "item" namespace. Ie, "item.name" """
-
-    type: Literal["item_reference"]
-    """The type of input messages. Always `item_reference`."""
-
-
-DataSourceCompletionsInputMessages: TypeAlias = Annotated[
-    Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference],
-    PropertyInfo(discriminator="type"),
-]
-
-
-class DataSourceCompletionsSamplingParams(BaseModel):
-    max_completion_tokens: Optional[int] = None
-    """The maximum number of tokens in the generated output."""
-
-    seed: Optional[int] = None
-    """A seed value to initialize the randomness, during sampling."""
-
-    temperature: Optional[float] = None
-    """A higher temperature increases randomness in the outputs."""
-
-    top_p: Optional[float] = None
-    """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
-
-
-class DataSourceCompletions(BaseModel):
-    source: DataSourceCompletionsSource
-    """A EvalResponsesSource object describing a run data source configuration."""
-
-    type: Literal["completions"]
-    """The type of run data source. Always `completions`."""
-
-    input_messages: Optional[DataSourceCompletionsInputMessages] = None
-
-    model: Optional[str] = None
-    """The name of the model to use for generating completions (e.g. "o3-mini")."""
-
-    sampling_params: Optional[DataSourceCompletionsSamplingParams] = None
-
+__all__ = ["RunRetrieveResponse", "DataSource", "PerModelUsage", "PerTestingCriteriaResult", "ResultCounts"]
 
 DataSource: TypeAlias = Annotated[
-    Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions],
+    Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, CreateEvalResponsesRunDataSource],
     PropertyInfo(discriminator="type"),
 ]
 
src/openai/types/fine_tuning/fine_tuning_job.py
@@ -28,7 +28,7 @@ class Error(BaseModel):
 
 
 class Hyperparameters(BaseModel):
-    batch_size: Union[Literal["auto"], int, None] = None
+    batch_size: Union[Literal["auto"], int, Optional[object], None] = None
     """Number of examples in each batch.
 
     A larger batch size means that model parameters are updated less frequently, but
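
Since `batch_size` can now surface as an arbitrary object in addition to `"auto"` or an `int`, consumers that pattern-match on it need a fallback arm. A short defensive sketch; the job ID is hypothetical:

```python
# Sketch: reading the widened batch_size from a parsed FineTuningJob.
from openai import OpenAI

client = OpenAI()
job = client.fine_tuning.jobs.retrieve("ftjob-abc123")  # hypothetical ID

bs = job.hyperparameters.batch_size
if bs == "auto" or bs is None:
    print("batch size is chosen by the service")
elif isinstance(bs, int):
    print(f"fixed batch size: {bs}")
else:
    # The new Optional[object] arm: an unrecognized payload shape; surface it as-is.
    print(f"unrecognized batch_size payload: {bs!r}")
```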
src/openai/types/__init__.py
@@ -70,6 +70,7 @@ from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCr
 from .vector_store_search_params import VectorStoreSearchParams as VectorStoreSearchParams
 from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams
 from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam
+from .eval_logs_data_source_config import EvalLogsDataSourceConfig as EvalLogsDataSourceConfig
 from .file_chunking_strategy_param import FileChunkingStrategyParam as FileChunkingStrategyParam
 from .vector_store_search_response import VectorStoreSearchResponse as VectorStoreSearchResponse
 from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions
src/openai/types/embedding_create_params.py
@@ -16,11 +16,12 @@ class EmbeddingCreateParams(TypedDict, total=False):
 
     To embed multiple inputs in a single request, pass an array of strings or array
     of token arrays. The input must not exceed the max input tokens for the model
-    (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any
-    array must be 2048 dimensions or less.
+    (8192 tokens for all embedding models), cannot be an empty string, and any array
+    must be 2048 dimensions or less.
     [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-    for counting tokens. Some models may also impose a limit on total number of
-    tokens summed across inputs.
+    for counting tokens. In addition to the per-input token limit, all embedding
+    models enforce a maximum of 300,000 tokens summed across all inputs in a single
+    request.
     """
 
     model: Required[Union[str, EmbeddingModel]]
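
The rewritten docstring pins down two request-level caps: at most 2048 inputs per array and at most 300,000 tokens summed across all inputs. A sketch of batching a large input list under both caps, using tiktoken's `cl100k_base` (the encoding current embedding models use); the model choice and flush strategy are illustrative:

```python
# Sketch: chunk embedding inputs to respect the 2048-inputs-per-request and
# 300,000-total-tokens-per-request caps described in the docstring above.
import tiktoken
from openai import OpenAI

client = OpenAI()
enc = tiktoken.get_encoding("cl100k_base")

MAX_INPUTS = 2048
MAX_TOKENS = 300_000


def embed_all(texts: list[str], model: str = "text-embedding-3-small") -> list[list[float]]:
    embeddings: list[list[float]] = []
    batch: list[str] = []
    batch_tokens = 0
    for text in texts:
        n_tokens = len(enc.encode(text))
        # Flush the current batch before this text would break either cap.
        if batch and (len(batch) >= MAX_INPUTS or batch_tokens + n_tokens > MAX_TOKENS):
            resp = client.embeddings.create(model=model, input=batch)
            embeddings.extend(d.embedding for d in resp.data)
            batch, batch_tokens = [], 0
        batch.append(text)
        batch_tokens += n_tokens
    if batch:
        resp = client.embeddings.create(model=model, input=batch)
        embeddings.extend(d.embedding for d in resp.data)
    return embeddings
```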
src/openai/types/eval_create_params.py
@@ -16,6 +16,7 @@ __all__ = [
     "EvalCreateParams",
     "DataSourceConfig",
     "DataSourceConfigCustom",
+    "DataSourceConfigLogs",
     "DataSourceConfigStoredCompletions",
     "TestingCriterion",
     "TestingCriterionLabelModel",
@@ -65,15 +66,23 @@ class DataSourceConfigCustom(TypedDict, total=False):
     """
 
 
+class DataSourceConfigLogs(TypedDict, total=False):
+    type: Required[Literal["logs"]]
+    """The type of data source. Always `logs`."""
+
+    metadata: Dict[str, object]
+    """Metadata filters for the logs data source."""
+
+
 class DataSourceConfigStoredCompletions(TypedDict, total=False):
-    type: Required[Literal["stored_completions"]]
-    """The type of data source. Always `stored_completions`."""
+    type: Required[Literal["stored-completions"]]
+    """The type of data source. Always `stored-completions`."""
 
     metadata: Dict[str, object]
     """Metadata filters for the stored completions data source."""
 
 
-DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigStoredCompletions]
+DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions]
 
 
 class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
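
With `DataSourceConfigLogs` in the union, `client.evals.create` accepts `{"type": "logs"}` alongside the custom and stored-completions configs. A hedged sketch; the metadata filter and the string-check grader values are illustrative only:

```python
# Sketch: creating an eval over the new "logs" data source config.
from openai import OpenAI

client = OpenAI()

evaluation = client.evals.create(
    name="logs-based eval",  # hypothetical name
    data_source_config={
        "type": "logs",
        "metadata": {"usecase": "chatbot"},  # hypothetical metadata filter
    },
    testing_criteria=[
        {
            "type": "string_check",
            "name": "output is non-empty",
            "input": "{{sample.output_text}}",
            "reference": "",
            "operation": "ne",
        }
    ],
)
print(evaluation.id)
```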
src/openai/types/eval_create_response.py
@@ -10,6 +10,7 @@ from .graders.python_grader import PythonGrader
 from .graders.label_model_grader import LabelModelGrader
 from .graders.score_model_grader import ScoreModelGrader
 from .graders.string_check_grader import StringCheckGrader
+from .eval_logs_data_source_config import EvalLogsDataSourceConfig
 from .eval_custom_data_source_config import EvalCustomDataSourceConfig
 from .graders.text_similarity_grader import TextSimilarityGrader
 from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
@@ -24,7 +25,8 @@ __all__ = [
 ]
 
 DataSourceConfig: TypeAlias = Annotated[
-    Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+    Union[EvalCustomDataSourceConfig, EvalLogsDataSourceConfig, EvalStoredCompletionsDataSourceConfig],
+    PropertyInfo(discriminator="type"),
 ]
 
 
src/openai/types/eval_list_response.py
@@ -10,6 +10,7 @@ from .graders.python_grader import PythonGrader
 from .graders.label_model_grader import LabelModelGrader
 from .graders.score_model_grader import ScoreModelGrader
 from .graders.string_check_grader import StringCheckGrader
+from .eval_logs_data_source_config import EvalLogsDataSourceConfig
 from .eval_custom_data_source_config import EvalCustomDataSourceConfig
 from .graders.text_similarity_grader import TextSimilarityGrader
 from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
@@ -24,7 +25,8 @@ __all__ = [
 ]
 
 DataSourceConfig: TypeAlias = Annotated[
-    Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+    Union[EvalCustomDataSourceConfig, EvalLogsDataSourceConfig, EvalStoredCompletionsDataSourceConfig],
+    PropertyInfo(discriminator="type"),
 ]
 
 
src/openai/types/eval_logs_data_source_config.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+from .shared.metadata import Metadata
+
+__all__ = ["EvalLogsDataSourceConfig"]
+
+
+class EvalLogsDataSourceConfig(BaseModel):
+    schema_: Dict[str, object] = FieldInfo(alias="schema")
+    """
+    The json schema for the run data source items. Learn how to build JSON schemas
+    [here](https://json-schema.org/).
+    """
+
+    type: Literal["logs"]
+    """The type of data source. Always `logs`."""
+
+    metadata: Optional[Metadata] = None
+    """Set of 16 key-value pairs that can be attached to an object.
+
+    This can be useful for storing additional information about the object in a
+    structured format, and querying for objects via API or the dashboard.
+
+    Keys are strings with a maximum length of 64 characters. Values are strings with
+    a maximum length of 512 characters.
+    """
src/openai/types/eval_retrieve_response.py
@@ -10,6 +10,7 @@ from .graders.python_grader import PythonGrader
 from .graders.label_model_grader import LabelModelGrader
 from .graders.score_model_grader import ScoreModelGrader
 from .graders.string_check_grader import StringCheckGrader
+from .eval_logs_data_source_config import EvalLogsDataSourceConfig
 from .eval_custom_data_source_config import EvalCustomDataSourceConfig
 from .graders.text_similarity_grader import TextSimilarityGrader
 from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
@@ -24,7 +25,8 @@ __all__ = [
 ]
 
 DataSourceConfig: TypeAlias = Annotated[
-    Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+    Union[EvalCustomDataSourceConfig, EvalLogsDataSourceConfig, EvalStoredCompletionsDataSourceConfig],
+    PropertyInfo(discriminator="type"),
 ]
 
 
src/openai/types/eval_stored_completions_data_source_config.py
@@ -18,8 +18,8 @@ class EvalStoredCompletionsDataSourceConfig(BaseModel):
     [here](https://json-schema.org/).
     """
 
-    type: Literal["stored_completions"]
-    """The type of data source. Always `stored_completions`."""
+    type: Literal["stored-completions"]
+    """The type of data source. Always `stored-completions`."""
 
     metadata: Optional[Metadata] = None
     """Set of 16 key-value pairs that can be attached to an object.
src/openai/types/eval_update_response.py
@@ -10,6 +10,7 @@ from .graders.python_grader import PythonGrader
 from .graders.label_model_grader import LabelModelGrader
 from .graders.score_model_grader import ScoreModelGrader
 from .graders.string_check_grader import StringCheckGrader
+from .eval_logs_data_source_config import EvalLogsDataSourceConfig
 from .eval_custom_data_source_config import EvalCustomDataSourceConfig
 from .graders.text_similarity_grader import TextSimilarityGrader
 from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
@@ -24,7 +25,8 @@ __all__ = [
 ]
 
 DataSourceConfig: TypeAlias = Annotated[
-    Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+    Union[EvalCustomDataSourceConfig, EvalLogsDataSourceConfig, EvalStoredCompletionsDataSourceConfig],
+    PropertyInfo(discriminator="type"),
 ]
 
 
tests/api_resources/audio/test_transcriptions.py
@@ -30,6 +30,7 @@ class TestTranscriptions:
         transcription = client.audio.transcriptions.create(
             file=b"raw file contents",
             model="gpt-4o-transcribe",
+            chunking_strategy="auto",
             include=["logprobs"],
             language="language",
             prompt="prompt",
@@ -81,6 +82,7 @@ class TestTranscriptions:
             file=b"raw file contents",
             model="gpt-4o-transcribe",
             stream=True,
+            chunking_strategy="auto",
             include=["logprobs"],
             language="language",
             prompt="prompt",
@@ -134,6 +136,7 @@ class TestAsyncTranscriptions:
         transcription = await async_client.audio.transcriptions.create(
             file=b"raw file contents",
             model="gpt-4o-transcribe",
+            chunking_strategy="auto",
             include=["logprobs"],
             language="language",
             prompt="prompt",
@@ -185,6 +188,7 @@ class TestAsyncTranscriptions:
             file=b"raw file contents",
             model="gpt-4o-transcribe",
             stream=True,
+            chunking_strategy="auto",
             include=["logprobs"],
             language="language",
             prompt="prompt",
.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 101
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-794a6ed3c3d3d77887564755168056af8a426b17cf1ec721e3a300503dc22a41.yml
-openapi_spec_hash: 25a81c220713cd5b0bafc221d1dfa79a
-config_hash: 0b768ed1b56c6d82816f0fa40dc4aaf5
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-161ca7f1cfd7b33c1fc07d0ce25dfe4be5a7271c394f4cb526b7fb21b0729900.yml
+openapi_spec_hash: 602e14add4bee018c6774e320ce309b8
+config_hash: 7da27f7260075e8813ddcea542fba1bf
api.md
@@ -787,6 +787,7 @@ Types:
 ```python
 from openai.types import (
     EvalCustomDataSourceConfig,
+    EvalLogsDataSourceConfig,
     EvalStoredCompletionsDataSourceConfig,
     EvalCreateResponse,
     EvalRetrieveResponse,
@@ -812,6 +813,7 @@ Types:
 from openai.types.evals import (
     CreateEvalCompletionsRunDataSource,
     CreateEvalJSONLRunDataSource,
+    CreateEvalResponsesRunDataSource,
     EvalAPIError,
     RunCreateResponse,
     RunRetrieveResponse,