Commit ed9e2ddc

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-11-04 07:07:04
feat(api): Realtime API token_limits, hybrid search ranking options
1 parent 0393d90
src/openai/resources/realtime/calls.py
@@ -195,8 +195,19 @@ class Calls(SyncAPIResource):
               `auto` will create a trace for the session with default values for the workflow
               name, group id, and metadata.
 
-          truncation: Controls how the realtime conversation is truncated prior to model inference.
-              The default is `auto`.
+          truncation: When the number of tokens in a conversation exceeds the model's input token
+              limit, the conversation will be truncated, meaning messages (starting from the
+              oldest) will not be included in the model's context. A 32k context model with
+              4,096 max output tokens can only include 28,224 tokens in the context before
+              truncation occurs. Clients can configure truncation behavior to truncate with a
+              lower max token limit, which is an effective way to control token usage and
+              cost. Truncation will reduce the number of cached tokens on the next turn
+              (busting the cache), since messages are dropped from the beginning of the
+              context. However, clients can also configure truncation to retain messages up to
+              a fraction of the maximum context size, which will reduce the need for future
+              truncations and thus improve the cache rate. Truncation can be disabled
+              entirely, which means the server will never truncate but would instead return an
+              error if the conversation exceeds the model's input token limit.
 
           extra_headers: Send extra headers
 
@@ -504,8 +515,19 @@ class AsyncCalls(AsyncAPIResource):
               `auto` will create a trace for the session with default values for the workflow
               name, group id, and metadata.
 
-          truncation: Controls how the realtime conversation is truncated prior to model inference.
-              The default is `auto`.
+          truncation: When the number of tokens in a conversation exceeds the model's input token
+              limit, the conversation will be truncated, meaning messages (starting from the
+              oldest) will not be included in the model's context. A 32k context model with
+              4,096 max output tokens can only include 28,224 tokens in the context before
+              truncation occurs. Clients can configure truncation behavior to truncate with a
+              lower max token limit, which is an effective way to control token usage and
+              cost. Truncation will reduce the number of cached tokens on the next turn
+              (busting the cache), since messages are dropped from the beginning of the
+              context. However, clients can also configure truncation to retain messages up to
+              a fraction of the maximum context size, which will reduce the need for future
+              truncations and thus improve the cache rate. Truncation can be disabled
+              entirely, which means the server will never truncate but would instead return an
+              error if the conversation exceeds the model's input token limit.
 
           extra_headers: Send extra headers
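
A minimal sketch of configuring the new retention-ratio truncation with `token_limits` when accepting a realtime SIP call. The call ID and limit values are placeholders, and other session fields are omitted; the exact set of required `accept` arguments may differ:

    from openai import OpenAI

    client = OpenAI()

    client.realtime.calls.accept(
        "rtc_example_call_id",  # placeholder call ID
        type="realtime",
        model="gpt-realtime",
        truncation={
            "type": "retention_ratio",
            # After a truncation, retain 80% of post-instruction tokens.
            "retention_ratio": 0.8,
            # Illustrative cap: truncate once the conversation exceeds 20,000
            # post-instruction tokens (must not exceed context window minus max output tokens).
            "token_limits": {"post_instructions": 20_000},
        },
    )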
 
src/openai/resources/vector_stores/file_batches.py
@@ -52,9 +52,10 @@ class FileBatches(SyncAPIResource):
         self,
         vector_store_id: str,
         *,
-        file_ids: SequenceNotStr[str],
         attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
         chunking_strategy: FileChunkingStrategyParam | Omit = omit,
+        file_ids: SequenceNotStr[str] | Omit = omit,
+        files: Iterable[file_batch_create_params.File] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -66,10 +67,6 @@ class FileBatches(SyncAPIResource):
         Create a vector store file batch.
 
         Args:
-          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-              the vector store should use. Useful for tools like `file_search` that can access
-              files.
-
           attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard. Keys are strings with a maximum
@@ -79,6 +76,16 @@ class FileBatches(SyncAPIResource):
           chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
               strategy. Only applicable if `file_ids` is non-empty.
 
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files. If `attributes` or `chunking_strategy` are provided, they will be applied
+              to all files in the batch. Mutually exclusive with `files`.
+
+          files: A list of objects that each include a `file_id` plus optional `attributes` or
+              `chunking_strategy`. Use this when you need to override metadata for specific
+              files. The global `attributes` or `chunking_strategy` will be ignored and must
+              be specified for each file. Mutually exclusive with `file_ids`.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -94,9 +101,10 @@ class FileBatches(SyncAPIResource):
             f"/vector_stores/{vector_store_id}/file_batches",
             body=maybe_transform(
                 {
-                    "file_ids": file_ids,
                     "attributes": attributes,
                     "chunking_strategy": chunking_strategy,
+                    "file_ids": file_ids,
+                    "files": files,
                 },
                 file_batch_create_params.FileBatchCreateParams,
             ),
@@ -389,9 +397,10 @@ class AsyncFileBatches(AsyncAPIResource):
         self,
         vector_store_id: str,
         *,
-        file_ids: SequenceNotStr[str],
         attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
         chunking_strategy: FileChunkingStrategyParam | Omit = omit,
+        file_ids: SequenceNotStr[str] | Omit = omit,
+        files: Iterable[file_batch_create_params.File] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -403,10 +412,6 @@ class AsyncFileBatches(AsyncAPIResource):
         Create a vector store file batch.
 
         Args:
-          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-              the vector store should use. Useful for tools like `file_search` that can access
-              files.
-
           attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard. Keys are strings with a maximum
@@ -416,6 +421,16 @@ class AsyncFileBatches(AsyncAPIResource):
           chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
               strategy. Only applicable if `file_ids` is non-empty.
 
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files. If `attributes` or `chunking_strategy` are provided, they will be applied
+              to all files in the batch. Mutually exclusive with `files`.
+
+          files: A list of objects that each include a `file_id` plus optional `attributes` or
+              `chunking_strategy`. Use this when you need to override metadata for specific
+              files. The global `attributes` or `chunking_strategy` will be ignored and must
+              be specified for each file. Mutually exclusive with `file_ids`.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -431,9 +446,10 @@ class AsyncFileBatches(AsyncAPIResource):
             f"/vector_stores/{vector_store_id}/file_batches",
             body=await async_maybe_transform(
                 {
-                    "file_ids": file_ids,
                     "attributes": attributes,
                     "chunking_strategy": chunking_strategy,
+                    "file_ids": file_ids,
+                    "files": files,
                 },
                 file_batch_create_params.FileBatchCreateParams,
             ),
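
A minimal usage sketch for the new `files` form of file batch creation (vector store and file IDs are placeholders). Each entry carries its own `attributes` or `chunking_strategy`; the top-level values are ignored when `files` is used, and `files` is mutually exclusive with `file_ids`:

    from openai import OpenAI

    client = OpenAI()

    client.vector_stores.file_batches.create(
        vector_store_id="vs_abc123",  # placeholder
        files=[
            # Per-file metadata overrides; top-level attributes/chunking_strategy do not apply here.
            {"file_id": "file-111", "attributes": {"department": "billing"}},
            {"file_id": "file-222", "chunking_strategy": {"type": "auto"}},
        ],
    )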
src/openai/resources/images.py
@@ -168,7 +168,10 @@ class Images(SyncAPIResource):
               If `transparent`, the output format needs to support transparency, so it should
               be set to either `png` (default value) or `webp`.
 
-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.
 
           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -282,7 +285,10 @@ class Images(SyncAPIResource):
               If `transparent`, the output format needs to support transparency, so it should
               be set to either `png` (default value) or `webp`.
 
-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.
 
           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -392,7 +398,10 @@ class Images(SyncAPIResource):
               If `transparent`, the output format needs to support transparency, so it should
               be set to either `png` (default value) or `webp`.
 
-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.
 
           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -1046,7 +1055,10 @@ class AsyncImages(AsyncAPIResource):
               If `transparent`, the output format needs to support transparency, so it should
               be set to either `png` (default value) or `webp`.
 
-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.
 
           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -1160,7 +1172,10 @@ class AsyncImages(AsyncAPIResource):
               If `transparent`, the output format needs to support transparency, so it should
               be set to either `png` (default value) or `webp`.
 
-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.
 
           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -1270,7 +1285,10 @@ class AsyncImages(AsyncAPIResource):
               If `transparent`, the output format needs to support transparency, so it should
               be set to either `png` (default value) or `webp`.
 
-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.
 
           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
src/openai/types/realtime/call_accept_params.py
@@ -106,6 +106,17 @@ class CallAcceptParams(TypedDict, total=False):
 
     truncation: RealtimeTruncationParam
     """
-    Controls how the realtime conversation is truncated prior to model inference.
-    The default is `auto`.
+    When the number of tokens in a conversation exceeds the model's input token
+    limit, the conversation will be truncated, meaning messages (starting from the
+    oldest) will not be included in the model's context. A 32k context model with
+    4,096 max output tokens can only include 28,224 tokens in the context before
+    truncation occurs. Clients can configure truncation behavior to truncate with a
+    lower max token limit, which is an effective way to control token usage and
+    cost. Truncation will reduce the number of cached tokens on the next turn
+    (busting the cache), since messages are dropped from the beginning of the
+    context. However, clients can also configure truncation to retain messages up to
+    a fraction of the maximum context size, which will reduce the need for future
+    truncations and thus improve the cache rate. Truncation can be disabled
+    entirely, which means the server will never truncate but would instead return an
+    error if the conversation exceeds the model's input token limit.
     """
src/openai/types/realtime/realtime_session_create_request.py
@@ -106,6 +106,17 @@ class RealtimeSessionCreateRequest(BaseModel):
 
     truncation: Optional[RealtimeTruncation] = None
     """
-    Controls how the realtime conversation is truncated prior to model inference.
-    The default is `auto`.
+    When the number of tokens in a conversation exceeds the model's input token
+    limit, the conversation will be truncated, meaning messages (starting from the
+    oldest) will not be included in the model's context. A 32k context model with
+    4,096 max output tokens can only include 28,224 tokens in the context before
+    truncation occurs. Clients can configure truncation behavior to truncate with a
+    lower max token limit, which is an effective way to control token usage and
+    cost. Truncation will reduce the number of cached tokens on the next turn
+    (busting the cache), since messages are dropped from the beginning of the
+    context. However, clients can also configure truncation to retain messages up to
+    a fraction of the maximum context size, which will reduce the need for future
+    truncations and thus improve the cache rate. Truncation can be disabled
+    entirely, which means the server will never truncate but would instead return an
+    error if the conversation exceeds the model's input token limit.
     """
src/openai/types/realtime/realtime_session_create_request_param.py
@@ -106,6 +106,17 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False):
 
     truncation: RealtimeTruncationParam
     """
-    Controls how the realtime conversation is truncated prior to model inference.
-    The default is `auto`.
+    When the number of tokens in a conversation exceeds the model's input token
+    limit, the conversation will be truncated, meaning messages (starting from the
+    oldest) will not be included in the model's context. A 32k context model with
+    4,096 max output tokens can only include 28,224 tokens in the context before
+    truncation occurs. Clients can configure truncation behavior to truncate with a
+    lower max token limit, which is an effective way to control token usage and
+    cost. Truncation will reduce the number of cached tokens on the next turn
+    (busting the cache), since messages are dropped from the beginning of the
+    context. However, clients can also configure truncation to retain messages up to
+    a fraction of the maximum context size, which will reduce the need for future
+    truncations and thus improve the cache rate. Truncation can be disabled
+    entirely, which means the server will never truncate but would instead return an
+    error if the conversation exceeds the model's input token limit.
     """
src/openai/types/realtime/realtime_session_create_response.py
@@ -459,6 +459,17 @@ class RealtimeSessionCreateResponse(BaseModel):
 
     truncation: Optional[RealtimeTruncation] = None
     """
-    Controls how the realtime conversation is truncated prior to model inference.
-    The default is `auto`.
+    When the number of tokens in a conversation exceeds the model's input token
+    limit, the conversation will be truncated, meaning messages (starting from the
+    oldest) will not be included in the model's context. A 32k context model with
+    4,096 max output tokens can only include 28,224 tokens in the context before
+    truncation occurs. Clients can configure truncation behavior to truncate with a
+    lower max token limit, which is an effective way to control token usage and
+    cost. Truncation will reduce the number of cached tokens on the next turn
+    (busting the cache), since messages are dropped from the beginning of the
+    context. However, clients can also configure truncation to retain messages up to
+    a fraction of the maximum context size, which will reduce the need for future
+    truncations and thus improve the cache rate. Truncation can be disabled
+    entirely, which means the server will never truncate but would instead return an
+    error if the conversation exceeds the model's input token limit.
     """
src/openai/types/realtime/realtime_truncation_retention_ratio.py
@@ -1,18 +1,38 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
+from typing import Optional
 from typing_extensions import Literal
 
 from ..._models import BaseModel
 
-__all__ = ["RealtimeTruncationRetentionRatio"]
+__all__ = ["RealtimeTruncationRetentionRatio", "TokenLimits"]
+
+
+class TokenLimits(BaseModel):
+    post_instructions: Optional[int] = None
+    """
+    Maximum tokens allowed in the conversation after instructions (which include
+    tool definitions). For example, setting this to 5,000 would mean that truncation
+    would occur when the conversation exceeds 5,000 tokens after instructions. This
+    cannot be higher than the model's context window size minus the maximum output
+    tokens.
+    """
 
 
 class RealtimeTruncationRetentionRatio(BaseModel):
     retention_ratio: float
     """
-    Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
-    conversation exceeds the input token limit.
+    Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
+    the conversation exceeds the input token limit. Setting this to `0.8` means that
+    messages will be dropped until 80% of the maximum allowed tokens are used. This
+    helps reduce the frequency of truncations and improve cache rates.
     """
 
     type: Literal["retention_ratio"]
     """Use retention ratio truncation."""
+
+    token_limits: Optional[TokenLimits] = None
+    """Optional custom token limits for this truncation strategy.
+
+    If not provided, the model's default token limits will be used.
+    """
src/openai/types/realtime/realtime_truncation_retention_ratio_param.py
@@ -4,15 +4,34 @@ from __future__ import annotations
 
 from typing_extensions import Literal, Required, TypedDict
 
-__all__ = ["RealtimeTruncationRetentionRatioParam"]
+__all__ = ["RealtimeTruncationRetentionRatioParam", "TokenLimits"]
+
+
+class TokenLimits(TypedDict, total=False):
+    post_instructions: int
+    """
+    Maximum tokens allowed in the conversation after instructions (which include
+    tool definitions). For example, setting this to 5,000 would mean that truncation
+    would occur when the conversation exceeds 5,000 tokens after instructions. This
+    cannot be higher than the model's context window size minus the maximum output
+    tokens.
+    """
 
 
 class RealtimeTruncationRetentionRatioParam(TypedDict, total=False):
     retention_ratio: Required[float]
     """
-    Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
-    conversation exceeds the input token limit.
+    Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
+    the conversation exceeds the input token limit. Setting this to `0.8` means that
+    messages will be dropped until 80% of the maximum allowed tokens are used. This
+    helps reduce the frequency of truncations and improve cache rates.
     """
 
     type: Required[Literal["retention_ratio"]]
     """Use retention ratio truncation."""
+
+    token_limits: TokenLimits
+    """Optional custom token limits for this truncation strategy.
+
+    If not provided, the model's default token limits will be used.
+    """
src/openai/types/responses/file_search_tool.py
@@ -7,12 +7,26 @@ from ..._models import BaseModel
 from ..shared.compound_filter import CompoundFilter
 from ..shared.comparison_filter import ComparisonFilter
 
-__all__ = ["FileSearchTool", "Filters", "RankingOptions"]
+__all__ = ["FileSearchTool", "Filters", "RankingOptions", "RankingOptionsHybridSearch"]
 
 Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter, None]
 
 
+class RankingOptionsHybridSearch(BaseModel):
+    embedding_weight: float
+    """The weight of the embedding in the reciprocal ranking fusion."""
+
+    text_weight: float
+    """The weight of the text in the reciprocal ranking fusion."""
+
+
 class RankingOptions(BaseModel):
+    hybrid_search: Optional[RankingOptionsHybridSearch] = None
+    """
+    Weights that control how reciprocal rank fusion balances semantic embedding
+    matches versus sparse keyword matches when hybrid search is enabled.
+    """
+
     ranker: Optional[Literal["auto", "default-2024-11-15"]] = None
     """The ranker to use for the file search."""
 
src/openai/types/responses/file_search_tool_param.py
@@ -9,12 +9,26 @@ from ..._types import SequenceNotStr
 from ..shared_params.compound_filter import CompoundFilter
 from ..shared_params.comparison_filter import ComparisonFilter
 
-__all__ = ["FileSearchToolParam", "Filters", "RankingOptions"]
+__all__ = ["FileSearchToolParam", "Filters", "RankingOptions", "RankingOptionsHybridSearch"]
 
 Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter]
 
 
+class RankingOptionsHybridSearch(TypedDict, total=False):
+    embedding_weight: Required[float]
+    """The weight of the embedding in the reciprocal ranking fusion."""
+
+    text_weight: Required[float]
+    """The weight of the text in the reciprocal ranking fusion."""
+
+
 class RankingOptions(TypedDict, total=False):
+    hybrid_search: RankingOptionsHybridSearch
+    """
+    Weights that control how reciprocal rank fusion balances semantic embedding
+    matches versus sparse keyword matches when hybrid search is enabled.
+    """
+
     ranker: Literal["auto", "default-2024-11-15"]
     """The ranker to use for the file search."""
 
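A sketch of passing the new hybrid-search weights through a `file_search` tool on the Responses API (model, vector store ID, question, and weights are illustrative):

    from openai import OpenAI

    client = OpenAI()

    client.responses.create(
        model="gpt-4.1-mini",
        input="What does the refund policy say about partial refunds?",
        tools=[
            {
                "type": "file_search",
                "vector_store_ids": ["vs_abc123"],  # placeholder
                "ranking_options": {
                    # Reciprocal rank fusion weights: favor semantic (embedding) matches
                    # over sparse keyword (text) matches.
                    "hybrid_search": {"embedding_weight": 0.7, "text_weight": 0.3},
                },
            }
        ],
    )
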
src/openai/types/responses/response_output_text.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import List, Union, Optional
+from typing import List, Union
 from typing_extensions import Literal, Annotated, TypeAlias
 
 from ..._utils import PropertyInfo
@@ -108,10 +108,10 @@ class ResponseOutputText(BaseModel):
     annotations: List[Annotation]
     """The annotations of the text output."""
 
+    logprobs: List[Logprob]
+
     text: str
     """The text output from the model."""
 
     type: Literal["output_text"]
     """The type of the output text. Always `output_text`."""
-
-    logprobs: Optional[List[Logprob]] = None
src/openai/types/responses/response_output_text_param.py
@@ -106,10 +106,10 @@ class ResponseOutputTextParam(TypedDict, total=False):
     annotations: Required[Iterable[Annotation]]
     """The annotations of the text output."""
 
+    logprobs: Required[Iterable[Logprob]]
+
     text: Required[str]
     """The text output from the model."""
 
     type: Required[Literal["output_text"]]
     """The type of the output text. Always `output_text`."""
-
-    logprobs: Iterable[Logprob]
src/openai/types/responses/tool.py
@@ -161,6 +161,8 @@ class CodeInterpreterContainerCodeInterpreterToolAuto(BaseModel):
     file_ids: Optional[List[str]] = None
     """An optional list of uploaded files to make available to your code."""
 
+    memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+
 
 CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto]
 
src/openai/types/responses/tool_param.py
@@ -161,6 +161,8 @@ class CodeInterpreterContainerCodeInterpreterToolAuto(TypedDict, total=False):
     file_ids: SequenceNotStr[str]
     """An optional list of uploaded files to make available to your code."""
 
+    memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]]
+
 
 CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto]
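
The new `memory_limit` field has no description in the generated types; the sketch below assumes it requests the memory allocation for the code interpreter container (here 4 GB) when used with a `code_interpreter` tool on the Responses API:

    from openai import OpenAI

    client = OpenAI()

    client.responses.create(
        model="gpt-4.1",
        input="Plot a histogram of 10,000 samples from a standard normal distribution.",
        tools=[
            {
                "type": "code_interpreter",
                # Assumption: memory_limit sets the sandbox memory allocation.
                "container": {"type": "auto", "memory_limit": "4g"},
            }
        ],
    )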
 
src/openai/types/vector_stores/file_batch_create_params.py
@@ -2,20 +2,54 @@
 
 from __future__ import annotations
 
-from typing import Dict, Union, Optional
+from typing import Dict, Union, Iterable, Optional
 from typing_extensions import Required, TypedDict
 
 from ..._types import SequenceNotStr
 from ..file_chunking_strategy_param import FileChunkingStrategyParam
 
-__all__ = ["FileBatchCreateParams"]
+__all__ = ["FileBatchCreateParams", "File"]
 
 
 class FileBatchCreateParams(TypedDict, total=False):
-    file_ids: Required[SequenceNotStr[str]]
+    attributes: Optional[Dict[str, Union[str, float, bool]]]
+    """Set of 16 key-value pairs that can be attached to an object.
+
+    This can be useful for storing additional information about the object in a
+    structured format, and querying for objects via API or the dashboard. Keys are
+    strings with a maximum length of 64 characters. Values are strings with a
+    maximum length of 512 characters, booleans, or numbers.
+    """
+
+    chunking_strategy: FileChunkingStrategyParam
+    """The chunking strategy used to chunk the file(s).
+
+    If not set, will use the `auto` strategy. Only applicable if `file_ids` is
+    non-empty.
+    """
+
+    file_ids: SequenceNotStr[str]
     """
     A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
     the vector store should use. Useful for tools like `file_search` that can access
+    files. If `attributes` or `chunking_strategy` are provided, they will be applied
+    to all files in the batch. Mutually exclusive with `files`.
+    """
+
+    files: Iterable[File]
+    """
+    A list of objects that each include a `file_id` plus optional `attributes` or
+    `chunking_strategy`. Use this when you need to override metadata for specific
+    files. The global `attributes` or `chunking_strategy` will be ignored and must
+    be specified for each file. Mutually exclusive with `file_ids`.
+    """
+
+
+class File(TypedDict, total=False):
+    file_id: Required[str]
+    """
+    A [File](https://platform.openai.com/docs/api-reference/files) ID that the
+    vector store should use. Useful for tools like `file_search` that can access
     files.
     """
 
src/openai/types/video.py
@@ -37,6 +37,9 @@ class Video(BaseModel):
     progress: int
     """Approximate completion percentage for the generation task."""
 
+    prompt: Optional[str] = None
+    """The prompt that was used to generate the video."""
+
     remixed_from_video_id: Optional[str] = None
     """Identifier of the source video if this video is a remix."""
 
tests/api_resources/vector_stores/test_file_batches.py
@@ -25,7 +25,6 @@ class TestFileBatches:
     def test_method_create(self, client: OpenAI) -> None:
         file_batch = client.vector_stores.file_batches.create(
             vector_store_id="vs_abc123",
-            file_ids=["string"],
         )
         assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"])
 
@@ -33,9 +32,16 @@ class TestFileBatches:
     def test_method_create_with_all_params(self, client: OpenAI) -> None:
         file_batch = client.vector_stores.file_batches.create(
             vector_store_id="vs_abc123",
-            file_ids=["string"],
             attributes={"foo": "string"},
             chunking_strategy={"type": "auto"},
+            file_ids=["string"],
+            files=[
+                {
+                    "file_id": "file_id",
+                    "attributes": {"foo": "string"},
+                    "chunking_strategy": {"type": "auto"},
+                }
+            ],
         )
         assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"])
 
@@ -43,7 +49,6 @@ class TestFileBatches:
     def test_raw_response_create(self, client: OpenAI) -> None:
         response = client.vector_stores.file_batches.with_raw_response.create(
             vector_store_id="vs_abc123",
-            file_ids=["string"],
         )
 
         assert response.is_closed is True
@@ -55,7 +60,6 @@ class TestFileBatches:
     def test_streaming_response_create(self, client: OpenAI) -> None:
         with client.vector_stores.file_batches.with_streaming_response.create(
             vector_store_id="vs_abc123",
-            file_ids=["string"],
         ) as response:
             assert not response.is_closed
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -70,7 +74,6 @@ class TestFileBatches:
         with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
             client.vector_stores.file_batches.with_raw_response.create(
                 vector_store_id="",
-                file_ids=["string"],
             )
 
     @parametrize
@@ -240,7 +243,6 @@ class TestAsyncFileBatches:
     async def test_method_create(self, async_client: AsyncOpenAI) -> None:
         file_batch = await async_client.vector_stores.file_batches.create(
             vector_store_id="vs_abc123",
-            file_ids=["string"],
         )
         assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"])
 
@@ -248,9 +250,16 @@ class TestAsyncFileBatches:
     async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
         file_batch = await async_client.vector_stores.file_batches.create(
             vector_store_id="vs_abc123",
-            file_ids=["string"],
             attributes={"foo": "string"},
             chunking_strategy={"type": "auto"},
+            file_ids=["string"],
+            files=[
+                {
+                    "file_id": "file_id",
+                    "attributes": {"foo": "string"},
+                    "chunking_strategy": {"type": "auto"},
+                }
+            ],
         )
         assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"])
 
@@ -258,7 +267,6 @@ class TestAsyncFileBatches:
     async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
         response = await async_client.vector_stores.file_batches.with_raw_response.create(
             vector_store_id="vs_abc123",
-            file_ids=["string"],
         )
 
         assert response.is_closed is True
@@ -270,7 +278,6 @@ class TestAsyncFileBatches:
     async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
         async with async_client.vector_stores.file_batches.with_streaming_response.create(
             vector_store_id="vs_abc123",
-            file_ids=["string"],
         ) as response:
             assert not response.is_closed
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -285,7 +292,6 @@ class TestAsyncFileBatches:
         with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
             await async_client.vector_stores.file_batches.with_raw_response.create(
                 vector_store_id="",
-                file_ids=["string"],
             )
 
     @parametrize
.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 136
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml
-openapi_spec_hash: 1560717860bba4105936647dde8f618d
-config_hash: 50ee3382a63c021a9f821a935950e926
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml
+openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b
+config_hash: 032995825500a503a76da119f5354905