Commit ed9e2ddc
Changed files (19)
src/openai/resources/realtime/calls.py
@@ -195,8 +195,19 @@ class Calls(SyncAPIResource):
`auto` will create a trace for the session with default values for the workflow
name, group id, and metadata.
- truncation: Controls how the realtime conversation is truncated prior to model inference.
- The default is `auto`.
+ truncation: When the number of tokens in a conversation exceeds the model's input token
+ limit, the conversation will be truncated, meaning messages (starting from the
+ oldest) will not be included in the model's context. A 32k context model with
+ 4,096 max output tokens can only include 28,224 tokens in the context before
+ truncation occurs. Clients can configure truncation behavior to truncate with a
+ lower max token limit, which is an effective way to control token usage and
+ cost. Truncation will reduce the number of cached tokens on the next turn
+ (busting the cache), since messages are dropped from the beginning of the
+ context. However, clients can also configure truncation to retain messages up to
+ a fraction of the maximum context size, which will reduce the need for future
+ truncations and thus improve the cache rate. Truncation can be disabled
+ entirely, which means the server will never truncate but would instead return an
+ error if the conversation exceeds the model's input token limit.
extra_headers: Send extra headers
@@ -504,8 +515,19 @@ class AsyncCalls(AsyncAPIResource):
`auto` will create a trace for the session with default values for the workflow
name, group id, and metadata.
- truncation: Controls how the realtime conversation is truncated prior to model inference.
- The default is `auto`.
+ truncation: When the number of tokens in a conversation exceeds the model's input token
+ limit, the conversation will be truncated, meaning messages (starting from the
+ oldest) will not be included in the model's context. A 32k context model with
+ 4,096 max output tokens can only include 28,224 tokens in the context before
+ truncation occurs. Clients can configure truncation behavior to truncate with a
+ lower max token limit, which is an effective way to control token usage and
+ cost. Truncation will reduce the number of cached tokens on the next turn
+ (busting the cache), since messages are dropped from the beginning of the
+ context. However, clients can also configure truncation to retain messages up to
+ a fraction of the maximum context size, which will reduce the need for future
+ truncations and thus improve the cache rate. Truncation can be disabled
+ entirely, which means the server will never truncate but would instead return an
+ error if the conversation exceeds the model's input token limit.
extra_headers: Send extra headers
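For context on the expanded `truncation` docstring above, a minimal sketch of accepting a realtime call with retention-ratio truncation; the call ID and model name are placeholders, not values taken from this change, and the exact `accept` signature is assumed from the docstring location:

```python
from openai import OpenAI

client = OpenAI()

# Placeholder call ID and model; only the `truncation` shape is the point.
# `retention_ratio` drops the oldest messages until 80% of the allowed
# post-instruction tokens remain, reducing how often truncation recurs.
client.realtime.calls.accept(
    "rtc_example_call_id",
    type="realtime",
    model="gpt-realtime",
    truncation={"type": "retention_ratio", "retention_ratio": 0.8},
)
```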
src/openai/resources/vector_stores/file_batches.py
@@ -52,9 +52,10 @@ class FileBatches(SyncAPIResource):
self,
vector_store_id: str,
*,
- file_ids: SequenceNotStr[str],
attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
chunking_strategy: FileChunkingStrategyParam | Omit = omit,
+ file_ids: SequenceNotStr[str] | Omit = omit,
+ files: Iterable[file_batch_create_params.File] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -66,10 +67,6 @@ class FileBatches(SyncAPIResource):
Create a vector store file batch.
Args:
- file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
- the vector store should use. Useful for tools like `file_search` that can access
- files.
-
attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
querying for objects via API or the dashboard. Keys are strings with a maximum
@@ -79,6 +76,16 @@ class FileBatches(SyncAPIResource):
chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
strategy. Only applicable if `file_ids` is non-empty.
+ file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+ the vector store should use. Useful for tools like `file_search` that can access
+ files. If `attributes` or `chunking_strategy` are provided, they will be applied
+ to all files in the batch. Mutually exclusive with `files`.
+
+ files: A list of objects that each include a `file_id` plus optional `attributes` or
+ `chunking_strategy`. Use this when you need to override metadata for specific
+ files. The global `attributes` or `chunking_strategy` will be ignored and must
+ be specified for each file. Mutually exclusive with `file_ids`.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -94,9 +101,10 @@ class FileBatches(SyncAPIResource):
f"/vector_stores/{vector_store_id}/file_batches",
body=maybe_transform(
{
- "file_ids": file_ids,
"attributes": attributes,
"chunking_strategy": chunking_strategy,
+ "file_ids": file_ids,
+ "files": files,
},
file_batch_create_params.FileBatchCreateParams,
),
@@ -389,9 +397,10 @@ class AsyncFileBatches(AsyncAPIResource):
self,
vector_store_id: str,
*,
- file_ids: SequenceNotStr[str],
attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
chunking_strategy: FileChunkingStrategyParam | Omit = omit,
+ file_ids: SequenceNotStr[str] | Omit = omit,
+ files: Iterable[file_batch_create_params.File] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -403,10 +412,6 @@ class AsyncFileBatches(AsyncAPIResource):
Create a vector store file batch.
Args:
- file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
- the vector store should use. Useful for tools like `file_search` that can access
- files.
-
attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
querying for objects via API or the dashboard. Keys are strings with a maximum
@@ -416,6 +421,16 @@ class AsyncFileBatches(AsyncAPIResource):
chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
strategy. Only applicable if `file_ids` is non-empty.
+ file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+ the vector store should use. Useful for tools like `file_search` that can access
+ files. If `attributes` or `chunking_strategy` are provided, they will be applied
+ to all files in the batch. Mutually exclusive with `files`.
+
+ files: A list of objects that each include a `file_id` plus optional `attributes` or
+ `chunking_strategy`. Use this when you need to override metadata for specific
+ files. The global `attributes` or `chunking_strategy` will be ignored and must
+ be specified for each file. Mutually exclusive with `file_ids`.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -431,9 +446,10 @@ class AsyncFileBatches(AsyncAPIResource):
f"/vector_stores/{vector_store_id}/file_batches",
body=await async_maybe_transform(
{
- "file_ids": file_ids,
"attributes": attributes,
"chunking_strategy": chunking_strategy,
+ "file_ids": file_ids,
+ "files": files,
},
file_batch_create_params.FileBatchCreateParams,
),
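To illustrate the reordered parameters above, a usage sketch with placeholder IDs and attributes: `file_ids` applies any top-level `attributes`/`chunking_strategy` to every file, while `files` carries per-file overrides and is mutually exclusive with `file_ids`.

```python
from openai import OpenAI

client = OpenAI()

# Per-file overrides via `files`; the top-level attributes/chunking_strategy
# would be ignored in this form, so they are omitted.
batch = client.vector_stores.file_batches.create(
    vector_store_id="vs_abc123",
    files=[
        {
            "file_id": "file-111",
            "attributes": {"team": "docs"},
            "chunking_strategy": {"type": "auto"},
        },
        {"file_id": "file-222"},
    ],
)
print(batch.status)
```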
src/openai/resources/images.py
@@ -168,7 +168,10 @@ class Images(SyncAPIResource):
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.
- input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+ input_fidelity: Control how much effort the model will exert to match the style and features,
+ especially facial features, of input images. This parameter is only supported
+ for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+ `low`. Defaults to `low`.
mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -282,7 +285,10 @@ class Images(SyncAPIResource):
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.
- input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+ input_fidelity: Control how much effort the model will exert to match the style and features,
+ especially facial features, of input images. This parameter is only supported
+ for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+ `low`. Defaults to `low`.
mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -392,7 +398,10 @@ class Images(SyncAPIResource):
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.
- input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+ input_fidelity: Control how much effort the model will exert to match the style and features,
+ especially facial features, of input images. This parameter is only supported
+ for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+ `low`. Defaults to `low`.
mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -1046,7 +1055,10 @@ class AsyncImages(AsyncAPIResource):
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.
- input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+ input_fidelity: Control how much effort the model will exert to match the style and features,
+ especially facial features, of input images. This parameter is only supported
+ for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+ `low`. Defaults to `low`.
mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -1160,7 +1172,10 @@ class AsyncImages(AsyncAPIResource):
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.
- input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+ input_fidelity: Control how much effort the model will exert to match the style and features,
+ especially facial features, of input images. This parameter is only supported
+ for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+ `low`. Defaults to `low`.
mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -1270,7 +1285,10 @@ class AsyncImages(AsyncAPIResource):
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.
- input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+ input_fidelity: Control how much effort the model will exert to match the style and features,
+ especially facial features, of input images. This parameter is only supported
+ for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+ `low`. Defaults to `low`.
mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
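A short sketch of the `input_fidelity` parameter whose docstring is reflowed above; the image path and prompt are illustrative only:

```python
from openai import OpenAI

client = OpenAI()

# `input_fidelity="high"` is only supported on `gpt-image-1`.
result = client.images.edit(
    model="gpt-image-1",
    image=open("portrait.png", "rb"),
    prompt="Replace the background with a soft studio gradient",
    input_fidelity="high",
)
```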
src/openai/types/realtime/call_accept_params.py
@@ -106,6 +106,17 @@ class CallAcceptParams(TypedDict, total=False):
truncation: RealtimeTruncationParam
"""
- Controls how the realtime conversation is truncated prior to model inference.
- The default is `auto`.
+ When the number of tokens in a conversation exceeds the model's input token
+ limit, the conversation will be truncated, meaning messages (starting from the
+ oldest) will not be included in the model's context. A 32k context model with
+ 4,096 max output tokens can only include 28,224 tokens in the context before
+ truncation occurs. Clients can configure truncation behavior to truncate with a
+ lower max token limit, which is an effective way to control token usage and
+ cost. Truncation will reduce the number of cached tokens on the next turn
+ (busting the cache), since messages are dropped from the beginning of the
+ context. However, clients can also configure truncation to retain messages up to
+ a fraction of the maximum context size, which will reduce the need for future
+ truncations and thus improve the cache rate. Truncation can be disabled
+ entirely, which means the server will never truncate but would instead return an
+ error if the conversation exceeds the model's input token limit.
"""
src/openai/types/realtime/realtime_session_create_request.py
@@ -106,6 +106,17 @@ class RealtimeSessionCreateRequest(BaseModel):
truncation: Optional[RealtimeTruncation] = None
"""
- Controls how the realtime conversation is truncated prior to model inference.
- The default is `auto`.
+ When the number of tokens in a conversation exceeds the model's input token
+ limit, the conversation will be truncated, meaning messages (starting from the
+ oldest) will not be included in the model's context. A 32k context model with
+ 4,096 max output tokens can only include 28,224 tokens in the context before
+ truncation occurs. Clients can configure truncation behavior to truncate with a
+ lower max token limit, which is an effective way to control token usage and
+ cost. Truncation will reduce the number of cached tokens on the next turn
+ (busting the cache), since messages are dropped from the beginning of the
+ context. However, clients can also configure truncation to retain messages up to
+ a fraction of the maximum context size, which will reduce the need for future
+ truncations and thus improve the cache rate. Truncation can be disabled
+ entirely, which means the server will never truncate but would instead return an
+ error if the conversation exceeds the model's input token limit.
"""
src/openai/types/realtime/realtime_session_create_request_param.py
@@ -106,6 +106,17 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False):
truncation: RealtimeTruncationParam
"""
- Controls how the realtime conversation is truncated prior to model inference.
- The default is `auto`.
+ When the number of tokens in a conversation exceeds the model's input token
+ limit, the conversation will be truncated, meaning messages (starting from the
+ oldest) will not be included in the model's context. A 32k context model with
+ 4,096 max output tokens can only include 28,224 tokens in the context before
+ truncation occurs. Clients can configure truncation behavior to truncate with a
+ lower max token limit, which is an effective way to control token usage and
+ cost. Truncation will reduce the number of cached tokens on the next turn
+ (busting the cache), since messages are dropped from the beginning of the
+ context. However, clients can also configure truncation to retain messages up to
+ a fraction of the maximum context size, which will reduce the need for future
+ truncations and thus improve the cache rate. Truncation can be disabled
+ entirely, which means the server will never truncate but would instead return an
+ error if the conversation exceeds the model's input token limit.
"""
src/openai/types/realtime/realtime_session_create_response.py
@@ -459,6 +459,17 @@ class RealtimeSessionCreateResponse(BaseModel):
truncation: Optional[RealtimeTruncation] = None
"""
- Controls how the realtime conversation is truncated prior to model inference.
- The default is `auto`.
+ When the number of tokens in a conversation exceeds the model's input token
+ limit, the conversation will be truncated, meaning messages (starting from the
+ oldest) will not be included in the model's context. A 32k context model with
+ 4,096 max output tokens can only include 28,224 tokens in the context before
+ truncation occurs. Clients can configure truncation behavior to truncate with a
+ lower max token limit, which is an effective way to control token usage and
+ cost. Truncation will reduce the number of cached tokens on the next turn
+ (busting the cache), since messages are dropped from the beginning of the
+ context. However, clients can also configure truncation to retain messages up to
+ a fraction of the maximum context size, which will reduce the need for future
+ truncations and thus improve the cache rate. Truncation can be disabled
+ entirely, which means the server will never truncate but would instead return an
+ error if the conversation exceeds the model's input token limit.
"""
src/openai/types/realtime/realtime_truncation_retention_ratio.py
@@ -1,18 +1,38 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Optional
from typing_extensions import Literal
from ..._models import BaseModel
-__all__ = ["RealtimeTruncationRetentionRatio"]
+__all__ = ["RealtimeTruncationRetentionRatio", "TokenLimits"]
+
+
+class TokenLimits(BaseModel):
+ post_instructions: Optional[int] = None
+ """
+ Maximum tokens allowed in the conversation after instructions (which include
+ tool definitions). For example, setting this to 5,000 would mean that truncation
+ would occur when the conversation exceeds 5,000 tokens after instructions. This
+ cannot be higher than the model's context window size minus the maximum output
+ tokens.
+ """
class RealtimeTruncationRetentionRatio(BaseModel):
retention_ratio: float
"""
- Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
- conversation exceeds the input token limit.
+ Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
+ the conversation exceeds the input token limit. Setting this to `0.8` means that
+ messages will be dropped until 80% of the maximum allowed tokens are used. This
+ helps reduce the frequency of truncations and improve cache rates.
"""
type: Literal["retention_ratio"]
"""Use retention ratio truncation."""
+
+ token_limits: Optional[TokenLimits] = None
+ """Optional custom token limits for this truncation strategy.
+
+ If not provided, the model's default token limits will be used.
+ """
src/openai/types/realtime/realtime_truncation_retention_ratio_param.py
@@ -4,15 +4,34 @@ from __future__ import annotations
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["RealtimeTruncationRetentionRatioParam"]
+__all__ = ["RealtimeTruncationRetentionRatioParam", "TokenLimits"]
+
+
+class TokenLimits(TypedDict, total=False):
+ post_instructions: int
+ """
+ Maximum tokens allowed in the conversation after instructions (which include
+ tool definitions). For example, setting this to 5,000 would mean that truncation
+ would occur when the conversation exceeds 5,000 tokens after instructions. This
+ cannot be higher than the model's context window size minus the maximum output
+ tokens.
+ """
class RealtimeTruncationRetentionRatioParam(TypedDict, total=False):
retention_ratio: Required[float]
"""
- Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
- conversation exceeds the input token limit.
+ Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
+ the conversation exceeds the input token limit. Setting this to `0.8` means that
+ messages will be dropped until 80% of the maximum allowed tokens are used. This
+ helps reduce the frequency of truncations and improve cache rates.
"""
type: Required[Literal["retention_ratio"]]
"""Use retention ratio truncation."""
+
+ token_limits: TokenLimits
+ """Optional custom token limits for this truncation strategy.
+
+ If not provided, the model's default token limits will be used.
+ """
src/openai/types/responses/file_search_tool.py
@@ -7,12 +7,26 @@ from ..._models import BaseModel
from ..shared.compound_filter import CompoundFilter
from ..shared.comparison_filter import ComparisonFilter
-__all__ = ["FileSearchTool", "Filters", "RankingOptions"]
+__all__ = ["FileSearchTool", "Filters", "RankingOptions", "RankingOptionsHybridSearch"]
Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter, None]
+class RankingOptionsHybridSearch(BaseModel):
+ embedding_weight: float
+ """The weight of the embedding in the reciprocal ranking fusion."""
+
+ text_weight: float
+ """The weight of the text in the reciprocal ranking fusion."""
+
+
class RankingOptions(BaseModel):
+ hybrid_search: Optional[RankingOptionsHybridSearch] = None
+ """
+ Weights that control how reciprocal rank fusion balances semantic embedding
+ matches versus sparse keyword matches when hybrid search is enabled.
+ """
+
ranker: Optional[Literal["auto", "default-2024-11-15"]] = None
"""The ranker to use for the file search."""
src/openai/types/responses/file_search_tool_param.py
@@ -9,12 +9,26 @@ from ..._types import SequenceNotStr
from ..shared_params.compound_filter import CompoundFilter
from ..shared_params.comparison_filter import ComparisonFilter
-__all__ = ["FileSearchToolParam", "Filters", "RankingOptions"]
+__all__ = ["FileSearchToolParam", "Filters", "RankingOptions", "RankingOptionsHybridSearch"]
Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter]
+class RankingOptionsHybridSearch(TypedDict, total=False):
+ embedding_weight: Required[float]
+ """The weight of the embedding in the reciprocal ranking fusion."""
+
+ text_weight: Required[float]
+ """The weight of the text in the reciprocal ranking fusion."""
+
+
class RankingOptions(TypedDict, total=False):
+ hybrid_search: RankingOptionsHybridSearch
+ """
+ Weights that control how reciprocal rank fusion balances semantic embedding
+ matches versus sparse keyword matches when hybrid search is enabled.
+ """
+
ranker: Literal["auto", "default-2024-11-15"]
"""The ranker to use for the file search."""
src/openai/types/responses/response_output_text.py
@@ -1,6 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List, Union, Optional
+from typing import List, Union
from typing_extensions import Literal, Annotated, TypeAlias
from ..._utils import PropertyInfo
@@ -108,10 +108,10 @@ class ResponseOutputText(BaseModel):
annotations: List[Annotation]
"""The annotations of the text output."""
+ logprobs: List[Logprob]
+
text: str
"""The text output from the model."""
type: Literal["output_text"]
"""The type of the output text. Always `output_text`."""
-
- logprobs: Optional[List[Logprob]] = None
src/openai/types/responses/response_output_text_param.py
@@ -106,10 +106,10 @@ class ResponseOutputTextParam(TypedDict, total=False):
annotations: Required[Iterable[Annotation]]
"""The annotations of the text output."""
+ logprobs: Required[Iterable[Logprob]]
+
text: Required[str]
"""The text output from the model."""
type: Required[Literal["output_text"]]
"""The type of the output text. Always `output_text`."""
-
- logprobs: Iterable[Logprob]
src/openai/types/responses/tool.py
@@ -161,6 +161,8 @@ class CodeInterpreterContainerCodeInterpreterToolAuto(BaseModel):
file_ids: Optional[List[str]] = None
"""An optional list of uploaded files to make available to your code."""
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+
CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto]
src/openai/types/responses/tool_param.py
@@ -161,6 +161,8 @@ class CodeInterpreterContainerCodeInterpreterToolAuto(TypedDict, total=False):
file_ids: SequenceNotStr[str]
"""An optional list of uploaded files to make available to your code."""
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]]
+
CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto]
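A sketch of the new `memory_limit` option on the code interpreter's `auto` container; the model, file ID, and prompt are placeholders, and `"4g"` is one of the documented values:

```python
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4.1",
    input="Load the attached CSV and plot monthly totals.",
    tools=[
        {
            "type": "code_interpreter",
            "container": {
                "type": "auto",
                "file_ids": ["file-abc123"],
                "memory_limit": "4g",  # other documented values: "1g", "16g", "64g"
            },
        }
    ],
)
```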
src/openai/types/vector_stores/file_batch_create_params.py
@@ -2,20 +2,54 @@
from __future__ import annotations
-from typing import Dict, Union, Optional
+from typing import Dict, Union, Iterable, Optional
from typing_extensions import Required, TypedDict
from ..._types import SequenceNotStr
from ..file_chunking_strategy_param import FileChunkingStrategyParam
-__all__ = ["FileBatchCreateParams"]
+__all__ = ["FileBatchCreateParams", "File"]
class FileBatchCreateParams(TypedDict, total=False):
- file_ids: Required[SequenceNotStr[str]]
+ attributes: Optional[Dict[str, Union[str, float, bool]]]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard. Keys are
+ strings with a maximum length of 64 characters. Values are strings with a
+ maximum length of 512 characters, booleans, or numbers.
+ """
+
+ chunking_strategy: FileChunkingStrategyParam
+ """The chunking strategy used to chunk the file(s).
+
+ If not set, will use the `auto` strategy. Only applicable if `file_ids` is
+ non-empty.
+ """
+
+ file_ids: SequenceNotStr[str]
"""
A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
the vector store should use. Useful for tools like `file_search` that can access
+ files. If `attributes` or `chunking_strategy` are provided, they will be applied
+ to all files in the batch. Mutually exclusive with `files`.
+ """
+
+ files: Iterable[File]
+ """
+ A list of objects that each include a `file_id` plus optional `attributes` or
+ `chunking_strategy`. Use this when you need to override metadata for specific
+ files. The global `attributes` or `chunking_strategy` will be ignored and must
+ be specified for each file. Mutually exclusive with `file_ids`.
+ """
+
+
+class File(TypedDict, total=False):
+ file_id: Required[str]
+ """
+ A [File](https://platform.openai.com/docs/api-reference/files) ID that the
+ vector store should use. Useful for tools like `file_search` that can access
files.
"""
src/openai/types/video.py
@@ -37,6 +37,9 @@ class Video(BaseModel):
progress: int
"""Approximate completion percentage for the generation task."""
+ prompt: Optional[str] = None
+ """The prompt that was used to generate the video."""
+
remixed_from_video_id: Optional[str] = None
"""Identifier of the source video if this video is a remix."""
tests/api_resources/vector_stores/test_file_batches.py
@@ -25,7 +25,6 @@ class TestFileBatches:
def test_method_create(self, client: OpenAI) -> None:
file_batch = client.vector_stores.file_batches.create(
vector_store_id="vs_abc123",
- file_ids=["string"],
)
assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"])
@@ -33,9 +32,16 @@ class TestFileBatches:
def test_method_create_with_all_params(self, client: OpenAI) -> None:
file_batch = client.vector_stores.file_batches.create(
vector_store_id="vs_abc123",
- file_ids=["string"],
attributes={"foo": "string"},
chunking_strategy={"type": "auto"},
+ file_ids=["string"],
+ files=[
+ {
+ "file_id": "file_id",
+ "attributes": {"foo": "string"},
+ "chunking_strategy": {"type": "auto"},
+ }
+ ],
)
assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"])
@@ -43,7 +49,6 @@ class TestFileBatches:
def test_raw_response_create(self, client: OpenAI) -> None:
response = client.vector_stores.file_batches.with_raw_response.create(
vector_store_id="vs_abc123",
- file_ids=["string"],
)
assert response.is_closed is True
@@ -55,7 +60,6 @@ class TestFileBatches:
def test_streaming_response_create(self, client: OpenAI) -> None:
with client.vector_stores.file_batches.with_streaming_response.create(
vector_store_id="vs_abc123",
- file_ids=["string"],
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -70,7 +74,6 @@ class TestFileBatches:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
client.vector_stores.file_batches.with_raw_response.create(
vector_store_id="",
- file_ids=["string"],
)
@parametrize
@@ -240,7 +243,6 @@ class TestAsyncFileBatches:
async def test_method_create(self, async_client: AsyncOpenAI) -> None:
file_batch = await async_client.vector_stores.file_batches.create(
vector_store_id="vs_abc123",
- file_ids=["string"],
)
assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"])
@@ -248,9 +250,16 @@ class TestAsyncFileBatches:
async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
file_batch = await async_client.vector_stores.file_batches.create(
vector_store_id="vs_abc123",
- file_ids=["string"],
attributes={"foo": "string"},
chunking_strategy={"type": "auto"},
+ file_ids=["string"],
+ files=[
+ {
+ "file_id": "file_id",
+ "attributes": {"foo": "string"},
+ "chunking_strategy": {"type": "auto"},
+ }
+ ],
)
assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"])
@@ -258,7 +267,6 @@ class TestAsyncFileBatches:
async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
response = await async_client.vector_stores.file_batches.with_raw_response.create(
vector_store_id="vs_abc123",
- file_ids=["string"],
)
assert response.is_closed is True
@@ -270,7 +278,6 @@ class TestAsyncFileBatches:
async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
async with async_client.vector_stores.file_batches.with_streaming_response.create(
vector_store_id="vs_abc123",
- file_ids=["string"],
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -285,7 +292,6 @@ class TestAsyncFileBatches:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
await async_client.vector_stores.file_batches.with_raw_response.create(
vector_store_id="",
- file_ids=["string"],
)
@parametrize
.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 136
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml
-openapi_spec_hash: 1560717860bba4105936647dde8f618d
-config_hash: 50ee3382a63c021a9f821a935950e926
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml
+openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b
+config_hash: 032995825500a503a76da119f5354905