main
   1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
   2
   3from __future__ import annotations
   4
   5from typing import Union, Iterable, Optional
   6from typing_extensions import Literal
   7
   8import httpx
   9
  10from ... import _legacy_response
  11from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
  12from ..._utils import maybe_transform, async_maybe_transform
  13from ..._compat import cached_property
  14from ..._resource import SyncAPIResource, AsyncAPIResource
  15from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
  16from ...pagination import SyncCursorPage, AsyncCursorPage
  17from ...types.beta import (
  18    assistant_list_params,
  19    assistant_create_params,
  20    assistant_update_params,
  21)
  22from ..._base_client import AsyncPaginator, make_request_options
  23from ...types.beta.assistant import Assistant
  24from ...types.shared.chat_model import ChatModel
  25from ...types.beta.assistant_deleted import AssistantDeleted
  26from ...types.shared_params.metadata import Metadata
  27from ...types.shared.reasoning_effort import ReasoningEffort
  28from ...types.beta.assistant_tool_param import AssistantToolParam
  29from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
  30
  31__all__ = ["Assistants", "AsyncAssistants"]
  32
  33
class Assistants(SyncAPIResource):
    """Synchronous API resource for the `/assistants` Beta endpoints:
    create, retrieve, update, list, and delete assistants."""

    @cached_property
    def with_raw_response(self) -> AssistantsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AssistantsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AssistantsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AssistantsWithStreamingResponse(self)

    def create(
        self,
        *,
        model: Union[str, ChatModel],
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """
        Create an assistant with a model and instructions.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          description: The description of the assistant. The maximum length is 512 characters.

          instructions: The system instructions that the assistant uses. The maximum length is 256,000
              characters.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          name: The name of the assistant. The maximum length is 256 characters.

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
              assistant. Tools can be of types `code_interpreter`, `file_search`, or
              `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Assistants is a beta API: every request opts in via the OpenAI-Beta
        # header; caller-supplied extra_headers take precedence on conflict.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            "/assistants",
            body=maybe_transform(
                {
                    "model": model,
                    "description": description,
                    "instructions": instructions,
                    "metadata": metadata,
                    "name": name,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                },
                assistant_create_params.AssistantCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )

    def retrieve(
        self,
        assistant_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """
        Retrieves an assistant.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # An empty ID would produce a malformed request path, so fail fast.
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        # Beta opt-in header; caller-supplied extra_headers take precedence.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._get(
            f"/assistants/{assistant_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )

    def update(
        self,
        assistant_id: str,
        *,
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[
            str,
            Literal[
                "gpt-5",
                "gpt-5-mini",
                "gpt-5-nano",
                "gpt-5-2025-08-07",
                "gpt-5-mini-2025-08-07",
                "gpt-5-nano-2025-08-07",
                "gpt-4.1",
                "gpt-4.1-mini",
                "gpt-4.1-nano",
                "gpt-4.1-2025-04-14",
                "gpt-4.1-mini-2025-04-14",
                "gpt-4.1-nano-2025-04-14",
                "o3-mini",
                "o3-mini-2025-01-31",
                "o1",
                "o1-2024-12-17",
                "gpt-4o",
                "gpt-4o-2024-11-20",
                "gpt-4o-2024-08-06",
                "gpt-4o-2024-05-13",
                "gpt-4o-mini",
                "gpt-4o-mini-2024-07-18",
                "gpt-4.5-preview",
                "gpt-4.5-preview-2025-02-27",
                "gpt-4-turbo",
                "gpt-4-turbo-2024-04-09",
                "gpt-4-0125-preview",
                "gpt-4-turbo-preview",
                "gpt-4-1106-preview",
                "gpt-4-vision-preview",
                "gpt-4",
                "gpt-4-0314",
                "gpt-4-0613",
                "gpt-4-32k",
                "gpt-4-32k-0314",
                "gpt-4-32k-0613",
                "gpt-3.5-turbo",
                "gpt-3.5-turbo-16k",
                "gpt-3.5-turbo-0613",
                "gpt-3.5-turbo-1106",
                "gpt-3.5-turbo-0125",
                "gpt-3.5-turbo-16k-0613",
            ],
        ]
        | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """Modifies an assistant.

        Args:
          description: The description of the assistant.

        The maximum length is 512 characters.

          instructions: The system instructions that the assistant uses. The maximum length is 256,000
              characters.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          name: The name of the assistant. The maximum length is 256 characters.

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
              assistant. Tools can be of types `code_interpreter`, `file_search`, or
              `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # An empty ID would produce a malformed request path, so fail fast.
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        # Beta opt-in header; caller-supplied extra_headers take precedence.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            f"/assistants/{assistant_id}",
            body=maybe_transform(
                {
                    "description": description,
                    "instructions": instructions,
                    "metadata": metadata,
                    "model": model,
                    "name": name,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                },
                assistant_update_params.AssistantUpdateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )

    def list(
        self,
        *,
        after: str | Omit = omit,
        before: str | Omit = omit,
        limit: int | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncCursorPage[Assistant]:
        """Returns a list of assistants.

        Args:
          after: A cursor for use in pagination.

        `after` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              ending with obj_foo, your subsequent call can include after=obj_foo in order to
              fetch the next page of the list.

          before: A cursor for use in pagination. `before` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              starting with obj_foo, your subsequent call can include before=obj_foo in order
              to fetch the previous page of the list.

          limit: A limit on the number of objects to be returned. Limit can range between 1 and
              100, and the default is 20.

          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
              order and `desc` for descending order.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Beta opt-in header; caller-supplied extra_headers take precedence.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._get_api_list(
            "/assistants",
            page=SyncCursorPage[Assistant],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "after": after,
                        "before": before,
                        "limit": limit,
                        "order": order,
                    },
                    assistant_list_params.AssistantListParams,
                ),
            ),
            model=Assistant,
        )

    def delete(
        self,
        assistant_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AssistantDeleted:
        """
        Delete an assistant.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # An empty ID would produce a malformed request path, so fail fast.
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        # Beta opt-in header; caller-supplied extra_headers take precedence.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._delete(
            f"/assistants/{assistant_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=AssistantDeleted,
        )
 501
 502
 503class AsyncAssistants(AsyncAPIResource):
 504    @cached_property
 505    def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
 506        """
 507        This property can be used as a prefix for any HTTP method call to return
 508        the raw response object instead of the parsed content.
 509
 510        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
 511        """
 512        return AsyncAssistantsWithRawResponse(self)
 513
 514    @cached_property
 515    def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
 516        """
 517        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 518
 519        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
 520        """
 521        return AsyncAssistantsWithStreamingResponse(self)
 522
 523    async def create(
 524        self,
 525        *,
 526        model: Union[str, ChatModel],
 527        description: Optional[str] | Omit = omit,
 528        instructions: Optional[str] | Omit = omit,
 529        metadata: Optional[Metadata] | Omit = omit,
 530        name: Optional[str] | Omit = omit,
 531        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
 532        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 533        temperature: Optional[float] | Omit = omit,
 534        tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit,
 535        tools: Iterable[AssistantToolParam] | Omit = omit,
 536        top_p: Optional[float] | Omit = omit,
 537        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 538        # The extra values given here take precedence over values defined on the client or passed to this method.
 539        extra_headers: Headers | None = None,
 540        extra_query: Query | None = None,
 541        extra_body: Body | None = None,
 542        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 543    ) -> Assistant:
 544        """
 545        Create an assistant with a model and instructions.
 546
 547        Args:
 548          model: ID of the model to use. You can use the
 549              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
 550              see all of your available models, or see our
 551              [Model overview](https://platform.openai.com/docs/models) for descriptions of
 552              them.
 553
 554          description: The description of the assistant. The maximum length is 512 characters.
 555
 556          instructions: The system instructions that the assistant uses. The maximum length is 256,000
 557              characters.
 558
 559          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 560              for storing additional information about the object in a structured format, and
 561              querying for objects via API or the dashboard.
 562
 563              Keys are strings with a maximum length of 64 characters. Values are strings with
 564              a maximum length of 512 characters.
 565
 566          name: The name of the assistant. The maximum length is 256 characters.
 567
 568          reasoning_effort: Constrains effort on reasoning for
 569              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
 570              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
 571              Reducing reasoning effort can result in faster responses and fewer tokens used
 572              on reasoning in a response.
 573
 574              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
 575                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
 576                calls are supported for all reasoning values in gpt-5.1.
 577              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
 578                support `none`.
 579              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
 580              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 581
 582          response_format: Specifies the format that the model must output. Compatible with
 583              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
 584              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
 585              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 586
 587              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
 588              Outputs which ensures the model will match your supplied JSON schema. Learn more
 589              in the
 590              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 591
 592              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
 593              message the model generates is valid JSON.
 594
 595              **Important:** when using JSON mode, you **must** also instruct the model to
 596              produce JSON yourself via a system or user message. Without this, the model may
 597              generate an unending stream of whitespace until the generation reaches the token
 598              limit, resulting in a long-running and seemingly "stuck" request. Also note that
 599              the message content may be partially cut off if `finish_reason="length"`, which
 600              indicates the generation exceeded `max_tokens` or the conversation exceeded the
 601              max context length.
 602
 603          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
 604              make the output more random, while lower values like 0.2 will make it more
 605              focused and deterministic.
 606
 607          tool_resources: A set of resources that are used by the assistant's tools. The resources are
 608              specific to the type of tool. For example, the `code_interpreter` tool requires
 609              a list of file IDs, while the `file_search` tool requires a list of vector store
 610              IDs.
 611
 612          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
 613              assistant. Tools can be of types `code_interpreter`, `file_search`, or
 614              `function`.
 615
 616          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
 617              model considers the results of the tokens with top_p probability mass. So 0.1
 618              means only the tokens comprising the top 10% probability mass are considered.
 619
 620              We generally recommend altering this or temperature but not both.
 621
 622          extra_headers: Send extra headers
 623
 624          extra_query: Add additional query parameters to the request
 625
 626          extra_body: Add additional JSON properties to the request
 627
 628          timeout: Override the client-level default timeout for this request, in seconds
 629        """
 630        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 631        return await self._post(
 632            "/assistants",
 633            body=await async_maybe_transform(
 634                {
 635                    "model": model,
 636                    "description": description,
 637                    "instructions": instructions,
 638                    "metadata": metadata,
 639                    "name": name,
 640                    "reasoning_effort": reasoning_effort,
 641                    "response_format": response_format,
 642                    "temperature": temperature,
 643                    "tool_resources": tool_resources,
 644                    "tools": tools,
 645                    "top_p": top_p,
 646                },
 647                assistant_create_params.AssistantCreateParams,
 648            ),
 649            options=make_request_options(
 650                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 651            ),
 652            cast_to=Assistant,
 653        )
 654
 655    async def retrieve(
 656        self,
 657        assistant_id: str,
 658        *,
 659        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 660        # The extra values given here take precedence over values defined on the client or passed to this method.
 661        extra_headers: Headers | None = None,
 662        extra_query: Query | None = None,
 663        extra_body: Body | None = None,
 664        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 665    ) -> Assistant:
 666        """
 667        Retrieves an assistant.
 668
 669        Args:
 670          extra_headers: Send extra headers
 671
 672          extra_query: Add additional query parameters to the request
 673
 674          extra_body: Add additional JSON properties to the request
 675
 676          timeout: Override the client-level default timeout for this request, in seconds
 677        """
 678        if not assistant_id:
 679            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
 680        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 681        return await self._get(
 682            f"/assistants/{assistant_id}",
 683            options=make_request_options(
 684                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 685            ),
 686            cast_to=Assistant,
 687        )
 688
 689    async def update(
 690        self,
 691        assistant_id: str,
 692        *,
 693        description: Optional[str] | Omit = omit,
 694        instructions: Optional[str] | Omit = omit,
 695        metadata: Optional[Metadata] | Omit = omit,
 696        model: Union[
 697            str,
 698            Literal[
 699                "gpt-5",
 700                "gpt-5-mini",
 701                "gpt-5-nano",
 702                "gpt-5-2025-08-07",
 703                "gpt-5-mini-2025-08-07",
 704                "gpt-5-nano-2025-08-07",
 705                "gpt-4.1",
 706                "gpt-4.1-mini",
 707                "gpt-4.1-nano",
 708                "gpt-4.1-2025-04-14",
 709                "gpt-4.1-mini-2025-04-14",
 710                "gpt-4.1-nano-2025-04-14",
 711                "o3-mini",
 712                "o3-mini-2025-01-31",
 713                "o1",
 714                "o1-2024-12-17",
 715                "gpt-4o",
 716                "gpt-4o-2024-11-20",
 717                "gpt-4o-2024-08-06",
 718                "gpt-4o-2024-05-13",
 719                "gpt-4o-mini",
 720                "gpt-4o-mini-2024-07-18",
 721                "gpt-4.5-preview",
 722                "gpt-4.5-preview-2025-02-27",
 723                "gpt-4-turbo",
 724                "gpt-4-turbo-2024-04-09",
 725                "gpt-4-0125-preview",
 726                "gpt-4-turbo-preview",
 727                "gpt-4-1106-preview",
 728                "gpt-4-vision-preview",
 729                "gpt-4",
 730                "gpt-4-0314",
 731                "gpt-4-0613",
 732                "gpt-4-32k",
 733                "gpt-4-32k-0314",
 734                "gpt-4-32k-0613",
 735                "gpt-3.5-turbo",
 736                "gpt-3.5-turbo-16k",
 737                "gpt-3.5-turbo-0613",
 738                "gpt-3.5-turbo-1106",
 739                "gpt-3.5-turbo-0125",
 740                "gpt-3.5-turbo-16k-0613",
 741            ],
 742        ]
 743        | Omit = omit,
 744        name: Optional[str] | Omit = omit,
 745        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
 746        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 747        temperature: Optional[float] | Omit = omit,
 748        tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit,
 749        tools: Iterable[AssistantToolParam] | Omit = omit,
 750        top_p: Optional[float] | Omit = omit,
 751        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 752        # The extra values given here take precedence over values defined on the client or passed to this method.
 753        extra_headers: Headers | None = None,
 754        extra_query: Query | None = None,
 755        extra_body: Body | None = None,
 756        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 757    ) -> Assistant:
 758        """Modifies an assistant.
 759
 760        Args:
 761          description: The description of the assistant.
 762
 763        The maximum length is 512 characters.
 764
 765          instructions: The system instructions that the assistant uses. The maximum length is 256,000
 766              characters.
 767
 768          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 769              for storing additional information about the object in a structured format, and
 770              querying for objects via API or the dashboard.
 771
 772              Keys are strings with a maximum length of 64 characters. Values are strings with
 773              a maximum length of 512 characters.
 774
 775          model: ID of the model to use. You can use the
 776              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
 777              see all of your available models, or see our
 778              [Model overview](https://platform.openai.com/docs/models) for descriptions of
 779              them.
 780
 781          name: The name of the assistant. The maximum length is 256 characters.
 782
 783          reasoning_effort: Constrains effort on reasoning for
 784              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
 785              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
 786              Reducing reasoning effort can result in faster responses and fewer tokens used
 787              on reasoning in a response.
 788
 789              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
 790                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
 791                calls are supported for all reasoning values in gpt-5.1.
 792              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
 793                support `none`.
 794              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
 795              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
 796
 797          response_format: Specifies the format that the model must output. Compatible with
 798              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
 799              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
 800              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 801
 802              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
 803              Outputs which ensures the model will match your supplied JSON schema. Learn more
 804              in the
 805              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 806
 807              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
 808              message the model generates is valid JSON.
 809
 810              **Important:** when using JSON mode, you **must** also instruct the model to
 811              produce JSON yourself via a system or user message. Without this, the model may
 812              generate an unending stream of whitespace until the generation reaches the token
 813              limit, resulting in a long-running and seemingly "stuck" request. Also note that
 814              the message content may be partially cut off if `finish_reason="length"`, which
 815              indicates the generation exceeded `max_tokens` or the conversation exceeded the
 816              max context length.
 817
 818          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
 819              make the output more random, while lower values like 0.2 will make it more
 820              focused and deterministic.
 821
 822          tool_resources: A set of resources that are used by the assistant's tools. The resources are
 823              specific to the type of tool. For example, the `code_interpreter` tool requires
 824              a list of file IDs, while the `file_search` tool requires a list of vector store
 825              IDs.
 826
 827          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
 828              assistant. Tools can be of types `code_interpreter`, `file_search`, or
 829              `function`.
 830
 831          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
 832              model considers the results of the tokens with top_p probability mass. So 0.1
 833              means only the tokens comprising the top 10% probability mass are considered.
 834
 835              We generally recommend altering this or temperature but not both.
 836
 837          extra_headers: Send extra headers
 838
 839          extra_query: Add additional query parameters to the request
 840
 841          extra_body: Add additional JSON properties to the request
 842
 843          timeout: Override the client-level default timeout for this request, in seconds
 844        """
 845        if not assistant_id:
 846            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
 847        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 848        return await self._post(
 849            f"/assistants/{assistant_id}",
 850            body=await async_maybe_transform(
 851                {
 852                    "description": description,
 853                    "instructions": instructions,
 854                    "metadata": metadata,
 855                    "model": model,
 856                    "name": name,
 857                    "reasoning_effort": reasoning_effort,
 858                    "response_format": response_format,
 859                    "temperature": temperature,
 860                    "tool_resources": tool_resources,
 861                    "tools": tools,
 862                    "top_p": top_p,
 863                },
 864                assistant_update_params.AssistantUpdateParams,
 865            ),
 866            options=make_request_options(
 867                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 868            ),
 869            cast_to=Assistant,
 870        )
 871
 872    def list(
 873        self,
 874        *,
 875        after: str | Omit = omit,
 876        before: str | Omit = omit,
 877        limit: int | Omit = omit,
 878        order: Literal["asc", "desc"] | Omit = omit,
 879        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 880        # The extra values given here take precedence over values defined on the client or passed to this method.
 881        extra_headers: Headers | None = None,
 882        extra_query: Query | None = None,
 883        extra_body: Body | None = None,
 884        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 885    ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]:
 886        """Returns a list of assistants.
 887
 888        Args:
 889          after: A cursor for use in pagination.
 890
 891        `after` is an object ID that defines your place
 892              in the list. For instance, if you make a list request and receive 100 objects,
 893              ending with obj_foo, your subsequent call can include after=obj_foo in order to
 894              fetch the next page of the list.
 895
 896          before: A cursor for use in pagination. `before` is an object ID that defines your place
 897              in the list. For instance, if you make a list request and receive 100 objects,
 898              starting with obj_foo, your subsequent call can include before=obj_foo in order
 899              to fetch the previous page of the list.
 900
 901          limit: A limit on the number of objects to be returned. Limit can range between 1 and
 902              100, and the default is 20.
 903
 904          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
 905              order and `desc` for descending order.
 906
 907          extra_headers: Send extra headers
 908
 909          extra_query: Add additional query parameters to the request
 910
 911          extra_body: Add additional JSON properties to the request
 912
 913          timeout: Override the client-level default timeout for this request, in seconds
 914        """
 915        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 916        return self._get_api_list(
 917            "/assistants",
 918            page=AsyncCursorPage[Assistant],
 919            options=make_request_options(
 920                extra_headers=extra_headers,
 921                extra_query=extra_query,
 922                extra_body=extra_body,
 923                timeout=timeout,
 924                query=maybe_transform(
 925                    {
 926                        "after": after,
 927                        "before": before,
 928                        "limit": limit,
 929                        "order": order,
 930                    },
 931                    assistant_list_params.AssistantListParams,
 932                ),
 933            ),
 934            model=Assistant,
 935        )
 936
 937    async def delete(
 938        self,
 939        assistant_id: str,
 940        *,
 941        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 942        # The extra values given here take precedence over values defined on the client or passed to this method.
 943        extra_headers: Headers | None = None,
 944        extra_query: Query | None = None,
 945        extra_body: Body | None = None,
 946        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 947    ) -> AssistantDeleted:
 948        """
 949        Delete an assistant.
 950
 951        Args:
 952          extra_headers: Send extra headers
 953
 954          extra_query: Add additional query parameters to the request
 955
 956          extra_body: Add additional JSON properties to the request
 957
 958          timeout: Override the client-level default timeout for this request, in seconds
 959        """
 960        if not assistant_id:
 961            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
 962        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 963        return await self._delete(
 964            f"/assistants/{assistant_id}",
 965            options=make_request_options(
 966                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 967            ),
 968            cast_to=AssistantDeleted,
 969        )
 970
 971
 972class AssistantsWithRawResponse:
 973    def __init__(self, assistants: Assistants) -> None:
 974        self._assistants = assistants
 975
 976        self.create = _legacy_response.to_raw_response_wrapper(
 977            assistants.create,
 978        )
 979        self.retrieve = _legacy_response.to_raw_response_wrapper(
 980            assistants.retrieve,
 981        )
 982        self.update = _legacy_response.to_raw_response_wrapper(
 983            assistants.update,
 984        )
 985        self.list = _legacy_response.to_raw_response_wrapper(
 986            assistants.list,
 987        )
 988        self.delete = _legacy_response.to_raw_response_wrapper(
 989            assistants.delete,
 990        )
 991
 992
 993class AsyncAssistantsWithRawResponse:
 994    def __init__(self, assistants: AsyncAssistants) -> None:
 995        self._assistants = assistants
 996
 997        self.create = _legacy_response.async_to_raw_response_wrapper(
 998            assistants.create,
 999        )
1000        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
1001            assistants.retrieve,
1002        )
1003        self.update = _legacy_response.async_to_raw_response_wrapper(
1004            assistants.update,
1005        )
1006        self.list = _legacy_response.async_to_raw_response_wrapper(
1007            assistants.list,
1008        )
1009        self.delete = _legacy_response.async_to_raw_response_wrapper(
1010            assistants.delete,
1011        )
1012
1013
class AssistantsWithStreamingResponse:
    """Mirror of ``Assistants`` whose methods return a streamed response object."""

    def __init__(self, assistants: Assistants) -> None:
        self._assistants = assistants

        # Wrap every public method so the response body can be streamed
        # rather than eagerly read and parsed.
        wrap = to_streamed_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
1033
1034
class AsyncAssistantsWithStreamingResponse:
    """Mirror of ``AsyncAssistants`` whose methods return a streamed response object."""

    def __init__(self, assistants: AsyncAssistants) -> None:
        self._assistants = assistants

        # Each attribute shadows the corresponding API method, wrapped so the
        # response body can be streamed rather than eagerly read and parsed.
        self.create = async_to_streamed_response_wrapper(
            assistants.create,
        )
        self.retrieve = async_to_streamed_response_wrapper(
            assistants.retrieve,
        )
        self.update = async_to_streamed_response_wrapper(
            assistants.update,
        )
        self.list = async_to_streamed_response_wrapper(
            assistants.list,
        )
        self.delete = async_to_streamed_response_wrapper(
            assistants.delete,
        )