openai-python/src/openai/resources/beta/threads/threads.py at main

   1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
   2
   3from __future__ import annotations
   4
   5import typing_extensions
   6from typing import Union, Iterable, Optional
   7from functools import partial
   8from typing_extensions import Literal, overload
   9
  10import httpx
  11
  12from .... import _legacy_response
  13from .messages import (
  14    Messages,
  15    AsyncMessages,
  16    MessagesWithRawResponse,
  17    AsyncMessagesWithRawResponse,
  18    MessagesWithStreamingResponse,
  19    AsyncMessagesWithStreamingResponse,
  20)
  21from ...._types import NOT_GIVEN, Body, Omit, Query, Headers, NotGiven, omit, not_given
  22from ...._utils import required_args, maybe_transform, async_maybe_transform
  23from .runs.runs import (
  24    Runs,
  25    AsyncRuns,
  26    RunsWithRawResponse,
  27    AsyncRunsWithRawResponse,
  28    RunsWithStreamingResponse,
  29    AsyncRunsWithStreamingResponse,
  30)
  31from ...._compat import cached_property
  32from ...._resource import SyncAPIResource, AsyncAPIResource
  33from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
  34from ...._streaming import Stream, AsyncStream
  35from ....types.beta import (
  36    thread_create_params,
  37    thread_update_params,
  38    thread_create_and_run_params,
  39)
  40from ...._base_client import make_request_options
  41from ....lib.streaming import (
  42    AssistantEventHandler,
  43    AssistantEventHandlerT,
  44    AssistantStreamManager,
  45    AsyncAssistantEventHandler,
  46    AsyncAssistantEventHandlerT,
  47    AsyncAssistantStreamManager,
  48)
  49from ....types.beta.thread import Thread
  50from ....types.beta.threads.run import Run
  51from ....types.shared.chat_model import ChatModel
  52from ....types.beta.thread_deleted import ThreadDeleted
  53from ....types.shared_params.metadata import Metadata
  54from ....types.beta.assistant_tool_param import AssistantToolParam
  55from ....types.beta.assistant_stream_event import AssistantStreamEvent
  56from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
  57from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
  58
  59__all__ = ["Threads", "AsyncThreads"]
  60
  61
  62class Threads(SyncAPIResource):
  63    @cached_property
  64    def runs(self) -> Runs:
  65        return Runs(self._client)
  66
  67    @cached_property
  68    def messages(self) -> Messages:
  69        return Messages(self._client)
  70
  71    @cached_property
  72    def with_raw_response(self) -> ThreadsWithRawResponse:
  73        """
  74        This property can be used as a prefix for any HTTP method call to return
  75        the raw response object instead of the parsed content.
  76
  77        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  78        """
  79        return ThreadsWithRawResponse(self)
  80
  81    @cached_property
  82    def with_streaming_response(self) -> ThreadsWithStreamingResponse:
  83        """
  84        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  85
  86        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  87        """
  88        return ThreadsWithStreamingResponse(self)
  89
  90    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  91    def create(
  92        self,
  93        *,
  94        messages: Iterable[thread_create_params.Message] | Omit = omit,
  95        metadata: Optional[Metadata] | Omit = omit,
  96        tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit,
  97        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  98        # The extra values given here take precedence over values defined on the client or passed to this method.
  99        extra_headers: Headers | None = None,
 100        extra_query: Query | None = None,
 101        extra_body: Body | None = None,
 102        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 103    ) -> Thread:
 104        """
 105        Create a thread.
 106
 107        Args:
 108          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
 109              start the thread with.
 110
 111          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 112              for storing additional information about the object in a structured format, and
 113              querying for objects via API or the dashboard.
 114
 115              Keys are strings with a maximum length of 64 characters. Values are strings with
 116              a maximum length of 512 characters.
 117
 118          tool_resources: A set of resources that are made available to the assistant's tools in this
 119              thread. The resources are specific to the type of tool. For example, the
 120              `code_interpreter` tool requires a list of file IDs, while the `file_search`
 121              tool requires a list of vector store IDs.
 122
 123          extra_headers: Send extra headers
 124
 125          extra_query: Add additional query parameters to the request
 126
 127          extra_body: Add additional JSON properties to the request
 128
 129          timeout: Override the client-level default timeout for this request, in seconds
 130        """
 131        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 132        return self._post(
 133            "/threads",
 134            body=maybe_transform(
 135                {
 136                    "messages": messages,
 137                    "metadata": metadata,
 138                    "tool_resources": tool_resources,
 139                },
 140                thread_create_params.ThreadCreateParams,
 141            ),
 142            options=make_request_options(
 143                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 144            ),
 145            cast_to=Thread,
 146        )
 147
 148    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
 149    def retrieve(
 150        self,
 151        thread_id: str,
 152        *,
 153        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 154        # The extra values given here take precedence over values defined on the client or passed to this method.
 155        extra_headers: Headers | None = None,
 156        extra_query: Query | None = None,
 157        extra_body: Body | None = None,
 158        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 159    ) -> Thread:
 160        """
 161        Retrieves a thread.
 162
 163        Args:
 164          extra_headers: Send extra headers
 165
 166          extra_query: Add additional query parameters to the request
 167
 168          extra_body: Add additional JSON properties to the request
 169
 170          timeout: Override the client-level default timeout for this request, in seconds
 171        """
 172        if not thread_id:
 173            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
 174        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 175        return self._get(
 176            f"/threads/{thread_id}",
 177            options=make_request_options(
 178                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 179            ),
 180            cast_to=Thread,
 181        )
 182
 183    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
 184    def update(
 185        self,
 186        thread_id: str,
 187        *,
 188        metadata: Optional[Metadata] | Omit = omit,
 189        tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit,
 190        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 191        # The extra values given here take precedence over values defined on the client or passed to this method.
 192        extra_headers: Headers | None = None,
 193        extra_query: Query | None = None,
 194        extra_body: Body | None = None,
 195        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 196    ) -> Thread:
 197        """
 198        Modifies a thread.
 199
 200        Args:
 201          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 202              for storing additional information about the object in a structured format, and
 203              querying for objects via API or the dashboard.
 204
 205              Keys are strings with a maximum length of 64 characters. Values are strings with
 206              a maximum length of 512 characters.
 207
 208          tool_resources: A set of resources that are made available to the assistant's tools in this
 209              thread. The resources are specific to the type of tool. For example, the
 210              `code_interpreter` tool requires a list of file IDs, while the `file_search`
 211              tool requires a list of vector store IDs.
 212
 213          extra_headers: Send extra headers
 214
 215          extra_query: Add additional query parameters to the request
 216
 217          extra_body: Add additional JSON properties to the request
 218
 219          timeout: Override the client-level default timeout for this request, in seconds
 220        """
 221        if not thread_id:
 222            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
 223        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 224        return self._post(
 225            f"/threads/{thread_id}",
 226            body=maybe_transform(
 227                {
 228                    "metadata": metadata,
 229                    "tool_resources": tool_resources,
 230                },
 231                thread_update_params.ThreadUpdateParams,
 232            ),
 233            options=make_request_options(
 234                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 235            ),
 236            cast_to=Thread,
 237        )
 238
 239    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
 240    def delete(
 241        self,
 242        thread_id: str,
 243        *,
 244        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 245        # The extra values given here take precedence over values defined on the client or passed to this method.
 246        extra_headers: Headers | None = None,
 247        extra_query: Query | None = None,
 248        extra_body: Body | None = None,
 249        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 250    ) -> ThreadDeleted:
 251        """
 252        Delete a thread.
 253
 254        Args:
 255          extra_headers: Send extra headers
 256
 257          extra_query: Add additional query parameters to the request
 258
 259          extra_body: Add additional JSON properties to the request
 260
 261          timeout: Override the client-level default timeout for this request, in seconds
 262        """
 263        if not thread_id:
 264            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
 265        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 266        return self._delete(
 267            f"/threads/{thread_id}",
 268            options=make_request_options(
 269                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 270            ),
 271            cast_to=ThreadDeleted,
 272        )
 273
 274    @overload
 275    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
 276    def create_and_run(
 277        self,
 278        *,
 279        assistant_id: str,
 280        instructions: Optional[str] | Omit = omit,
 281        max_completion_tokens: Optional[int] | Omit = omit,
 282        max_prompt_tokens: Optional[int] | Omit = omit,
 283        metadata: Optional[Metadata] | Omit = omit,
 284        model: Union[str, ChatModel, None] | Omit = omit,
 285        parallel_tool_calls: bool | Omit = omit,
 286        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 287        stream: Optional[Literal[False]] | Omit = omit,
 288        temperature: Optional[float] | Omit = omit,
 289        thread: thread_create_and_run_params.Thread | Omit = omit,
 290        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
 291        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
 292        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
 293        top_p: Optional[float] | Omit = omit,
 294        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
 295        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 296        # The extra values given here take precedence over values defined on the client or passed to this method.
 297        extra_headers: Headers | None = None,
 298        extra_query: Query | None = None,
 299        extra_body: Body | None = None,
 300        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 301    ) -> Run:
 302        """
 303        Create a thread and run it in one request.
 304
 305        Args:
 306          assistant_id: The ID of the
 307              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
 308              execute this run.
 309
 310          instructions: Override the default system message of the assistant. This is useful for
 311              modifying the behavior on a per-run basis.
 312
 313          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
 314              run. The run will make a best effort to use only the number of completion tokens
 315              specified, across multiple turns of the run. If the run exceeds the number of
 316              completion tokens specified, the run will end with status `incomplete`. See
 317              `incomplete_details` for more info.
 318
 319          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
 320              The run will make a best effort to use only the number of prompt tokens
 321              specified, across multiple turns of the run. If the run exceeds the number of
 322              prompt tokens specified, the run will end with status `incomplete`. See
 323              `incomplete_details` for more info.
 324
 325          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 326              for storing additional information about the object in a structured format, and
 327              querying for objects via API or the dashboard.
 328
 329              Keys are strings with a maximum length of 64 characters. Values are strings with
 330              a maximum length of 512 characters.
 331
 332          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
 333              be used to execute this run. If a value is provided here, it will override the
 334              model associated with the assistant. If not, the model associated with the
 335              assistant will be used.
 336
 337          parallel_tool_calls: Whether to enable
 338              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
 339              during tool use.
 340
 341          response_format: Specifies the format that the model must output. Compatible with
 342              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
 343              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
 344              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 345
 346              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
 347              Outputs which ensures the model will match your supplied JSON schema. Learn more
 348              in the
 349              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 350
 351              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
 352              message the model generates is valid JSON.
 353
 354              **Important:** when using JSON mode, you **must** also instruct the model to
 355              produce JSON yourself via a system or user message. Without this, the model may
 356              generate an unending stream of whitespace until the generation reaches the token
 357              limit, resulting in a long-running and seemingly "stuck" request. Also note that
 358              the message content may be partially cut off if `finish_reason="length"`, which
 359              indicates the generation exceeded `max_tokens` or the conversation exceeded the
 360              max context length.
 361
 362          stream: If `true`, returns a stream of events that happen during the Run as server-sent
 363              events, terminating when the Run enters a terminal state with a `data: [DONE]`
 364              message.
 365
 366          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
 367              make the output more random, while lower values like 0.2 will make it more
 368              focused and deterministic.
 369
 370          thread: Options to create a new thread. If no thread is provided when running a request,
 371              an empty thread will be created.
 372
 373          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
 374              not call any tools and instead generates a message. `auto` is the default value
 375              and means the model can pick between generating a message or calling one or more
 376              tools. `required` means the model must call one or more tools before responding
 377              to the user. Specifying a particular tool like `{"type": "file_search"}` or
 378              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
 379              call that tool.
 380
 381          tool_resources: A set of resources that are used by the assistant's tools. The resources are
 382              specific to the type of tool. For example, the `code_interpreter` tool requires
 383              a list of file IDs, while the `file_search` tool requires a list of vector store
 384              IDs.
 385
 386          tools: Override the tools the assistant can use for this run. This is useful for
 387              modifying the behavior on a per-run basis.
 388
 389          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
 390              model considers the results of the tokens with top_p probability mass. So 0.1
 391              means only the tokens comprising the top 10% probability mass are considered.
 392
 393              We generally recommend altering this or temperature but not both.
 394
 395          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
 396              control the initial context window of the run.
 397
 398          extra_headers: Send extra headers
 399
 400          extra_query: Add additional query parameters to the request
 401
 402          extra_body: Add additional JSON properties to the request
 403
 404          timeout: Override the client-level default timeout for this request, in seconds
 405        """
 406        ...
 407
 408    @overload
 409    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
 410    def create_and_run(
 411        self,
 412        *,
 413        assistant_id: str,
 414        stream: Literal[True],
 415        instructions: Optional[str] | Omit = omit,
 416        max_completion_tokens: Optional[int] | Omit = omit,
 417        max_prompt_tokens: Optional[int] | Omit = omit,
 418        metadata: Optional[Metadata] | Omit = omit,
 419        model: Union[str, ChatModel, None] | Omit = omit,
 420        parallel_tool_calls: bool | Omit = omit,
 421        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 422        temperature: Optional[float] | Omit = omit,
 423        thread: thread_create_and_run_params.Thread | Omit = omit,
 424        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
 425        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
 426        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
 427        top_p: Optional[float] | Omit = omit,
 428        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
 429        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 430        # The extra values given here take precedence over values defined on the client or passed to this method.
 431        extra_headers: Headers | None = None,
 432        extra_query: Query | None = None,
 433        extra_body: Body | None = None,
 434        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 435    ) -> Stream[AssistantStreamEvent]:
 436        """
 437        Create a thread and run it in one request.
 438
 439        Args:
 440          assistant_id: The ID of the
 441              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
 442              execute this run.
 443
 444          stream: If `true`, returns a stream of events that happen during the Run as server-sent
 445              events, terminating when the Run enters a terminal state with a `data: [DONE]`
 446              message.
 447
 448          instructions: Override the default system message of the assistant. This is useful for
 449              modifying the behavior on a per-run basis.
 450
 451          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
 452              run. The run will make a best effort to use only the number of completion tokens
 453              specified, across multiple turns of the run. If the run exceeds the number of
 454              completion tokens specified, the run will end with status `incomplete`. See
 455              `incomplete_details` for more info.
 456
 457          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
 458              The run will make a best effort to use only the number of prompt tokens
 459              specified, across multiple turns of the run. If the run exceeds the number of
 460              prompt tokens specified, the run will end with status `incomplete`. See
 461              `incomplete_details` for more info.
 462
 463          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 464              for storing additional information about the object in a structured format, and
 465              querying for objects via API or the dashboard.
 466
 467              Keys are strings with a maximum length of 64 characters. Values are strings with
 468              a maximum length of 512 characters.
 469
 470          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
 471              be used to execute this run. If a value is provided here, it will override the
 472              model associated with the assistant. If not, the model associated with the
 473              assistant will be used.
 474
 475          parallel_tool_calls: Whether to enable
 476              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
 477              during tool use.
 478
 479          response_format: Specifies the format that the model must output. Compatible with
 480              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
 481              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
 482              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 483
 484              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
 485              Outputs which ensures the model will match your supplied JSON schema. Learn more
 486              in the
 487              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 488
 489              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
 490              message the model generates is valid JSON.
 491
 492              **Important:** when using JSON mode, you **must** also instruct the model to
 493              produce JSON yourself via a system or user message. Without this, the model may
 494              generate an unending stream of whitespace until the generation reaches the token
 495              limit, resulting in a long-running and seemingly "stuck" request. Also note that
 496              the message content may be partially cut off if `finish_reason="length"`, which
 497              indicates the generation exceeded `max_tokens` or the conversation exceeded the
 498              max context length.
 499
 500          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
 501              make the output more random, while lower values like 0.2 will make it more
 502              focused and deterministic.
 503
 504          thread: Options to create a new thread. If no thread is provided when running a request,
 505              an empty thread will be created.
 506
 507          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
 508              not call any tools and instead generates a message. `auto` is the default value
 509              and means the model can pick between generating a message or calling one or more
 510              tools. `required` means the model must call one or more tools before responding
 511              to the user. Specifying a particular tool like `{"type": "file_search"}` or
 512              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
 513              call that tool.
 514
 515          tool_resources: A set of resources that are used by the assistant's tools. The resources are
 516              specific to the type of tool. For example, the `code_interpreter` tool requires
 517              a list of file IDs, while the `file_search` tool requires a list of vector store
 518              IDs.
 519
 520          tools: Override the tools the assistant can use for this run. This is useful for
 521              modifying the behavior on a per-run basis.
 522
 523          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
 524              model considers the results of the tokens with top_p probability mass. So 0.1
 525              means only the tokens comprising the top 10% probability mass are considered.
 526
 527              We generally recommend altering this or temperature but not both.
 528
 529          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
 530              control the initial context window of the run.
 531
 532          extra_headers: Send extra headers
 533
 534          extra_query: Add additional query parameters to the request
 535
 536          extra_body: Add additional JSON properties to the request
 537
 538          timeout: Override the client-level default timeout for this request, in seconds
 539        """
 540        ...
 541
 542    @overload
 543    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
 544    def create_and_run(
 545        self,
 546        *,
 547        assistant_id: str,
 548        stream: bool,
 549        instructions: Optional[str] | Omit = omit,
 550        max_completion_tokens: Optional[int] | Omit = omit,
 551        max_prompt_tokens: Optional[int] | Omit = omit,
 552        metadata: Optional[Metadata] | Omit = omit,
 553        model: Union[str, ChatModel, None] | Omit = omit,
 554        parallel_tool_calls: bool | Omit = omit,
 555        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 556        temperature: Optional[float] | Omit = omit,
 557        thread: thread_create_and_run_params.Thread | Omit = omit,
 558        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
 559        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
 560        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
 561        top_p: Optional[float] | Omit = omit,
 562        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
 563        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 564        # The extra values given here take precedence over values defined on the client or passed to this method.
 565        extra_headers: Headers | None = None,
 566        extra_query: Query | None = None,
 567        extra_body: Body | None = None,
 568        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 569    ) -> Run | Stream[AssistantStreamEvent]:
 570        """
 571        Create a thread and run it in one request.
 572
 573        Args:
 574          assistant_id: The ID of the
 575              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
 576              execute this run.
 577
 578          stream: If `true`, returns a stream of events that happen during the Run as server-sent
 579              events, terminating when the Run enters a terminal state with a `data: [DONE]`
 580              message.
 581
 582          instructions: Override the default system message of the assistant. This is useful for
 583              modifying the behavior on a per-run basis.
 584
 585          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
 586              run. The run will make a best effort to use only the number of completion tokens
 587              specified, across multiple turns of the run. If the run exceeds the number of
 588              completion tokens specified, the run will end with status `incomplete`. See
 589              `incomplete_details` for more info.
 590
 591          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
 592              The run will make a best effort to use only the number of prompt tokens
 593              specified, across multiple turns of the run. If the run exceeds the number of
 594              prompt tokens specified, the run will end with status `incomplete`. See
 595              `incomplete_details` for more info.
 596
 597          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 598              for storing additional information about the object in a structured format, and
 599              querying for objects via API or the dashboard.
 600
 601              Keys are strings with a maximum length of 64 characters. Values are strings with
 602              a maximum length of 512 characters.
 603
 604          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
 605              be used to execute this run. If a value is provided here, it will override the
 606              model associated with the assistant. If not, the model associated with the
 607              assistant will be used.
 608
 609          parallel_tool_calls: Whether to enable
 610              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
 611              during tool use.
 612
 613          response_format: Specifies the format that the model must output. Compatible with
 614              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
 615              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
 616              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 617
 618              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
 619              Outputs which ensures the model will match your supplied JSON schema. Learn more
 620              in the
 621              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 622
 623              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
 624              message the model generates is valid JSON.
 625
 626              **Important:** when using JSON mode, you **must** also instruct the model to
 627              produce JSON yourself via a system or user message. Without this, the model may
 628              generate an unending stream of whitespace until the generation reaches the token
 629              limit, resulting in a long-running and seemingly "stuck" request. Also note that
 630              the message content may be partially cut off if `finish_reason="length"`, which
 631              indicates the generation exceeded `max_tokens` or the conversation exceeded the
 632              max context length.
 633
 634          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
 635              make the output more random, while lower values like 0.2 will make it more
 636              focused and deterministic.
 637
 638          thread: Options to create a new thread. If no thread is provided when running a request,
 639              an empty thread will be created.
 640
 641          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
 642              not call any tools and instead generates a message. `auto` is the default value
 643              and means the model can pick between generating a message or calling one or more
 644              tools. `required` means the model must call one or more tools before responding
 645              to the user. Specifying a particular tool like `{"type": "file_search"}` or
 646              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
 647              call that tool.
 648
 649          tool_resources: A set of resources that are used by the assistant's tools. The resources are
 650              specific to the type of tool. For example, the `code_interpreter` tool requires
 651              a list of file IDs, while the `file_search` tool requires a list of vector store
 652              IDs.
 653
 654          tools: Override the tools the assistant can use for this run. This is useful for
 655              modifying the behavior on a per-run basis.
 656
 657          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
 658              model considers the results of the tokens with top_p probability mass. So 0.1
 659              means only the tokens comprising the top 10% probability mass are considered.
 660
 661              We generally recommend altering this or temperature but not both.
 662
 663          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
 664              control the initial context window of the run.
 665
 666          extra_headers: Send extra headers
 667
 668          extra_query: Add additional query parameters to the request
 669
 670          extra_body: Add additional JSON properties to the request
 671
 672          timeout: Override the client-level default timeout for this request, in seconds
 673        """
 674        ...
 675
 676    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
 677    @required_args(["assistant_id"], ["assistant_id", "stream"])
 678    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
 679    def create_and_run(
 680        self,
 681        *,
 682        assistant_id: str,
 683        instructions: Optional[str] | Omit = omit,
 684        max_completion_tokens: Optional[int] | Omit = omit,
 685        max_prompt_tokens: Optional[int] | Omit = omit,
 686        metadata: Optional[Metadata] | Omit = omit,
 687        model: Union[str, ChatModel, None] | Omit = omit,
 688        parallel_tool_calls: bool | Omit = omit,
 689        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 690        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
 691        temperature: Optional[float] | Omit = omit,
 692        thread: thread_create_and_run_params.Thread | Omit = omit,
 693        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
 694        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
 695        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
 696        top_p: Optional[float] | Omit = omit,
 697        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
 698        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 699        # The extra values given here take precedence over values defined on the client or passed to this method.
 700        extra_headers: Headers | None = None,
 701        extra_query: Query | None = None,
 702        extra_body: Body | None = None,
 703        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 704    ) -> Run | Stream[AssistantStreamEvent]:
 705        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 706        return self._post(
 707            "/threads/runs",
 708            body=maybe_transform(
 709                {
 710                    "assistant_id": assistant_id,
 711                    "instructions": instructions,
 712                    "max_completion_tokens": max_completion_tokens,
 713                    "max_prompt_tokens": max_prompt_tokens,
 714                    "metadata": metadata,
 715                    "model": model,
 716                    "parallel_tool_calls": parallel_tool_calls,
 717                    "response_format": response_format,
 718                    "stream": stream,
 719                    "temperature": temperature,
 720                    "thread": thread,
 721                    "tool_choice": tool_choice,
 722                    "tool_resources": tool_resources,
 723                    "tools": tools,
 724                    "top_p": top_p,
 725                    "truncation_strategy": truncation_strategy,
 726                },
 727                thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
 728                if stream
 729                else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming,
 730            ),
 731            options=make_request_options(
 732                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 733            ),
 734            cast_to=Run,
 735            stream=stream or False,
 736            stream_cls=Stream[AssistantStreamEvent],
 737        )
 738
 739    def create_and_run_poll(
 740        self,
 741        *,
 742        assistant_id: str,
 743        instructions: Optional[str] | Omit = omit,
 744        max_completion_tokens: Optional[int] | Omit = omit,
 745        max_prompt_tokens: Optional[int] | Omit = omit,
 746        metadata: Optional[Metadata] | Omit = omit,
 747        model: Union[str, ChatModel, None] | Omit = omit,
 748        parallel_tool_calls: bool | Omit = omit,
 749        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 750        temperature: Optional[float] | Omit = omit,
 751        thread: thread_create_and_run_params.Thread | Omit = omit,
 752        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
 753        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
 754        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
 755        top_p: Optional[float] | Omit = omit,
 756        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
 757        poll_interval_ms: int | Omit = omit,
 758        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 759        # The extra values given here take precedence over values defined on the client or passed to this method.
 760        extra_headers: Headers | None = None,
 761        extra_query: Query | None = None,
 762        extra_body: Body | None = None,
 763        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
 764    ) -> Run:
 765        """
 766        A helper to create a thread, start a run and then poll for a terminal state.
 767        More information on Run lifecycles can be found here:
 768        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
 769        """
 770        run = self.create_and_run(  # pyright: ignore[reportDeprecated]
 771            assistant_id=assistant_id,
 772            instructions=instructions,
 773            max_completion_tokens=max_completion_tokens,
 774            max_prompt_tokens=max_prompt_tokens,
 775            metadata=metadata,
 776            model=model,
 777            parallel_tool_calls=parallel_tool_calls,
 778            response_format=response_format,
 779            temperature=temperature,
 780            stream=False,
 781            thread=thread,
 782            tool_resources=tool_resources,
 783            tool_choice=tool_choice,
 784            truncation_strategy=truncation_strategy,
 785            top_p=top_p,
 786            tools=tools,
 787            extra_headers=extra_headers,
 788            extra_query=extra_query,
 789            extra_body=extra_body,
 790            timeout=timeout,
 791        )
 792        return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms)  # pyright: ignore[reportDeprecated]
 793
 794    @overload
 795    def create_and_run_stream(
 796        self,
 797        *,
 798        assistant_id: str,
 799        instructions: Optional[str] | Omit = omit,
 800        max_completion_tokens: Optional[int] | Omit = omit,
 801        max_prompt_tokens: Optional[int] | Omit = omit,
 802        metadata: Optional[Metadata] | Omit = omit,
 803        model: Union[str, ChatModel, None] | Omit = omit,
 804        parallel_tool_calls: bool | Omit = omit,
 805        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 806        temperature: Optional[float] | Omit = omit,
 807        thread: thread_create_and_run_params.Thread | Omit = omit,
 808        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
 809        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
 810        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
 811        top_p: Optional[float] | Omit = omit,
 812        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
 813        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 814        # The extra values given here take precedence over values defined on the client or passed to this method.
 815        extra_headers: Headers | None = None,
 816        extra_query: Query | None = None,
 817        extra_body: Body | None = None,
 818        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
 819    ) -> AssistantStreamManager[AssistantEventHandler]:
 820        """Create a thread and stream the run back"""
 821        ...
 822
 823    @overload
 824    def create_and_run_stream(
 825        self,
 826        *,
 827        assistant_id: str,
 828        instructions: Optional[str] | Omit = omit,
 829        max_completion_tokens: Optional[int] | Omit = omit,
 830        max_prompt_tokens: Optional[int] | Omit = omit,
 831        metadata: Optional[Metadata] | Omit = omit,
 832        model: Union[str, ChatModel, None] | Omit = omit,
 833        parallel_tool_calls: bool | Omit = omit,
 834        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 835        temperature: Optional[float] | Omit = omit,
 836        thread: thread_create_and_run_params.Thread | Omit = omit,
 837        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
 838        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
 839        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
 840        top_p: Optional[float] | Omit = omit,
 841        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
 842        event_handler: AssistantEventHandlerT,
 843        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 844        # The extra values given here take precedence over values defined on the client or passed to this method.
 845        extra_headers: Headers | None = None,
 846        extra_query: Query | None = None,
 847        extra_body: Body | None = None,
 848        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
 849    ) -> AssistantStreamManager[AssistantEventHandlerT]:
 850        """Create a thread and stream the run back"""
 851        ...
 852
 853    def create_and_run_stream(
 854        self,
 855        *,
 856        assistant_id: str,
 857        instructions: Optional[str] | Omit = omit,
 858        max_completion_tokens: Optional[int] | Omit = omit,
 859        max_prompt_tokens: Optional[int] | Omit = omit,
 860        metadata: Optional[Metadata] | Omit = omit,
 861        model: Union[str, ChatModel, None] | Omit = omit,
 862        parallel_tool_calls: bool | Omit = omit,
 863        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
 864        temperature: Optional[float] | Omit = omit,
 865        thread: thread_create_and_run_params.Thread | Omit = omit,
 866        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
 867        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
 868        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
 869        top_p: Optional[float] | Omit = omit,
 870        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
 871        event_handler: AssistantEventHandlerT | None = None,
 872        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 873        # The extra values given here take precedence over values defined on the client or passed to this method.
 874        extra_headers: Headers | None = None,
 875        extra_query: Query | None = None,
 876        extra_body: Body | None = None,
 877        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
 878    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
 879        """Create a thread and stream the run back"""
 880        extra_headers = {
 881            "OpenAI-Beta": "assistants=v2",
 882            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
 883            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
 884            **(extra_headers or {}),
 885        }
 886        make_request = partial(
 887            self._post,
 888            "/threads/runs",
 889            body=maybe_transform(
 890                {
 891                    "assistant_id": assistant_id,
 892                    "instructions": instructions,
 893                    "max_completion_tokens": max_completion_tokens,
 894                    "max_prompt_tokens": max_prompt_tokens,
 895                    "metadata": metadata,
 896                    "model": model,
 897                    "parallel_tool_calls": parallel_tool_calls,
 898                    "response_format": response_format,
 899                    "temperature": temperature,
 900                    "tool_choice": tool_choice,
 901                    "stream": True,
 902                    "thread": thread,
 903                    "tools": tools,
 904                    "tool_resources": tool_resources,
 905                    "truncation_strategy": truncation_strategy,
 906                    "top_p": top_p,
 907                },
 908                thread_create_and_run_params.ThreadCreateAndRunParams,
 909            ),
 910            options=make_request_options(
 911                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 912            ),
 913            cast_to=Run,
 914            stream=True,
 915            stream_cls=Stream[AssistantStreamEvent],
 916        )
 917        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
 918
 919
 920class AsyncThreads(AsyncAPIResource):
 921    @cached_property
 922    def runs(self) -> AsyncRuns:
 923        return AsyncRuns(self._client)
 924
 925    @cached_property
 926    def messages(self) -> AsyncMessages:
 927        return AsyncMessages(self._client)
 928
 929    @cached_property
 930    def with_raw_response(self) -> AsyncThreadsWithRawResponse:
 931        """
 932        This property can be used as a prefix for any HTTP method call to return
 933        the raw response object instead of the parsed content.
 934
 935        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
 936        """
 937        return AsyncThreadsWithRawResponse(self)
 938
 939    @cached_property
 940    def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
 941        """
 942        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 943
 944        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
 945        """
 946        return AsyncThreadsWithStreamingResponse(self)
 947
 948    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
 949    async def create(
 950        self,
 951        *,
 952        messages: Iterable[thread_create_params.Message] | Omit = omit,
 953        metadata: Optional[Metadata] | Omit = omit,
 954        tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit,
 955        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 956        # The extra values given here take precedence over values defined on the client or passed to this method.
 957        extra_headers: Headers | None = None,
 958        extra_query: Query | None = None,
 959        extra_body: Body | None = None,
 960        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 961    ) -> Thread:
 962        """
 963        Create a thread.
 964
 965        Args:
 966          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
 967              start the thread with.
 968
 969          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 970              for storing additional information about the object in a structured format, and
 971              querying for objects via API or the dashboard.
 972
 973              Keys are strings with a maximum length of 64 characters. Values are strings with
 974              a maximum length of 512 characters.
 975
 976          tool_resources: A set of resources that are made available to the assistant's tools in this
 977              thread. The resources are specific to the type of tool. For example, the
 978              `code_interpreter` tool requires a list of file IDs, while the `file_search`
 979              tool requires a list of vector store IDs.
 980
 981          extra_headers: Send extra headers
 982
 983          extra_query: Add additional query parameters to the request
 984
 985          extra_body: Add additional JSON properties to the request
 986
 987          timeout: Override the client-level default timeout for this request, in seconds
 988        """
 989        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
 990        return await self._post(
 991            "/threads",
 992            body=await async_maybe_transform(
 993                {
 994                    "messages": messages,
 995                    "metadata": metadata,
 996                    "tool_resources": tool_resources,
 997                },
 998                thread_create_params.ThreadCreateParams,
 999            ),
1000            options=make_request_options(
1001                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1002            ),
1003            cast_to=Thread,
1004        )
1005
1006    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1007    async def retrieve(
1008        self,
1009        thread_id: str,
1010        *,
1011        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1012        # The extra values given here take precedence over values defined on the client or passed to this method.
1013        extra_headers: Headers | None = None,
1014        extra_query: Query | None = None,
1015        extra_body: Body | None = None,
1016        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1017    ) -> Thread:
1018        """
1019        Retrieves a thread.
1020
1021        Args:
1022          extra_headers: Send extra headers
1023
1024          extra_query: Add additional query parameters to the request
1025
1026          extra_body: Add additional JSON properties to the request
1027
1028          timeout: Override the client-level default timeout for this request, in seconds
1029        """
1030        if not thread_id:
1031            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
1032        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
1033        return await self._get(
1034            f"/threads/{thread_id}",
1035            options=make_request_options(
1036                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1037            ),
1038            cast_to=Thread,
1039        )
1040
1041    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1042    async def update(
1043        self,
1044        thread_id: str,
1045        *,
1046        metadata: Optional[Metadata] | Omit = omit,
1047        tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit,
1048        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1049        # The extra values given here take precedence over values defined on the client or passed to this method.
1050        extra_headers: Headers | None = None,
1051        extra_query: Query | None = None,
1052        extra_body: Body | None = None,
1053        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1054    ) -> Thread:
1055        """
1056        Modifies a thread.
1057
1058        Args:
1059          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
1060              for storing additional information about the object in a structured format, and
1061              querying for objects via API or the dashboard.
1062
1063              Keys are strings with a maximum length of 64 characters. Values are strings with
1064              a maximum length of 512 characters.
1065
1066          tool_resources: A set of resources that are made available to the assistant's tools in this
1067              thread. The resources are specific to the type of tool. For example, the
1068              `code_interpreter` tool requires a list of file IDs, while the `file_search`
1069              tool requires a list of vector store IDs.
1070
1071          extra_headers: Send extra headers
1072
1073          extra_query: Add additional query parameters to the request
1074
1075          extra_body: Add additional JSON properties to the request
1076
1077          timeout: Override the client-level default timeout for this request, in seconds
1078        """
1079        if not thread_id:
1080            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
1081        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
1082        return await self._post(
1083            f"/threads/{thread_id}",
1084            body=await async_maybe_transform(
1085                {
1086                    "metadata": metadata,
1087                    "tool_resources": tool_resources,
1088                },
1089                thread_update_params.ThreadUpdateParams,
1090            ),
1091            options=make_request_options(
1092                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1093            ),
1094            cast_to=Thread,
1095        )
1096
1097    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1098    async def delete(
1099        self,
1100        thread_id: str,
1101        *,
1102        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1103        # The extra values given here take precedence over values defined on the client or passed to this method.
1104        extra_headers: Headers | None = None,
1105        extra_query: Query | None = None,
1106        extra_body: Body | None = None,
1107        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1108    ) -> ThreadDeleted:
1109        """
1110        Delete a thread.
1111
1112        Args:
1113          extra_headers: Send extra headers
1114
1115          extra_query: Add additional query parameters to the request
1116
1117          extra_body: Add additional JSON properties to the request
1118
1119          timeout: Override the client-level default timeout for this request, in seconds
1120        """
1121        if not thread_id:
1122            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
1123        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
1124        return await self._delete(
1125            f"/threads/{thread_id}",
1126            options=make_request_options(
1127                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1128            ),
1129            cast_to=ThreadDeleted,
1130        )
1131
1132    @overload
1133    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1134    async def create_and_run(
1135        self,
1136        *,
1137        assistant_id: str,
1138        instructions: Optional[str] | Omit = omit,
1139        max_completion_tokens: Optional[int] | Omit = omit,
1140        max_prompt_tokens: Optional[int] | Omit = omit,
1141        metadata: Optional[Metadata] | Omit = omit,
1142        model: Union[str, ChatModel, None] | Omit = omit,
1143        parallel_tool_calls: bool | Omit = omit,
1144        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
1145        stream: Optional[Literal[False]] | Omit = omit,
1146        temperature: Optional[float] | Omit = omit,
1147        thread: thread_create_and_run_params.Thread | Omit = omit,
1148        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
1149        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
1150        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
1151        top_p: Optional[float] | Omit = omit,
1152        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
1153        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1154        # The extra values given here take precedence over values defined on the client or passed to this method.
1155        extra_headers: Headers | None = None,
1156        extra_query: Query | None = None,
1157        extra_body: Body | None = None,
1158        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1159    ) -> Run:
1160        """
1161        Create a thread and run it in one request.
1162
1163        Args:
1164          assistant_id: The ID of the
1165              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
1166              execute this run.
1167
1168          instructions: Override the default system message of the assistant. This is useful for
1169              modifying the behavior on a per-run basis.
1170
1171          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
1172              run. The run will make a best effort to use only the number of completion tokens
1173              specified, across multiple turns of the run. If the run exceeds the number of
1174              completion tokens specified, the run will end with status `incomplete`. See
1175              `incomplete_details` for more info.
1176
1177          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
1178              The run will make a best effort to use only the number of prompt tokens
1179              specified, across multiple turns of the run. If the run exceeds the number of
1180              prompt tokens specified, the run will end with status `incomplete`. See
1181              `incomplete_details` for more info.
1182
1183          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
1184              for storing additional information about the object in a structured format, and
1185              querying for objects via API or the dashboard.
1186
1187              Keys are strings with a maximum length of 64 characters. Values are strings with
1188              a maximum length of 512 characters.
1189
1190          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
1191              be used to execute this run. If a value is provided here, it will override the
1192              model associated with the assistant. If not, the model associated with the
1193              assistant will be used.
1194
1195          parallel_tool_calls: Whether to enable
1196              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
1197              during tool use.
1198
1199          response_format: Specifies the format that the model must output. Compatible with
1200              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
1201              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
1202              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
1203
1204              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
1205              Outputs which ensures the model will match your supplied JSON schema. Learn more
1206              in the
1207              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
1208
1209              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
1210              message the model generates is valid JSON.
1211
1212              **Important:** when using JSON mode, you **must** also instruct the model to
1213              produce JSON yourself via a system or user message. Without this, the model may
1214              generate an unending stream of whitespace until the generation reaches the token
1215              limit, resulting in a long-running and seemingly "stuck" request. Also note that
1216              the message content may be partially cut off if `finish_reason="length"`, which
1217              indicates the generation exceeded `max_tokens` or the conversation exceeded the
1218              max context length.
1219
1220          stream: If `true`, returns a stream of events that happen during the Run as server-sent
1221              events, terminating when the Run enters a terminal state with a `data: [DONE]`
1222              message.
1223
1224          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
1225              make the output more random, while lower values like 0.2 will make it more
1226              focused and deterministic.
1227
1228          thread: Options to create a new thread. If no thread is provided when running a request,
1229              an empty thread will be created.
1230
1231          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
1232              not call any tools and instead generates a message. `auto` is the default value
1233              and means the model can pick between generating a message or calling one or more
1234              tools. `required` means the model must call one or more tools before responding
1235              to the user. Specifying a particular tool like `{"type": "file_search"}` or
1236              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
1237              call that tool.
1238
1239          tool_resources: A set of resources that are used by the assistant's tools. The resources are
1240              specific to the type of tool. For example, the `code_interpreter` tool requires
1241              a list of file IDs, while the `file_search` tool requires a list of vector store
1242              IDs.
1243
1244          tools: Override the tools the assistant can use for this run. This is useful for
1245              modifying the behavior on a per-run basis.
1246
1247          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
1248              model considers the results of the tokens with top_p probability mass. So 0.1
1249              means only the tokens comprising the top 10% probability mass are considered.
1250
1251              We generally recommend altering this or temperature but not both.
1252
1253          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
1254              control the initial context window of the run.
1255
1256          extra_headers: Send extra headers
1257
1258          extra_query: Add additional query parameters to the request
1259
1260          extra_body: Add additional JSON properties to the request
1261
1262          timeout: Override the client-level default timeout for this request, in seconds
1263        """
1264        ...
1265
1266    @overload
1267    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1268    async def create_and_run(
1269        self,
1270        *,
1271        assistant_id: str,
1272        stream: Literal[True],
1273        instructions: Optional[str] | Omit = omit,
1274        max_completion_tokens: Optional[int] | Omit = omit,
1275        max_prompt_tokens: Optional[int] | Omit = omit,
1276        metadata: Optional[Metadata] | Omit = omit,
1277        model: Union[str, ChatModel, None] | Omit = omit,
1278        parallel_tool_calls: bool | Omit = omit,
1279        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
1280        temperature: Optional[float] | Omit = omit,
1281        thread: thread_create_and_run_params.Thread | Omit = omit,
1282        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
1283        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
1284        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
1285        top_p: Optional[float] | Omit = omit,
1286        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
1287        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1288        # The extra values given here take precedence over values defined on the client or passed to this method.
1289        extra_headers: Headers | None = None,
1290        extra_query: Query | None = None,
1291        extra_body: Body | None = None,
1292        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1293    ) -> AsyncStream[AssistantStreamEvent]:
1294        """
1295        Create a thread and run it in one request.
1296
1297        Args:
1298          assistant_id: The ID of the
1299              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
1300              execute this run.
1301
1302          stream: If `true`, returns a stream of events that happen during the Run as server-sent
1303              events, terminating when the Run enters a terminal state with a `data: [DONE]`
1304              message.
1305
1306          instructions: Override the default system message of the assistant. This is useful for
1307              modifying the behavior on a per-run basis.
1308
1309          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
1310              run. The run will make a best effort to use only the number of completion tokens
1311              specified, across multiple turns of the run. If the run exceeds the number of
1312              completion tokens specified, the run will end with status `incomplete`. See
1313              `incomplete_details` for more info.
1314
1315          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
1316              The run will make a best effort to use only the number of prompt tokens
1317              specified, across multiple turns of the run. If the run exceeds the number of
1318              prompt tokens specified, the run will end with status `incomplete`. See
1319              `incomplete_details` for more info.
1320
1321          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
1322              for storing additional information about the object in a structured format, and
1323              querying for objects via API or the dashboard.
1324
1325              Keys are strings with a maximum length of 64 characters. Values are strings with
1326              a maximum length of 512 characters.
1327
1328          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
1329              be used to execute this run. If a value is provided here, it will override the
1330              model associated with the assistant. If not, the model associated with the
1331              assistant will be used.
1332
1333          parallel_tool_calls: Whether to enable
1334              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
1335              during tool use.
1336
1337          response_format: Specifies the format that the model must output. Compatible with
1338              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
1339              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
1340              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
1341
1342              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
1343              Outputs which ensures the model will match your supplied JSON schema. Learn more
1344              in the
1345              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
1346
1347              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
1348              message the model generates is valid JSON.
1349
1350              **Important:** when using JSON mode, you **must** also instruct the model to
1351              produce JSON yourself via a system or user message. Without this, the model may
1352              generate an unending stream of whitespace until the generation reaches the token
1353              limit, resulting in a long-running and seemingly "stuck" request. Also note that
1354              the message content may be partially cut off if `finish_reason="length"`, which
1355              indicates the generation exceeded `max_tokens` or the conversation exceeded the
1356              max context length.
1357
1358          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
1359              make the output more random, while lower values like 0.2 will make it more
1360              focused and deterministic.
1361
1362          thread: Options to create a new thread. If no thread is provided when running a request,
1363              an empty thread will be created.
1364
1365          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
1366              not call any tools and instead generates a message. `auto` is the default value
1367              and means the model can pick between generating a message or calling one or more
1368              tools. `required` means the model must call one or more tools before responding
1369              to the user. Specifying a particular tool like `{"type": "file_search"}` or
1370              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
1371              call that tool.
1372
1373          tool_resources: A set of resources that are used by the assistant's tools. The resources are
1374              specific to the type of tool. For example, the `code_interpreter` tool requires
1375              a list of file IDs, while the `file_search` tool requires a list of vector store
1376              IDs.
1377
1378          tools: Override the tools the assistant can use for this run. This is useful for
1379              modifying the behavior on a per-run basis.
1380
1381          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
1382              model considers the results of the tokens with top_p probability mass. So 0.1
1383              means only the tokens comprising the top 10% probability mass are considered.
1384
1385              We generally recommend altering this or temperature but not both.
1386
1387          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
1388              control the initial context window of the run.
1389
1390          extra_headers: Send extra headers
1391
1392          extra_query: Add additional query parameters to the request
1393
1394          extra_body: Add additional JSON properties to the request
1395
1396          timeout: Override the client-level default timeout for this request, in seconds
1397        """
1398        ...
1399
1400    @overload
1401    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1402    async def create_and_run(
1403        self,
1404        *,
1405        assistant_id: str,
1406        stream: bool,
1407        instructions: Optional[str] | Omit = omit,
1408        max_completion_tokens: Optional[int] | Omit = omit,
1409        max_prompt_tokens: Optional[int] | Omit = omit,
1410        metadata: Optional[Metadata] | Omit = omit,
1411        model: Union[str, ChatModel, None] | Omit = omit,
1412        parallel_tool_calls: bool | Omit = omit,
1413        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
1414        temperature: Optional[float] | Omit = omit,
1415        thread: thread_create_and_run_params.Thread | Omit = omit,
1416        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
1417        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
1418        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
1419        top_p: Optional[float] | Omit = omit,
1420        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
1421        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1422        # The extra values given here take precedence over values defined on the client or passed to this method.
1423        extra_headers: Headers | None = None,
1424        extra_query: Query | None = None,
1425        extra_body: Body | None = None,
1426        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1427    ) -> Run | AsyncStream[AssistantStreamEvent]:
1428        """
1429        Create a thread and run it in one request.
1430
1431        Args:
1432          assistant_id: The ID of the
1433              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
1434              execute this run.
1435
1436          stream: If `true`, returns a stream of events that happen during the Run as server-sent
1437              events, terminating when the Run enters a terminal state with a `data: [DONE]`
1438              message.
1439
1440          instructions: Override the default system message of the assistant. This is useful for
1441              modifying the behavior on a per-run basis.
1442
1443          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
1444              run. The run will make a best effort to use only the number of completion tokens
1445              specified, across multiple turns of the run. If the run exceeds the number of
1446              completion tokens specified, the run will end with status `incomplete`. See
1447              `incomplete_details` for more info.
1448
1449          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
1450              The run will make a best effort to use only the number of prompt tokens
1451              specified, across multiple turns of the run. If the run exceeds the number of
1452              prompt tokens specified, the run will end with status `incomplete`. See
1453              `incomplete_details` for more info.
1454
1455          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
1456              for storing additional information about the object in a structured format, and
1457              querying for objects via API or the dashboard.
1458
1459              Keys are strings with a maximum length of 64 characters. Values are strings with
1460              a maximum length of 512 characters.
1461
1462          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
1463              be used to execute this run. If a value is provided here, it will override the
1464              model associated with the assistant. If not, the model associated with the
1465              assistant will be used.
1466
1467          parallel_tool_calls: Whether to enable
1468              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
1469              during tool use.
1470
1471          response_format: Specifies the format that the model must output. Compatible with
1472              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
1473              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
1474              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
1475
1476              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
1477              Outputs which ensures the model will match your supplied JSON schema. Learn more
1478              in the
1479              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
1480
1481              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
1482              message the model generates is valid JSON.
1483
1484              **Important:** when using JSON mode, you **must** also instruct the model to
1485              produce JSON yourself via a system or user message. Without this, the model may
1486              generate an unending stream of whitespace until the generation reaches the token
1487              limit, resulting in a long-running and seemingly "stuck" request. Also note that
1488              the message content may be partially cut off if `finish_reason="length"`, which
1489              indicates the generation exceeded `max_tokens` or the conversation exceeded the
1490              max context length.
1491
1492          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
1493              make the output more random, while lower values like 0.2 will make it more
1494              focused and deterministic.
1495
1496          thread: Options to create a new thread. If no thread is provided when running a request,
1497              an empty thread will be created.
1498
1499          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
1500              not call any tools and instead generates a message. `auto` is the default value
1501              and means the model can pick between generating a message or calling one or more
1502              tools. `required` means the model must call one or more tools before responding
1503              to the user. Specifying a particular tool like `{"type": "file_search"}` or
1504              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
1505              call that tool.
1506
1507          tool_resources: A set of resources that are used by the assistant's tools. The resources are
1508              specific to the type of tool. For example, the `code_interpreter` tool requires
1509              a list of file IDs, while the `file_search` tool requires a list of vector store
1510              IDs.
1511
1512          tools: Override the tools the assistant can use for this run. This is useful for
1513              modifying the behavior on a per-run basis.
1514
1515          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
1516              model considers the results of the tokens with top_p probability mass. So 0.1
1517              means only the tokens comprising the top 10% probability mass are considered.
1518
1519              We generally recommend altering this or temperature but not both.
1520
1521          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
1522              control the initial context window of the run.
1523
1524          extra_headers: Send extra headers
1525
1526          extra_query: Add additional query parameters to the request
1527
1528          extra_body: Add additional JSON properties to the request
1529
1530          timeout: Override the client-level default timeout for this request, in seconds
1531        """
1532        ...
1533
1534    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1535    @required_args(["assistant_id"], ["assistant_id", "stream"])
1536    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1537    async def create_and_run(
1538        self,
1539        *,
1540        assistant_id: str,
1541        instructions: Optional[str] | Omit = omit,
1542        max_completion_tokens: Optional[int] | Omit = omit,
1543        max_prompt_tokens: Optional[int] | Omit = omit,
1544        metadata: Optional[Metadata] | Omit = omit,
1545        model: Union[str, ChatModel, None] | Omit = omit,
1546        parallel_tool_calls: bool | Omit = omit,
1547        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
1548        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
1549        temperature: Optional[float] | Omit = omit,
1550        thread: thread_create_and_run_params.Thread | Omit = omit,
1551        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
1552        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
1553        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
1554        top_p: Optional[float] | Omit = omit,
1555        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
1556        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1557        # The extra values given here take precedence over values defined on the client or passed to this method.
1558        extra_headers: Headers | None = None,
1559        extra_query: Query | None = None,
1560        extra_body: Body | None = None,
1561        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1562    ) -> Run | AsyncStream[AssistantStreamEvent]:
1563        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
1564        return await self._post(
1565            "/threads/runs",
1566            body=await async_maybe_transform(
1567                {
1568                    "assistant_id": assistant_id,
1569                    "instructions": instructions,
1570                    "max_completion_tokens": max_completion_tokens,
1571                    "max_prompt_tokens": max_prompt_tokens,
1572                    "metadata": metadata,
1573                    "model": model,
1574                    "parallel_tool_calls": parallel_tool_calls,
1575                    "response_format": response_format,
1576                    "stream": stream,
1577                    "temperature": temperature,
1578                    "thread": thread,
1579                    "tool_choice": tool_choice,
1580                    "tool_resources": tool_resources,
1581                    "tools": tools,
1582                    "top_p": top_p,
1583                    "truncation_strategy": truncation_strategy,
1584                },
1585                thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
1586                if stream
1587                else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming,
1588            ),
1589            options=make_request_options(
1590                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1591            ),
1592            cast_to=Run,
1593            stream=stream or False,
1594            stream_cls=AsyncStream[AssistantStreamEvent],
1595        )
1596
1597    async def create_and_run_poll(
1598        self,
1599        *,
1600        assistant_id: str,
1601        instructions: Optional[str] | Omit = omit,
1602        max_completion_tokens: Optional[int] | Omit = omit,
1603        max_prompt_tokens: Optional[int] | Omit = omit,
1604        metadata: Optional[Metadata] | Omit = omit,
1605        model: Union[str, ChatModel, None] | Omit = omit,
1606        parallel_tool_calls: bool | Omit = omit,
1607        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
1608        temperature: Optional[float] | Omit = omit,
1609        thread: thread_create_and_run_params.Thread | Omit = omit,
1610        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
1611        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
1612        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
1613        top_p: Optional[float] | Omit = omit,
1614        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
1615        poll_interval_ms: int | Omit = omit,
1616        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1617        # The extra values given here take precedence over values defined on the client or passed to this method.
1618        extra_headers: Headers | None = None,
1619        extra_query: Query | None = None,
1620        extra_body: Body | None = None,
1621        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
1622    ) -> Run:
1623        """
1624        A helper to create a thread, start a run and then poll for a terminal state.
1625        More information on Run lifecycles can be found here:
1626        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
1627        """
1628        run = await self.create_and_run(  # pyright: ignore[reportDeprecated]
1629            assistant_id=assistant_id,
1630            instructions=instructions,
1631            max_completion_tokens=max_completion_tokens,
1632            max_prompt_tokens=max_prompt_tokens,
1633            metadata=metadata,
1634            model=model,
1635            parallel_tool_calls=parallel_tool_calls,
1636            response_format=response_format,
1637            temperature=temperature,
1638            stream=False,
1639            thread=thread,
1640            tool_resources=tool_resources,
1641            tool_choice=tool_choice,
1642            truncation_strategy=truncation_strategy,
1643            top_p=top_p,
1644            tools=tools,
1645            extra_headers=extra_headers,
1646            extra_query=extra_query,
1647            extra_body=extra_body,
1648            timeout=timeout,
1649        )
1650        return await self.runs.poll(  # pyright: ignore[reportDeprecated]
1651            run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms
1652        )
1653
1654    @overload
1655    def create_and_run_stream(
1656        self,
1657        *,
1658        assistant_id: str,
1659        instructions: Optional[str] | Omit = omit,
1660        max_completion_tokens: Optional[int] | Omit = omit,
1661        max_prompt_tokens: Optional[int] | Omit = omit,
1662        metadata: Optional[Metadata] | Omit = omit,
1663        model: Union[str, ChatModel, None] | Omit = omit,
1664        parallel_tool_calls: bool | Omit = omit,
1665        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
1666        temperature: Optional[float] | Omit = omit,
1667        thread: thread_create_and_run_params.Thread | Omit = omit,
1668        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
1669        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
1670        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
1671        top_p: Optional[float] | Omit = omit,
1672        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
1673        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1674        # The extra values given here take precedence over values defined on the client or passed to this method.
1675        extra_headers: Headers | None = None,
1676        extra_query: Query | None = None,
1677        extra_body: Body | None = None,
1678        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
1679    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
1680        """Create a thread and stream the run back"""
1681        ...
1682
1683    @overload
1684    def create_and_run_stream(
1685        self,
1686        *,
1687        assistant_id: str,
1688        instructions: Optional[str] | Omit = omit,
1689        max_completion_tokens: Optional[int] | Omit = omit,
1690        max_prompt_tokens: Optional[int] | Omit = omit,
1691        metadata: Optional[Metadata] | Omit = omit,
1692        model: Union[str, ChatModel, None] | Omit = omit,
1693        parallel_tool_calls: bool | Omit = omit,
1694        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
1695        temperature: Optional[float] | Omit = omit,
1696        thread: thread_create_and_run_params.Thread | Omit = omit,
1697        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
1698        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
1699        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
1700        top_p: Optional[float] | Omit = omit,
1701        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
1702        event_handler: AsyncAssistantEventHandlerT,
1703        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1704        # The extra values given here take precedence over values defined on the client or passed to this method.
1705        extra_headers: Headers | None = None,
1706        extra_query: Query | None = None,
1707        extra_body: Body | None = None,
1708        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
1709    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
1710        """Create a thread and stream the run back"""
1711        ...
1712
1713    def create_and_run_stream(
1714        self,
1715        *,
1716        assistant_id: str,
1717        instructions: Optional[str] | Omit = omit,
1718        max_completion_tokens: Optional[int] | Omit = omit,
1719        max_prompt_tokens: Optional[int] | Omit = omit,
1720        metadata: Optional[Metadata] | Omit = omit,
1721        model: Union[str, ChatModel, None] | Omit = omit,
1722        parallel_tool_calls: bool | Omit = omit,
1723        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
1724        temperature: Optional[float] | Omit = omit,
1725        thread: thread_create_and_run_params.Thread | Omit = omit,
1726        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
1727        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
1728        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
1729        top_p: Optional[float] | Omit = omit,
1730        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
1731        event_handler: AsyncAssistantEventHandlerT | None = None,
1732        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1733        # The extra values given here take precedence over values defined on the client or passed to this method.
1734        extra_headers: Headers | None = None,
1735        extra_query: Query | None = None,
1736        extra_body: Body | None = None,
1737        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
1738    ) -> (
1739        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
1740        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
1741    ):
1742        """Create a thread and stream the run back"""
1743        extra_headers = {
1744            "OpenAI-Beta": "assistants=v2",
1745            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
1746            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
1747            **(extra_headers or {}),
1748        }
1749        request = self._post(
1750            "/threads/runs",
1751            body=maybe_transform(
1752                {
1753                    "assistant_id": assistant_id,
1754                    "instructions": instructions,
1755                    "max_completion_tokens": max_completion_tokens,
1756                    "max_prompt_tokens": max_prompt_tokens,
1757                    "metadata": metadata,
1758                    "model": model,
1759                    "parallel_tool_calls": parallel_tool_calls,
1760                    "response_format": response_format,
1761                    "temperature": temperature,
1762                    "tool_choice": tool_choice,
1763                    "stream": True,
1764                    "thread": thread,
1765                    "tools": tools,
1766                    "tool_resources": tool_resources,
1767                    "truncation_strategy": truncation_strategy,
1768                    "top_p": top_p,
1769                },
1770                thread_create_and_run_params.ThreadCreateAndRunParams,
1771            ),
1772            options=make_request_options(
1773                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1774            ),
1775            cast_to=Run,
1776            stream=True,
1777            stream_cls=AsyncStream[AssistantStreamEvent],
1778        )
1779        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
1780
1781
1782class ThreadsWithRawResponse:
1783    def __init__(self, threads: Threads) -> None:
1784        self._threads = threads
1785
1786        self.create = (  # pyright: ignore[reportDeprecated]
1787            _legacy_response.to_raw_response_wrapper(
1788                threads.create,  # pyright: ignore[reportDeprecated],
1789            )
1790        )
1791        self.retrieve = (  # pyright: ignore[reportDeprecated]
1792            _legacy_response.to_raw_response_wrapper(
1793                threads.retrieve,  # pyright: ignore[reportDeprecated],
1794            )
1795        )
1796        self.update = (  # pyright: ignore[reportDeprecated]
1797            _legacy_response.to_raw_response_wrapper(
1798                threads.update,  # pyright: ignore[reportDeprecated],
1799            )
1800        )
1801        self.delete = (  # pyright: ignore[reportDeprecated]
1802            _legacy_response.to_raw_response_wrapper(
1803                threads.delete,  # pyright: ignore[reportDeprecated],
1804            )
1805        )
1806        self.create_and_run = (  # pyright: ignore[reportDeprecated]
1807            _legacy_response.to_raw_response_wrapper(
1808                threads.create_and_run,  # pyright: ignore[reportDeprecated],
1809            )
1810        )
1811
1812    @cached_property
1813    def runs(self) -> RunsWithRawResponse:
1814        return RunsWithRawResponse(self._threads.runs)
1815
1816    @cached_property
1817    def messages(self) -> MessagesWithRawResponse:
1818        return MessagesWithRawResponse(self._threads.messages)
1819
1820
1821class AsyncThreadsWithRawResponse:
1822    def __init__(self, threads: AsyncThreads) -> None:
1823        self._threads = threads
1824
1825        self.create = (  # pyright: ignore[reportDeprecated]
1826            _legacy_response.async_to_raw_response_wrapper(
1827                threads.create,  # pyright: ignore[reportDeprecated],
1828            )
1829        )
1830        self.retrieve = (  # pyright: ignore[reportDeprecated]
1831            _legacy_response.async_to_raw_response_wrapper(
1832                threads.retrieve,  # pyright: ignore[reportDeprecated],
1833            )
1834        )
1835        self.update = (  # pyright: ignore[reportDeprecated]
1836            _legacy_response.async_to_raw_response_wrapper(
1837                threads.update,  # pyright: ignore[reportDeprecated],
1838            )
1839        )
1840        self.delete = (  # pyright: ignore[reportDeprecated]
1841            _legacy_response.async_to_raw_response_wrapper(
1842                threads.delete,  # pyright: ignore[reportDeprecated],
1843            )
1844        )
1845        self.create_and_run = (  # pyright: ignore[reportDeprecated]
1846            _legacy_response.async_to_raw_response_wrapper(
1847                threads.create_and_run,  # pyright: ignore[reportDeprecated],
1848            )
1849        )
1850
1851    @cached_property
1852    def runs(self) -> AsyncRunsWithRawResponse:
1853        return AsyncRunsWithRawResponse(self._threads.runs)
1854
1855    @cached_property
1856    def messages(self) -> AsyncMessagesWithRawResponse:
1857        return AsyncMessagesWithRawResponse(self._threads.messages)
1858
1859
1860class ThreadsWithStreamingResponse:
1861    def __init__(self, threads: Threads) -> None:
1862        self._threads = threads
1863
1864        self.create = (  # pyright: ignore[reportDeprecated]
1865            to_streamed_response_wrapper(
1866                threads.create,  # pyright: ignore[reportDeprecated],
1867            )
1868        )
1869        self.retrieve = (  # pyright: ignore[reportDeprecated]
1870            to_streamed_response_wrapper(
1871                threads.retrieve,  # pyright: ignore[reportDeprecated],
1872            )
1873        )
1874        self.update = (  # pyright: ignore[reportDeprecated]
1875            to_streamed_response_wrapper(
1876                threads.update,  # pyright: ignore[reportDeprecated],
1877            )
1878        )
1879        self.delete = (  # pyright: ignore[reportDeprecated]
1880            to_streamed_response_wrapper(
1881                threads.delete,  # pyright: ignore[reportDeprecated],
1882            )
1883        )
1884        self.create_and_run = (  # pyright: ignore[reportDeprecated]
1885            to_streamed_response_wrapper(
1886                threads.create_and_run,  # pyright: ignore[reportDeprecated],
1887            )
1888        )
1889
1890    @cached_property
1891    def runs(self) -> RunsWithStreamingResponse:
1892        return RunsWithStreamingResponse(self._threads.runs)
1893
1894    @cached_property
1895    def messages(self) -> MessagesWithStreamingResponse:
1896        return MessagesWithStreamingResponse(self._threads.messages)
1897
1898
1899class AsyncThreadsWithStreamingResponse:
1900    def __init__(self, threads: AsyncThreads) -> None:
1901        self._threads = threads
1902
1903        self.create = (  # pyright: ignore[reportDeprecated]
1904            async_to_streamed_response_wrapper(
1905                threads.create,  # pyright: ignore[reportDeprecated],
1906            )
1907        )
1908        self.retrieve = (  # pyright: ignore[reportDeprecated]
1909            async_to_streamed_response_wrapper(
1910                threads.retrieve,  # pyright: ignore[reportDeprecated],
1911            )
1912        )
1913        self.update = (  # pyright: ignore[reportDeprecated]
1914            async_to_streamed_response_wrapper(
1915                threads.update,  # pyright: ignore[reportDeprecated],
1916            )
1917        )
1918        self.delete = (  # pyright: ignore[reportDeprecated]
1919            async_to_streamed_response_wrapper(
1920                threads.delete,  # pyright: ignore[reportDeprecated],
1921            )
1922        )
1923        self.create_and_run = (  # pyright: ignore[reportDeprecated]
1924            async_to_streamed_response_wrapper(
1925                threads.create_and_run,  # pyright: ignore[reportDeprecated],
1926            )
1927        )
1928
1929    @cached_property
1930    def runs(self) -> AsyncRunsWithStreamingResponse:
1931        return AsyncRunsWithStreamingResponse(self._threads.runs)
1932
1933    @cached_property
1934    def messages(self) -> AsyncMessagesWithStreamingResponse:
1935        return AsyncMessagesWithStreamingResponse(self._threads.messages)