# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

import typing_extensions
from typing import Union, Iterable, Optional
from functools import partial
from typing_extensions import Literal, overload

import httpx

from .... import _legacy_response
from .messages import (
    Messages,
    AsyncMessages,
    MessagesWithRawResponse,
    AsyncMessagesWithRawResponse,
    MessagesWithStreamingResponse,
    AsyncMessagesWithStreamingResponse,
)
from ...._types import NOT_GIVEN, Body, Omit, Query, Headers, NotGiven, omit, not_given
from ...._utils import required_args, maybe_transform, async_maybe_transform
from .runs.runs import (
    Runs,
    AsyncRuns,
    RunsWithRawResponse,
    AsyncRunsWithRawResponse,
    RunsWithStreamingResponse,
    AsyncRunsWithStreamingResponse,
)
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ...._streaming import Stream, AsyncStream
from ....types.beta import (
    thread_create_params,
    thread_update_params,
    thread_create_and_run_params,
)
from ...._base_client import make_request_options
from ....lib.streaming import (
    AssistantEventHandler,
    AssistantEventHandlerT,
    AssistantStreamManager,
    AsyncAssistantEventHandler,
    AsyncAssistantEventHandlerT,
    AsyncAssistantStreamManager,
)
from ....types.beta.thread import Thread
from ....types.beta.threads.run import Run
from ....types.shared.chat_model import ChatModel
from ....types.beta.thread_deleted import ThreadDeleted
from ....types.shared_params.metadata import Metadata
from ....types.beta.assistant_tool_param import AssistantToolParam
from ....types.beta.assistant_stream_event import AssistantStreamEvent
from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam

__all__ = ["Threads", "AsyncThreads"]


class Threads(SyncAPIResource):
    @cached_property
    def runs(self) -> Runs:
        return Runs(self._client)

    @cached_property
    def messages(self) -> Messages:
        return Messages(self._client)

    @cached_property
    def with_raw_response(self) -> ThreadsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return ThreadsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> ThreadsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return ThreadsWithStreamingResponse(self)

    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    def create(
        self,
        *,
        messages: Iterable[thread_create_params.Message] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Thread:
        """
        Create a thread.

        Args:
          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
              start the thread with.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          tool_resources: A set of resources that are made available to the assistant's tools in this
              thread. The resources are specific to the type of tool. For example, the
              `code_interpreter` tool requires a list of file IDs, while the `file_search`
              tool requires a list of vector store IDs.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            "/threads",
            body=maybe_transform(
                {
                    "messages": messages,
                    "metadata": metadata,
                    "tool_resources": tool_resources,
                },
                thread_create_params.ThreadCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Thread,
        )
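
    # Usage sketch (a minimal example with placeholder values; assumes an `OpenAI`
    # client with credentials taken from the environment). Note the Assistants API
    # is deprecated in favor of the Responses API:
    #
    #     from openai import OpenAI
    #
    #     client = OpenAI()
    #     thread = client.beta.threads.create(
    #         messages=[{"role": "user", "content": "Hello!"}],
    #         metadata={"session": "demo"},
    #     )
    #     print(thread.id)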

    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    def retrieve(
        self,
        thread_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Thread:
        """
        Retrieves a thread.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not thread_id:
            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._get(
            f"/threads/{thread_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Thread,
        )
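
    # Usage sketch ("thread_abc123" is a placeholder thread ID):
    #
    #     thread = client.beta.threads.retrieve("thread_abc123")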

    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    def update(
        self,
        thread_id: str,
        *,
        metadata: Optional[Metadata] | Omit = omit,
        tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Thread:
        """
        Modifies a thread.

        Args:
          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          tool_resources: A set of resources that are made available to the assistant's tools in this
              thread. The resources are specific to the type of tool. For example, the
              `code_interpreter` tool requires a list of file IDs, while the `file_search`
              tool requires a list of vector store IDs.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not thread_id:
            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            f"/threads/{thread_id}",
            body=maybe_transform(
                {
                    "metadata": metadata,
                    "tool_resources": tool_resources,
                },
                thread_update_params.ThreadUpdateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Thread,
        )
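
    # Usage sketch (placeholder ID; metadata keys are capped at 64 characters and
    # values at 512, per the docstring above):
    #
    #     thread = client.beta.threads.update(
    #         "thread_abc123",
    #         metadata={"status": "archived"},
    #     )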

    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    def delete(
        self,
        thread_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ThreadDeleted:
        """
        Delete a thread.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not thread_id:
            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._delete(
            f"/threads/{thread_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ThreadDeleted,
        )
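
    # Usage sketch (placeholder ID):
    #
    #     deleted = client.beta.threads.delete("thread_abc123")
    #     assert deleted.deleted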

    @overload
    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    def create_and_run(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        stream: Optional[Literal[False]] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Run:
        """
        Create a thread and run it in one request.

        Args:
          assistant_id: The ID of the
              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
              execute this run.

          instructions: Override the default system message of the assistant. This is useful for
              modifying the behavior on a per-run basis.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
              run. The run will make a best effort to use only the number of completion tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              completion tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
              The run will make a best effort to use only the number of prompt tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              prompt tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
              be used to execute this run. If a value is provided here, it will override the
              model associated with the assistant. If not, the model associated with the
              assistant will be used.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          stream: If `true`, returns a stream of events that happen during the Run as server-sent
              events, terminating when the Run enters a terminal state with a `data: [DONE]`
              message.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          thread: Options to create a new thread. If no thread is provided when running a request,
              an empty thread will be created.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tools and instead generates a message. `auto` is the default value
              and means the model can pick between generating a message or calling one or more
              tools. `required` means the model must call one or more tools before responding
              to the user. Specifying a particular tool like `{"type": "file_search"}` or
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: Override the tools the assistant can use for this run. This is useful for
              modifying the behavior on a per-run basis.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
              control the initial context window of the run.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    def create_and_run(
        self,
        *,
        assistant_id: str,
        stream: Literal[True],
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Stream[AssistantStreamEvent]:
        """
        Create a thread and run it in one request.

        Args:
          assistant_id: The ID of the
              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
              execute this run.

          stream: If `true`, returns a stream of events that happen during the Run as server-sent
              events, terminating when the Run enters a terminal state with a `data: [DONE]`
              message.

          instructions: Override the default system message of the assistant. This is useful for
              modifying the behavior on a per-run basis.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
              run. The run will make a best effort to use only the number of completion tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              completion tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
              The run will make a best effort to use only the number of prompt tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              prompt tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
              be used to execute this run. If a value is provided here, it will override the
              model associated with the assistant. If not, the model associated with the
              assistant will be used.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          thread: Options to create a new thread. If no thread is provided when running a request,
              an empty thread will be created.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tools and instead generates a message. `auto` is the default value
              and means the model can pick between generating a message or calling one or more
              tools. `required` means the model must call one or more tools before responding
              to the user. Specifying a particular tool like `{"type": "file_search"}` or
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: Override the tools the assistant can use for this run. This is useful for
              modifying the behavior on a per-run basis.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
              control the initial context window of the run.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    def create_and_run(
        self,
        *,
        assistant_id: str,
        stream: bool,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Run | Stream[AssistantStreamEvent]:
        """
        Create a thread and run it in one request.

        Args:
          assistant_id: The ID of the
              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
              execute this run.

          stream: If `true`, returns a stream of events that happen during the Run as server-sent
              events, terminating when the Run enters a terminal state with a `data: [DONE]`
              message.

          instructions: Override the default system message of the assistant. This is useful for
              modifying the behavior on a per-run basis.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
              run. The run will make a best effort to use only the number of completion tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              completion tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
              The run will make a best effort to use only the number of prompt tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              prompt tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
              be used to execute this run. If a value is provided here, it will override the
              model associated with the assistant. If not, the model associated with the
              assistant will be used.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          thread: Options to create a new thread. If no thread is provided when running a request,
              an empty thread will be created.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tools and instead generates a message. `auto` is the default value
              and means the model can pick between generating a message or calling one or more
              tools. `required` means the model must call one or more tools before responding
              to the user. Specifying a particular tool like `{"type": "file_search"}` or
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: Override the tools the assistant can use for this run. This is useful for
              modifying the behavior on a per-run basis.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
              control the initial context window of the run.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    @required_args(["assistant_id"], ["assistant_id", "stream"])
    def create_and_run(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Run | Stream[AssistantStreamEvent]:
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            "/threads/runs",
            body=maybe_transform(
                {
                    "assistant_id": assistant_id,
                    "instructions": instructions,
                    "max_completion_tokens": max_completion_tokens,
                    "max_prompt_tokens": max_prompt_tokens,
                    "metadata": metadata,
                    "model": model,
                    "parallel_tool_calls": parallel_tool_calls,
                    "response_format": response_format,
                    "stream": stream,
                    "temperature": temperature,
                    "thread": thread,
                    "tool_choice": tool_choice,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                    "truncation_strategy": truncation_strategy,
                },
                thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
                if stream
                else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Run,
            stream=stream or False,
            stream_cls=Stream[AssistantStreamEvent],
        )
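
    # Usage sketch (placeholder assistant ID). Without `stream=True` the call
    # returns a `Run`; with `stream=True` it returns a
    # `Stream[AssistantStreamEvent]` that can be iterated as the run progresses:
    #
    #     run = client.beta.threads.create_and_run(
    #         assistant_id="asst_abc123",
    #         thread={"messages": [{"role": "user", "content": "Hi!"}]},
    #     )
    #
    #     for event in client.beta.threads.create_and_run(
    #         assistant_id="asst_abc123",
    #         stream=True,
    #     ):
    #         print(event.event)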

    def create_and_run_poll(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        poll_interval_ms: int | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Run:
        """
        A helper to create a thread, start a run and then poll for a terminal state.
        More information on Run lifecycles can be found here:
        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
        """
        run = self.create_and_run(  # pyright: ignore[reportDeprecated]
            assistant_id=assistant_id,
            instructions=instructions,
            max_completion_tokens=max_completion_tokens,
            max_prompt_tokens=max_prompt_tokens,
            metadata=metadata,
            model=model,
            parallel_tool_calls=parallel_tool_calls,
            response_format=response_format,
            temperature=temperature,
            stream=False,
            thread=thread,
            tool_resources=tool_resources,
            tool_choice=tool_choice,
            truncation_strategy=truncation_strategy,
            top_p=top_p,
            tools=tools,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms)  # pyright: ignore[reportDeprecated]
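
    # Usage sketch (placeholder assistant ID). The helper blocks until the run
    # reaches a terminal state, checking every `poll_interval_ms` milliseconds:
    #
    #     run = client.beta.threads.create_and_run_poll(
    #         assistant_id="asst_abc123",
    #         poll_interval_ms=1000,
    #     )
    #     print(run.status)  # e.g. "completed"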

    @overload
    def create_and_run_stream(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AssistantStreamManager[AssistantEventHandler]:
        """Create a thread and stream the run back"""
        ...

    @overload
    def create_and_run_stream(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        event_handler: AssistantEventHandlerT,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AssistantStreamManager[AssistantEventHandlerT]:
        """Create a thread and stream the run back"""
        ...

    def create_and_run_stream(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        event_handler: AssistantEventHandlerT | None = None,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
        """Create a thread and stream the run back"""
        extra_headers = {
            "OpenAI-Beta": "assistants=v2",
            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
            **(extra_headers or {}),
        }
        make_request = partial(
            self._post,
            "/threads/runs",
            body=maybe_transform(
                {
                    "assistant_id": assistant_id,
                    "instructions": instructions,
                    "max_completion_tokens": max_completion_tokens,
                    "max_prompt_tokens": max_prompt_tokens,
                    "metadata": metadata,
                    "model": model,
                    "parallel_tool_calls": parallel_tool_calls,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "stream": True,
                    "thread": thread,
                    "tools": tools,
                    "tool_resources": tool_resources,
                    "truncation_strategy": truncation_strategy,
                    "top_p": top_p,
                },
                thread_create_and_run_params.ThreadCreateAndRunParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Run,
            stream=True,
            stream_cls=Stream[AssistantStreamEvent],
        )
        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
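
    # Usage sketch (placeholder assistant ID). The returned manager is a context
    # manager whose handler exposes convenience iterators such as `text_deltas`;
    # a custom `AssistantEventHandler` subclass may be passed as `event_handler`:
    #
    #     with client.beta.threads.create_and_run_stream(
    #         assistant_id="asst_abc123",
    #     ) as stream:
    #         for text in stream.text_deltas:
    #             print(text, end="", flush=True)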


class AsyncThreads(AsyncAPIResource):
    @cached_property
    def runs(self) -> AsyncRuns:
        return AsyncRuns(self._client)

    @cached_property
    def messages(self) -> AsyncMessages:
        return AsyncMessages(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncThreadsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncThreadsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncThreadsWithStreamingResponse(self)

    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    async def create(
        self,
        *,
        messages: Iterable[thread_create_params.Message] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Thread:
        """
        Create a thread.

        Args:
          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
              start the thread with.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          tool_resources: A set of resources that are made available to the assistant's tools in this
              thread. The resources are specific to the type of tool. For example, the
              `code_interpreter` tool requires a list of file IDs, while the `file_search`
              tool requires a list of vector store IDs.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return await self._post(
            "/threads",
            body=await async_maybe_transform(
                {
                    "messages": messages,
                    "metadata": metadata,
                    "tool_resources": tool_resources,
                },
                thread_create_params.ThreadCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Thread,
        )
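
    # Usage sketch (async client; the same `await` pattern applies to the other
    # methods on this class):
    #
    #     import asyncio
    #
    #     from openai import AsyncOpenAI
    #
    #     async def main() -> None:
    #         client = AsyncOpenAI()
    #         thread = await client.beta.threads.create()
    #         print(thread.id)
    #
    #     asyncio.run(main())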
1005
1006 @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1007 async def retrieve(
1008 self,
1009 thread_id: str,
1010 *,
1011 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1012 # The extra values given here take precedence over values defined on the client or passed to this method.
1013 extra_headers: Headers | None = None,
1014 extra_query: Query | None = None,
1015 extra_body: Body | None = None,
1016 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1017 ) -> Thread:
1018 """
1019 Retrieves a thread.
1020
1021 Args:
1022 extra_headers: Send extra headers
1023
1024 extra_query: Add additional query parameters to the request
1025
1026 extra_body: Add additional JSON properties to the request
1027
1028 timeout: Override the client-level default timeout for this request, in seconds
1029 """
1030 if not thread_id:
1031 raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
1032 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
1033 return await self._get(
1034 f"/threads/{thread_id}",
1035 options=make_request_options(
1036 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1037 ),
1038 cast_to=Thread,
1039 )
1040
1041 @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1042 async def update(
1043 self,
1044 thread_id: str,
1045 *,
1046 metadata: Optional[Metadata] | Omit = omit,
1047 tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit,
1048 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1049 # The extra values given here take precedence over values defined on the client or passed to this method.
1050 extra_headers: Headers | None = None,
1051 extra_query: Query | None = None,
1052 extra_body: Body | None = None,
1053 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1054 ) -> Thread:
1055 """
1056 Modifies a thread.
1057
1058 Args:
1059 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
1060 for storing additional information about the object in a structured format, and
1061 querying for objects via API or the dashboard.
1062
1063 Keys are strings with a maximum length of 64 characters. Values are strings with
1064 a maximum length of 512 characters.
1065
1066 tool_resources: A set of resources that are made available to the assistant's tools in this
1067 thread. The resources are specific to the type of tool. For example, the
1068 `code_interpreter` tool requires a list of file IDs, while the `file_search`
1069 tool requires a list of vector store IDs.
1070
1071 extra_headers: Send extra headers
1072
1073 extra_query: Add additional query parameters to the request
1074
1075 extra_body: Add additional JSON properties to the request
1076
1077 timeout: Override the client-level default timeout for this request, in seconds
1078 """
1079 if not thread_id:
1080 raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
1081 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
1082 return await self._post(
1083 f"/threads/{thread_id}",
1084 body=await async_maybe_transform(
1085 {
1086 "metadata": metadata,
1087 "tool_resources": tool_resources,
1088 },
1089 thread_update_params.ThreadUpdateParams,
1090 ),
1091 options=make_request_options(
1092 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1093 ),
1094 cast_to=Thread,
1095 )
1096
1097 @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1098 async def delete(
1099 self,
1100 thread_id: str,
1101 *,
1102 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1103 # The extra values given here take precedence over values defined on the client or passed to this method.
1104 extra_headers: Headers | None = None,
1105 extra_query: Query | None = None,
1106 extra_body: Body | None = None,
1107 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1108 ) -> ThreadDeleted:
1109 """
1110 Delete a thread.
1111
1112 Args:
1113 extra_headers: Send extra headers
1114
1115 extra_query: Add additional query parameters to the request
1116
1117 extra_body: Add additional JSON properties to the request
1118
1119 timeout: Override the client-level default timeout for this request, in seconds
1120 """
1121 if not thread_id:
1122 raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
1123 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
1124 return await self._delete(
1125 f"/threads/{thread_id}",
1126 options=make_request_options(
1127 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1128 ),
1129 cast_to=ThreadDeleted,
1130 )
1131
1132 @overload
1133 @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
1134 async def create_and_run(
1135 self,
1136 *,
1137 assistant_id: str,
1138 instructions: Optional[str] | Omit = omit,
1139 max_completion_tokens: Optional[int] | Omit = omit,
1140 max_prompt_tokens: Optional[int] | Omit = omit,
1141 metadata: Optional[Metadata] | Omit = omit,
1142 model: Union[str, ChatModel, None] | Omit = omit,
1143 parallel_tool_calls: bool | Omit = omit,
1144 response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
1145 stream: Optional[Literal[False]] | Omit = omit,
1146 temperature: Optional[float] | Omit = omit,
1147 thread: thread_create_and_run_params.Thread | Omit = omit,
1148 tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
1149 tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
1150 tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
1151 top_p: Optional[float] | Omit = omit,
1152 truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
1153 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1154 # The extra values given here take precedence over values defined on the client or passed to this method.
1155 extra_headers: Headers | None = None,
1156 extra_query: Query | None = None,
1157 extra_body: Body | None = None,
1158 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1159 ) -> Run:
1160 """
1161 Create a thread and run it in one request.
1162
1163 Args:
1164 assistant_id: The ID of the
1165 [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
1166 execute this run.
1167
1168 instructions: Override the default system message of the assistant. This is useful for
1169 modifying the behavior on a per-run basis.
1170
1171 max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
1172 run. The run will make a best effort to use only the number of completion tokens
1173 specified, across multiple turns of the run. If the run exceeds the number of
1174 completion tokens specified, the run will end with status `incomplete`. See
1175 `incomplete_details` for more info.
1176
1177 max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
1178 The run will make a best effort to use only the number of prompt tokens
1179 specified, across multiple turns of the run. If the run exceeds the number of
              prompt tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
              be used to execute this run. If a value is provided here, it will override the
              model associated with the assistant. If not, the model associated with the
              assistant will be used.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          stream: If `true`, returns a stream of events that happen during the Run as server-sent
              events, terminating when the Run enters a terminal state with a `data: [DONE]`
              message.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          thread: Options to create a new thread. If no thread is provided when running a request,
              an empty thread will be created.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tools and instead generates a message. `auto` is the default value
              and means the model can pick between generating a message or calling one or more
              tools. `required` means the model must call one or more tools before responding
              to the user. Specifying a particular tool like `{"type": "file_search"}` or
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: Override the tools the assistant can use for this run. This is useful for
              modifying the behavior on a per-run basis.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
              control the initial context window of the run.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...
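
    # Illustrative sketch only (not part of the generated client). Assuming an
    # `AsyncOpenAI` client named `client` and a placeholder assistant ID, JSON
    # mode per the docstring above also requires asking for JSON in the prompt:
    #
    #     run = await client.beta.threads.create_and_run(
    #         assistant_id="asst_...",
    #         thread={"messages": [{"role": "user", "content": "Reply with a JSON object."}]},
    #         response_format={"type": "json_object"},
    #     )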

    @overload
    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    async def create_and_run(
        self,
        *,
        assistant_id: str,
        stream: Literal[True],
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncStream[AssistantStreamEvent]:
        """
        Create a thread and run it in one request.

        Args:
          assistant_id: The ID of the
              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
              execute this run.

          stream: If `true`, returns a stream of events that happen during the Run as server-sent
              events, terminating when the Run enters a terminal state with a `data: [DONE]`
              message.

          instructions: Override the default system message of the assistant. This is useful for
              modifying the behavior on a per-run basis.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
              run. The run will make a best effort to use only the number of completion tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              completion tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
              The run will make a best effort to use only the number of prompt tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              prompt tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
              be used to execute this run. If a value is provided here, it will override the
              model associated with the assistant. If not, the model associated with the
              assistant will be used.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          thread: Options to create a new thread. If no thread is provided when running a request,
              an empty thread will be created.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tools and instead generates a message. `auto` is the default value
              and means the model can pick between generating a message or calling one or more
              tools. `required` means the model must call one or more tools before responding
              to the user. Specifying a particular tool like `{"type": "file_search"}` or
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: Override the tools the assistant can use for this run. This is useful for
              modifying the behavior on a per-run basis.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
              control the initial context window of the run.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...
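
    # Illustrative sketch only: with `stream=True`, this overload resolves to an
    # `AsyncStream[AssistantStreamEvent]` that can be iterated as events arrive
    # (assuming an `AsyncOpenAI` client named `client`):
    #
    #     events = await client.beta.threads.create_and_run(
    #         assistant_id="asst_...",
    #         stream=True,
    #     )
    #     async for event in events:
    #         print(event.event)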

    @overload
    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    async def create_and_run(
        self,
        *,
        assistant_id: str,
        stream: bool,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Run | AsyncStream[AssistantStreamEvent]:
        """
        Create a thread and run it in one request.

        Args:
          assistant_id: The ID of the
              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
              execute this run.

          stream: If `true`, returns a stream of events that happen during the Run as server-sent
              events, terminating when the Run enters a terminal state with a `data: [DONE]`
              message.

          instructions: Override the default system message of the assistant. This is useful for
              modifying the behavior on a per-run basis.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
              run. The run will make a best effort to use only the number of completion tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              completion tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
              The run will make a best effort to use only the number of prompt tokens
              specified, across multiple turns of the run. If the run exceeds the number of
              prompt tokens specified, the run will end with status `incomplete`. See
              `incomplete_details` for more info.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
              be used to execute this run. If a value is provided here, it will override the
              model associated with the assistant. If not, the model associated with the
              assistant will be used.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          thread: Options to create a new thread. If no thread is provided when running a request,
              an empty thread will be created.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tools and instead generates a message. `auto` is the default value
              and means the model can pick between generating a message or calling one or more
              tools. `required` means the model must call one or more tools before responding
              to the user. Specifying a particular tool like `{"type": "file_search"}` or
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: Override the tools the assistant can use for this run. This is useful for
              modifying the behavior on a per-run basis.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
              control the initial context window of the run.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...
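
    # Illustrative sketch only: forcing a specific function tool via
    # `tool_choice`, as described in the docstring above. The tool name
    # `my_function` is a placeholder:
    #
    #     run = await client.beta.threads.create_and_run(
    #         assistant_id="asst_...",
    #         tools=[{"type": "function", "function": {"name": "my_function"}}],
    #         tool_choice={"type": "function", "function": {"name": "my_function"}},
    #     )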

    @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
    @required_args(["assistant_id"], ["assistant_id", "stream"])
    async def create_and_run(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Run | AsyncStream[AssistantStreamEvent]:
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return await self._post(
            "/threads/runs",
            body=await async_maybe_transform(
                {
                    "assistant_id": assistant_id,
                    "instructions": instructions,
                    "max_completion_tokens": max_completion_tokens,
                    "max_prompt_tokens": max_prompt_tokens,
                    "metadata": metadata,
                    "model": model,
                    "parallel_tool_calls": parallel_tool_calls,
                    "response_format": response_format,
                    "stream": stream,
                    "temperature": temperature,
                    "thread": thread,
                    "tool_choice": tool_choice,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                    "truncation_strategy": truncation_strategy,
                },
                thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
                if stream
                else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Run,
            stream=stream or False,
            stream_cls=AsyncStream[AssistantStreamEvent],
        )

    async def create_and_run_poll(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        poll_interval_ms: int | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Run:
        """
        A helper to create a thread, start a run and then poll for a terminal state.
        More information on Run lifecycles can be found here:
        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
        """
        run = await self.create_and_run(  # pyright: ignore[reportDeprecated]
            assistant_id=assistant_id,
            instructions=instructions,
            max_completion_tokens=max_completion_tokens,
            max_prompt_tokens=max_prompt_tokens,
            metadata=metadata,
            model=model,
            parallel_tool_calls=parallel_tool_calls,
            response_format=response_format,
            temperature=temperature,
            stream=False,
            thread=thread,
            tool_resources=tool_resources,
            tool_choice=tool_choice,
            truncation_strategy=truncation_strategy,
            top_p=top_p,
            tools=tools,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self.runs.poll(  # pyright: ignore[reportDeprecated]
            run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms
        )
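
    # Illustrative sketch only (assuming an `AsyncOpenAI` client named `client`):
    # this helper blocks until the run reaches a terminal state and returns the
    # final `Run`:
    #
    #     run = await client.beta.threads.create_and_run_poll(
    #         assistant_id="asst_...",
    #         poll_interval_ms=1000,
    #     )
    #     print(run.status)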

    @overload
    def create_and_run_stream(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
        """Create a thread and stream the run back"""
        ...

    @overload
    def create_and_run_stream(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        event_handler: AsyncAssistantEventHandlerT,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
        """Create a thread and stream the run back"""
        ...

    def create_and_run_stream(
        self,
        *,
        assistant_id: str,
        instructions: Optional[str] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_prompt_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[str, ChatModel, None] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        thread: thread_create_and_run_params.Thread | Omit = omit,
        tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
        tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
        tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
        event_handler: AsyncAssistantEventHandlerT | None = None,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> (
        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
    ):
        """Create a thread and stream the run back"""
        extra_headers = {
            "OpenAI-Beta": "assistants=v2",
            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
            **(extra_headers or {}),
        }
        request = self._post(
            "/threads/runs",
            body=maybe_transform(
                {
                    "assistant_id": assistant_id,
                    "instructions": instructions,
                    "max_completion_tokens": max_completion_tokens,
                    "max_prompt_tokens": max_prompt_tokens,
                    "metadata": metadata,
                    "model": model,
                    "parallel_tool_calls": parallel_tool_calls,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "stream": True,
                    "thread": thread,
                    "tools": tools,
                    "tool_resources": tool_resources,
                    "truncation_strategy": truncation_strategy,
                    "top_p": top_p,
                },
                thread_create_and_run_params.ThreadCreateAndRunParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Run,
            stream=True,
            stream_cls=AsyncStream[AssistantStreamEvent],
        )
        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
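
    # Illustrative sketch only: the returned manager is used as an async context
    # manager, and the handler it yields can be drained with `until_done()`
    # (assuming an `AsyncOpenAI` client named `client`):
    #
    #     async with client.beta.threads.create_and_run_stream(
    #         assistant_id="asst_...",
    #     ) as stream:
    #         await stream.until_done()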


class ThreadsWithRawResponse:
    def __init__(self, threads: Threads) -> None:
        self._threads = threads

        self.create = (  # pyright: ignore[reportDeprecated]
            _legacy_response.to_raw_response_wrapper(
                threads.create,  # pyright: ignore[reportDeprecated],
            )
        )
        self.retrieve = (  # pyright: ignore[reportDeprecated]
            _legacy_response.to_raw_response_wrapper(
                threads.retrieve,  # pyright: ignore[reportDeprecated],
            )
        )
        self.update = (  # pyright: ignore[reportDeprecated]
            _legacy_response.to_raw_response_wrapper(
                threads.update,  # pyright: ignore[reportDeprecated],
            )
        )
        self.delete = (  # pyright: ignore[reportDeprecated]
            _legacy_response.to_raw_response_wrapper(
                threads.delete,  # pyright: ignore[reportDeprecated],
            )
        )
        self.create_and_run = (  # pyright: ignore[reportDeprecated]
            _legacy_response.to_raw_response_wrapper(
                threads.create_and_run,  # pyright: ignore[reportDeprecated],
            )
        )

    @cached_property
    def runs(self) -> RunsWithRawResponse:
        return RunsWithRawResponse(self._threads.runs)

    @cached_property
    def messages(self) -> MessagesWithRawResponse:
        return MessagesWithRawResponse(self._threads.messages)


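# Illustrative sketch only (assuming a sync `OpenAI` client named `client`):
# the raw-response wrapper exposes HTTP details and defers parsing to `.parse()`:
#
#     response = client.beta.threads.with_raw_response.create()
#     thread = response.parse()
#     request_id = response.headers.get("x-request-id")

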
class AsyncThreadsWithRawResponse:
    def __init__(self, threads: AsyncThreads) -> None:
        self._threads = threads

        self.create = (  # pyright: ignore[reportDeprecated]
            _legacy_response.async_to_raw_response_wrapper(
                threads.create,  # pyright: ignore[reportDeprecated],
            )
        )
        self.retrieve = (  # pyright: ignore[reportDeprecated]
            _legacy_response.async_to_raw_response_wrapper(
                threads.retrieve,  # pyright: ignore[reportDeprecated],
            )
        )
        self.update = (  # pyright: ignore[reportDeprecated]
            _legacy_response.async_to_raw_response_wrapper(
                threads.update,  # pyright: ignore[reportDeprecated],
            )
        )
        self.delete = (  # pyright: ignore[reportDeprecated]
            _legacy_response.async_to_raw_response_wrapper(
                threads.delete,  # pyright: ignore[reportDeprecated],
            )
        )
        self.create_and_run = (  # pyright: ignore[reportDeprecated]
            _legacy_response.async_to_raw_response_wrapper(
                threads.create_and_run,  # pyright: ignore[reportDeprecated],
            )
        )

    @cached_property
    def runs(self) -> AsyncRunsWithRawResponse:
        return AsyncRunsWithRawResponse(self._threads.runs)

    @cached_property
    def messages(self) -> AsyncMessagesWithRawResponse:
        return AsyncMessagesWithRawResponse(self._threads.messages)


class ThreadsWithStreamingResponse:
    def __init__(self, threads: Threads) -> None:
        self._threads = threads

        self.create = (  # pyright: ignore[reportDeprecated]
            to_streamed_response_wrapper(
                threads.create,  # pyright: ignore[reportDeprecated],
            )
        )
        self.retrieve = (  # pyright: ignore[reportDeprecated]
            to_streamed_response_wrapper(
                threads.retrieve,  # pyright: ignore[reportDeprecated],
            )
        )
        self.update = (  # pyright: ignore[reportDeprecated]
            to_streamed_response_wrapper(
                threads.update,  # pyright: ignore[reportDeprecated],
            )
        )
        self.delete = (  # pyright: ignore[reportDeprecated]
            to_streamed_response_wrapper(
                threads.delete,  # pyright: ignore[reportDeprecated],
            )
        )
        self.create_and_run = (  # pyright: ignore[reportDeprecated]
            to_streamed_response_wrapper(
                threads.create_and_run,  # pyright: ignore[reportDeprecated],
            )
        )

    @cached_property
    def runs(self) -> RunsWithStreamingResponse:
        return RunsWithStreamingResponse(self._threads.runs)

    @cached_property
    def messages(self) -> MessagesWithStreamingResponse:
        return MessagesWithStreamingResponse(self._threads.messages)


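# Illustrative sketch only: `.with_streaming_response` does not eagerly read the
# body; consume it inside a context manager (assuming a sync `OpenAI` client
# named `client`):
#
#     with client.beta.threads.with_streaming_response.create() as response:
#         print(response.headers.get("x-request-id"))

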
class AsyncThreadsWithStreamingResponse:
    def __init__(self, threads: AsyncThreads) -> None:
        self._threads = threads

        self.create = (  # pyright: ignore[reportDeprecated]
            async_to_streamed_response_wrapper(
                threads.create,  # pyright: ignore[reportDeprecated],
            )
        )
        self.retrieve = (  # pyright: ignore[reportDeprecated]
            async_to_streamed_response_wrapper(
                threads.retrieve,  # pyright: ignore[reportDeprecated],
            )
        )
        self.update = (  # pyright: ignore[reportDeprecated]
            async_to_streamed_response_wrapper(
                threads.update,  # pyright: ignore[reportDeprecated],
            )
        )
        self.delete = (  # pyright: ignore[reportDeprecated]
            async_to_streamed_response_wrapper(
                threads.delete,  # pyright: ignore[reportDeprecated],
            )
        )
        self.create_and_run = (  # pyright: ignore[reportDeprecated]
            async_to_streamed_response_wrapper(
                threads.create_and_run,  # pyright: ignore[reportDeprecated],
            )
        )

    @cached_property
    def runs(self) -> AsyncRunsWithStreamingResponse:
        return AsyncRunsWithStreamingResponse(self._threads.runs)

    @cached_property
    def messages(self) -> AsyncMessagesWithStreamingResponse:
        return AsyncMessagesWithStreamingResponse(self._threads.messages)