1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
3from __future__ import annotations
4
5from typing import Union, Iterable, Optional
6from typing_extensions import Literal
7
8import httpx
9
10from ... import _legacy_response
11from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
12from ..._utils import maybe_transform, async_maybe_transform
13from ..._compat import cached_property
14from ..._resource import SyncAPIResource, AsyncAPIResource
15from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
16from ...pagination import SyncCursorPage, AsyncCursorPage
17from ...types.beta import (
18 assistant_list_params,
19 assistant_create_params,
20 assistant_update_params,
21)
22from ..._base_client import AsyncPaginator, make_request_options
23from ...types.beta.assistant import Assistant
24from ...types.shared.chat_model import ChatModel
25from ...types.beta.assistant_deleted import AssistantDeleted
26from ...types.shared_params.metadata import Metadata
27from ...types.shared.reasoning_effort import ReasoningEffort
28from ...types.beta.assistant_tool_param import AssistantToolParam
29from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
30
31__all__ = ["Assistants", "AsyncAssistants"]
32
33
class Assistants(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> AssistantsWithRawResponse:
        """
        Prefix any method call with this property to receive the raw HTTP
        response object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AssistantsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AssistantsWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AssistantsWithStreamingResponse(self)

    def create(
        self,
        *,
        model: Union[str, ChatModel],
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # The arguments below forward extra request data not covered by the typed kwargs.
        # Values supplied here override those configured on the client.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """Create an assistant with a model and instructions.

        Args:
          model: ID of the model to use. Use the
              [List models](https://platform.openai.com/docs/api-reference/models/list)
              API to see your available models, or consult the
              [Model overview](https://platform.openai.com/docs/models).

          description: Description of the assistant (max 512 characters).

          instructions: System instructions the assistant uses (max 256,000 characters).

          metadata: Up to 16 key-value pairs attached to the object, useful for storing
              structured information and for querying via the API or dashboard. Keys may
              be at most 64 characters; values at most 512 characters.

          name: Name of the assistant (max 256 characters).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
              Currently supported values are `none`, `minimal`, `low`, `medium`, `high`,
              and `xhigh`; lower effort yields faster responses and fewer reasoning
              tokens. Model-specific rules apply: `gpt-5.1` defaults to `none` and
              supports `none`/`low`/`medium`/`high` (tool calls work at every level);
              models before `gpt-5.1` default to `medium` and do not support `none`;
              `gpt-5-pro` defaults to (and only supports) `high`; `xhigh` is currently
              only supported for `gpt-5.1-codex-max`.

          response_format: Format the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting
              `{ "type": "json_schema", "json_schema": {...} }` enables
              [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs);
              `{ "type": "json_object" }` enables JSON mode. **Important:** in JSON mode
              you must also instruct the model to produce JSON via a system or user
              message, otherwise it may emit whitespace until hitting the token limit.
              Content may be truncated when `finish_reason="length"`.

          temperature: Sampling temperature between 0 and 2. Higher values (e.g. 0.8)
              are more random; lower values (e.g. 0.2) are more deterministic.

          tool_resources: Resources used by the assistant's tools, specific to each tool
              type (e.g. file IDs for `code_interpreter`, vector store IDs for
              `file_search`).

          tools: Tools enabled on the assistant, at most 128. Types are
              `code_interpreter`, `file_search`, or `function`.

          top_p: Nucleus sampling: only tokens within the top `top_p` probability mass
              are considered. Alter this or `temperature`, but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # The Assistants API is gated behind the v2 beta header; caller-provided
        # headers still take precedence on conflict.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        create_body = {
            "model": model,
            "description": description,
            "instructions": instructions,
            "metadata": metadata,
            "name": name,
            "reasoning_effort": reasoning_effort,
            "response_format": response_format,
            "temperature": temperature,
            "tool_resources": tool_resources,
            "tools": tools,
            "top_p": top_p,
        }
        request_options = make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        )
        return self._post(
            "/assistants",
            body=maybe_transform(create_body, assistant_create_params.AssistantCreateParams),
            options=request_options,
            cast_to=Assistant,
        )

    def retrieve(
        self,
        assistant_id: str,
        *,
        # The arguments below forward extra request data not covered by the typed kwargs.
        # Values supplied here override those configured on the client.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """Retrieves an assistant.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Guard against an empty path segment, which would hit the wrong endpoint.
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        request_options = make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        )
        return self._get(
            f"/assistants/{assistant_id}",
            options=request_options,
            cast_to=Assistant,
        )

    def update(
        self,
        assistant_id: str,
        *,
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[
            str,
            Literal[
                "gpt-5",
                "gpt-5-mini",
                "gpt-5-nano",
                "gpt-5-2025-08-07",
                "gpt-5-mini-2025-08-07",
                "gpt-5-nano-2025-08-07",
                "gpt-4.1",
                "gpt-4.1-mini",
                "gpt-4.1-nano",
                "gpt-4.1-2025-04-14",
                "gpt-4.1-mini-2025-04-14",
                "gpt-4.1-nano-2025-04-14",
                "o3-mini",
                "o3-mini-2025-01-31",
                "o1",
                "o1-2024-12-17",
                "gpt-4o",
                "gpt-4o-2024-11-20",
                "gpt-4o-2024-08-06",
                "gpt-4o-2024-05-13",
                "gpt-4o-mini",
                "gpt-4o-mini-2024-07-18",
                "gpt-4.5-preview",
                "gpt-4.5-preview-2025-02-27",
                "gpt-4-turbo",
                "gpt-4-turbo-2024-04-09",
                "gpt-4-0125-preview",
                "gpt-4-turbo-preview",
                "gpt-4-1106-preview",
                "gpt-4-vision-preview",
                "gpt-4",
                "gpt-4-0314",
                "gpt-4-0613",
                "gpt-4-32k",
                "gpt-4-32k-0314",
                "gpt-4-32k-0613",
                "gpt-3.5-turbo",
                "gpt-3.5-turbo-16k",
                "gpt-3.5-turbo-0613",
                "gpt-3.5-turbo-1106",
                "gpt-3.5-turbo-0125",
                "gpt-3.5-turbo-16k-0613",
            ],
        ]
        | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # The arguments below forward extra request data not covered by the typed kwargs.
        # Values supplied here override those configured on the client.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """Modifies an assistant.

        Args:
          description: Description of the assistant (max 512 characters).

          instructions: System instructions the assistant uses (max 256,000 characters).

          metadata: Up to 16 key-value pairs attached to the object, useful for storing
              structured information and for querying via the API or dashboard. Keys may
              be at most 64 characters; values at most 512 characters.

          model: ID of the model to use. Use the
              [List models](https://platform.openai.com/docs/api-reference/models/list)
              API to see your available models, or consult the
              [Model overview](https://platform.openai.com/docs/models).

          name: Name of the assistant (max 256 characters).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
              Currently supported values are `none`, `minimal`, `low`, `medium`, `high`,
              and `xhigh`; lower effort yields faster responses and fewer reasoning
              tokens. Model-specific rules apply: `gpt-5.1` defaults to `none` and
              supports `none`/`low`/`medium`/`high` (tool calls work at every level);
              models before `gpt-5.1` default to `medium` and do not support `none`;
              `gpt-5-pro` defaults to (and only supports) `high`; `xhigh` is currently
              only supported for `gpt-5.1-codex-max`.

          response_format: Format the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting
              `{ "type": "json_schema", "json_schema": {...} }` enables
              [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs);
              `{ "type": "json_object" }` enables JSON mode. **Important:** in JSON mode
              you must also instruct the model to produce JSON via a system or user
              message, otherwise it may emit whitespace until hitting the token limit.
              Content may be truncated when `finish_reason="length"`.

          temperature: Sampling temperature between 0 and 2. Higher values (e.g. 0.8)
              are more random; lower values (e.g. 0.2) are more deterministic.

          tool_resources: Resources used by the assistant's tools, specific to each tool
              type (e.g. file IDs for `code_interpreter`, vector store IDs for
              `file_search`).

          tools: Tools enabled on the assistant, at most 128. Types are
              `code_interpreter`, `file_search`, or `function`.

          top_p: Nucleus sampling: only tokens within the top `top_p` probability mass
              are considered. Alter this or `temperature`, but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Guard against an empty path segment, which would hit the wrong endpoint.
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        update_body = {
            "description": description,
            "instructions": instructions,
            "metadata": metadata,
            "model": model,
            "name": name,
            "reasoning_effort": reasoning_effort,
            "response_format": response_format,
            "temperature": temperature,
            "tool_resources": tool_resources,
            "tools": tools,
            "top_p": top_p,
        }
        request_options = make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        )
        return self._post(
            f"/assistants/{assistant_id}",
            body=maybe_transform(update_body, assistant_update_params.AssistantUpdateParams),
            options=request_options,
            cast_to=Assistant,
        )

    def list(
        self,
        *,
        after: str | Omit = omit,
        before: str | Omit = omit,
        limit: int | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # The arguments below forward extra request data not covered by the typed kwargs.
        # Values supplied here override those configured on the client.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncCursorPage[Assistant]:
        """Returns a list of assistants.

        Args:
          after: Pagination cursor: an object ID marking your place in the list. If a
              page ends with obj_foo, pass after=obj_foo to fetch the next page.

          before: Pagination cursor: an object ID marking your place in the list. If a
              page starts with obj_foo, pass before=obj_foo to fetch the previous page.

          limit: Maximum number of objects to return, between 1 and 100 (default 20).

          order: Sort order by `created_at`: `asc` (ascending) or `desc` (descending).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        list_query = maybe_transform(
            {
                "after": after,
                "before": before,
                "limit": limit,
                "order": order,
            },
            assistant_list_params.AssistantListParams,
        )
        return self._get_api_list(
            "/assistants",
            page=SyncCursorPage[Assistant],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=list_query,
            ),
            model=Assistant,
        )

    def delete(
        self,
        assistant_id: str,
        *,
        # The arguments below forward extra request data not covered by the typed kwargs.
        # Values supplied here override those configured on the client.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AssistantDeleted:
        """
        Delete an assistant.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Guard against an empty path segment, which would hit the wrong endpoint.
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        request_options = make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        )
        return self._delete(
            f"/assistants/{assistant_id}",
            options=request_options,
            cast_to=AssistantDeleted,
        )
501
502
503class AsyncAssistants(AsyncAPIResource):
504 @cached_property
505 def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
506 """
507 This property can be used as a prefix for any HTTP method call to return
508 the raw response object instead of the parsed content.
509
510 For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
511 """
512 return AsyncAssistantsWithRawResponse(self)
513
514 @cached_property
515 def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
516 """
517 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
518
519 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
520 """
521 return AsyncAssistantsWithStreamingResponse(self)
522
523 async def create(
524 self,
525 *,
526 model: Union[str, ChatModel],
527 description: Optional[str] | Omit = omit,
528 instructions: Optional[str] | Omit = omit,
529 metadata: Optional[Metadata] | Omit = omit,
530 name: Optional[str] | Omit = omit,
531 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
532 response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
533 temperature: Optional[float] | Omit = omit,
534 tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit,
535 tools: Iterable[AssistantToolParam] | Omit = omit,
536 top_p: Optional[float] | Omit = omit,
537 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
538 # The extra values given here take precedence over values defined on the client or passed to this method.
539 extra_headers: Headers | None = None,
540 extra_query: Query | None = None,
541 extra_body: Body | None = None,
542 timeout: float | httpx.Timeout | None | NotGiven = not_given,
543 ) -> Assistant:
544 """
545 Create an assistant with a model and instructions.
546
547 Args:
548 model: ID of the model to use. You can use the
549 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
550 see all of your available models, or see our
551 [Model overview](https://platform.openai.com/docs/models) for descriptions of
552 them.
553
554 description: The description of the assistant. The maximum length is 512 characters.
555
556 instructions: The system instructions that the assistant uses. The maximum length is 256,000
557 characters.
558
559 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
560 for storing additional information about the object in a structured format, and
561 querying for objects via API or the dashboard.
562
563 Keys are strings with a maximum length of 64 characters. Values are strings with
564 a maximum length of 512 characters.
565
566 name: The name of the assistant. The maximum length is 256 characters.
567
568 reasoning_effort: Constrains effort on reasoning for
569 [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
570 supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
571 Reducing reasoning effort can result in faster responses and fewer tokens used
572 on reasoning in a response.
573
574 - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
575 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
576 calls are supported for all reasoning values in gpt-5.1.
577 - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
578 support `none`.
579 - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
580 - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
581
582 response_format: Specifies the format that the model must output. Compatible with
583 [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
584 [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
585 and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
586
587 Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
588 Outputs which ensures the model will match your supplied JSON schema. Learn more
589 in the
590 [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
591
592 Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
593 message the model generates is valid JSON.
594
595 **Important:** when using JSON mode, you **must** also instruct the model to
596 produce JSON yourself via a system or user message. Without this, the model may
597 generate an unending stream of whitespace until the generation reaches the token
598 limit, resulting in a long-running and seemingly "stuck" request. Also note that
599 the message content may be partially cut off if `finish_reason="length"`, which
600 indicates the generation exceeded `max_tokens` or the conversation exceeded the
601 max context length.
602
603 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
604 make the output more random, while lower values like 0.2 will make it more
605 focused and deterministic.
606
607 tool_resources: A set of resources that are used by the assistant's tools. The resources are
608 specific to the type of tool. For example, the `code_interpreter` tool requires
609 a list of file IDs, while the `file_search` tool requires a list of vector store
610 IDs.
611
612 tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
613 assistant. Tools can be of types `code_interpreter`, `file_search`, or
614 `function`.
615
616 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
617 model considers the results of the tokens with top_p probability mass. So 0.1
618 means only the tokens comprising the top 10% probability mass are considered.
619
620 We generally recommend altering this or temperature but not both.
621
622 extra_headers: Send extra headers
623
624 extra_query: Add additional query parameters to the request
625
626 extra_body: Add additional JSON properties to the request
627
628 timeout: Override the client-level default timeout for this request, in seconds
629 """
630 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
631 return await self._post(
632 "/assistants",
633 body=await async_maybe_transform(
634 {
635 "model": model,
636 "description": description,
637 "instructions": instructions,
638 "metadata": metadata,
639 "name": name,
640 "reasoning_effort": reasoning_effort,
641 "response_format": response_format,
642 "temperature": temperature,
643 "tool_resources": tool_resources,
644 "tools": tools,
645 "top_p": top_p,
646 },
647 assistant_create_params.AssistantCreateParams,
648 ),
649 options=make_request_options(
650 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
651 ),
652 cast_to=Assistant,
653 )
654
655 async def retrieve(
656 self,
657 assistant_id: str,
658 *,
659 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
660 # The extra values given here take precedence over values defined on the client or passed to this method.
661 extra_headers: Headers | None = None,
662 extra_query: Query | None = None,
663 extra_body: Body | None = None,
664 timeout: float | httpx.Timeout | None | NotGiven = not_given,
665 ) -> Assistant:
666 """
667 Retrieves an assistant.
668
669 Args:
670 extra_headers: Send extra headers
671
672 extra_query: Add additional query parameters to the request
673
674 extra_body: Add additional JSON properties to the request
675
676 timeout: Override the client-level default timeout for this request, in seconds
677 """
678 if not assistant_id:
679 raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
680 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
681 return await self._get(
682 f"/assistants/{assistant_id}",
683 options=make_request_options(
684 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
685 ),
686 cast_to=Assistant,
687 )
688
689 async def update(
690 self,
691 assistant_id: str,
692 *,
693 description: Optional[str] | Omit = omit,
694 instructions: Optional[str] | Omit = omit,
695 metadata: Optional[Metadata] | Omit = omit,
696 model: Union[
697 str,
698 Literal[
699 "gpt-5",
700 "gpt-5-mini",
701 "gpt-5-nano",
702 "gpt-5-2025-08-07",
703 "gpt-5-mini-2025-08-07",
704 "gpt-5-nano-2025-08-07",
705 "gpt-4.1",
706 "gpt-4.1-mini",
707 "gpt-4.1-nano",
708 "gpt-4.1-2025-04-14",
709 "gpt-4.1-mini-2025-04-14",
710 "gpt-4.1-nano-2025-04-14",
711 "o3-mini",
712 "o3-mini-2025-01-31",
713 "o1",
714 "o1-2024-12-17",
715 "gpt-4o",
716 "gpt-4o-2024-11-20",
717 "gpt-4o-2024-08-06",
718 "gpt-4o-2024-05-13",
719 "gpt-4o-mini",
720 "gpt-4o-mini-2024-07-18",
721 "gpt-4.5-preview",
722 "gpt-4.5-preview-2025-02-27",
723 "gpt-4-turbo",
724 "gpt-4-turbo-2024-04-09",
725 "gpt-4-0125-preview",
726 "gpt-4-turbo-preview",
727 "gpt-4-1106-preview",
728 "gpt-4-vision-preview",
729 "gpt-4",
730 "gpt-4-0314",
731 "gpt-4-0613",
732 "gpt-4-32k",
733 "gpt-4-32k-0314",
734 "gpt-4-32k-0613",
735 "gpt-3.5-turbo",
736 "gpt-3.5-turbo-16k",
737 "gpt-3.5-turbo-0613",
738 "gpt-3.5-turbo-1106",
739 "gpt-3.5-turbo-0125",
740 "gpt-3.5-turbo-16k-0613",
741 ],
742 ]
743 | Omit = omit,
744 name: Optional[str] | Omit = omit,
745 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
746 response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
747 temperature: Optional[float] | Omit = omit,
748 tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit,
749 tools: Iterable[AssistantToolParam] | Omit = omit,
750 top_p: Optional[float] | Omit = omit,
751 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
752 # The extra values given here take precedence over values defined on the client or passed to this method.
753 extra_headers: Headers | None = None,
754 extra_query: Query | None = None,
755 extra_body: Body | None = None,
756 timeout: float | httpx.Timeout | None | NotGiven = not_given,
757 ) -> Assistant:
758 """Modifies an assistant.
759
760 Args:
761 description: The description of the assistant.
762
763 The maximum length is 512 characters.
764
765 instructions: The system instructions that the assistant uses. The maximum length is 256,000
766 characters.
767
768 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
769 for storing additional information about the object in a structured format, and
770 querying for objects via API or the dashboard.
771
772 Keys are strings with a maximum length of 64 characters. Values are strings with
773 a maximum length of 512 characters.
774
775 model: ID of the model to use. You can use the
776 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
777 see all of your available models, or see our
778 [Model overview](https://platform.openai.com/docs/models) for descriptions of
779 them.
780
781 name: The name of the assistant. The maximum length is 256 characters.
782
783 reasoning_effort: Constrains effort on reasoning for
784 [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
785 supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
786 Reducing reasoning effort can result in faster responses and fewer tokens used
787 on reasoning in a response.
788
789 - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
790 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
791 calls are supported for all reasoning values in gpt-5.1.
792 - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
793 support `none`.
794 - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
795 - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
796
797 response_format: Specifies the format that the model must output. Compatible with
798 [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
799 [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
800 and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
801
802 Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
803 Outputs which ensures the model will match your supplied JSON schema. Learn more
804 in the
805 [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
806
807 Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
808 message the model generates is valid JSON.
809
810 **Important:** when using JSON mode, you **must** also instruct the model to
811 produce JSON yourself via a system or user message. Without this, the model may
812 generate an unending stream of whitespace until the generation reaches the token
813 limit, resulting in a long-running and seemingly "stuck" request. Also note that
814 the message content may be partially cut off if `finish_reason="length"`, which
815 indicates the generation exceeded `max_tokens` or the conversation exceeded the
816 max context length.
817
818 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
819 make the output more random, while lower values like 0.2 will make it more
820 focused and deterministic.
821
822 tool_resources: A set of resources that are used by the assistant's tools. The resources are
823 specific to the type of tool. For example, the `code_interpreter` tool requires
824 a list of file IDs, while the `file_search` tool requires a list of vector store
825 IDs.
826
827 tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
828 assistant. Tools can be of types `code_interpreter`, `file_search`, or
829 `function`.
830
831 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
832 model considers the results of the tokens with top_p probability mass. So 0.1
833 means only the tokens comprising the top 10% probability mass are considered.
834
835 We generally recommend altering this or temperature but not both.
836
837 extra_headers: Send extra headers
838
839 extra_query: Add additional query parameters to the request
840
841 extra_body: Add additional JSON properties to the request
842
843 timeout: Override the client-level default timeout for this request, in seconds
844 """
845 if not assistant_id:
846 raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
847 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
848 return await self._post(
849 f"/assistants/{assistant_id}",
850 body=await async_maybe_transform(
851 {
852 "description": description,
853 "instructions": instructions,
854 "metadata": metadata,
855 "model": model,
856 "name": name,
857 "reasoning_effort": reasoning_effort,
858 "response_format": response_format,
859 "temperature": temperature,
860 "tool_resources": tool_resources,
861 "tools": tools,
862 "top_p": top_p,
863 },
864 assistant_update_params.AssistantUpdateParams,
865 ),
866 options=make_request_options(
867 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
868 ),
869 cast_to=Assistant,
870 )
871
872 def list(
873 self,
874 *,
875 after: str | Omit = omit,
876 before: str | Omit = omit,
877 limit: int | Omit = omit,
878 order: Literal["asc", "desc"] | Omit = omit,
879 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
880 # The extra values given here take precedence over values defined on the client or passed to this method.
881 extra_headers: Headers | None = None,
882 extra_query: Query | None = None,
883 extra_body: Body | None = None,
884 timeout: float | httpx.Timeout | None | NotGiven = not_given,
885 ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]:
886 """Returns a list of assistants.
887
888 Args:
889 after: A cursor for use in pagination.
890
891 `after` is an object ID that defines your place
892 in the list. For instance, if you make a list request and receive 100 objects,
893 ending with obj_foo, your subsequent call can include after=obj_foo in order to
894 fetch the next page of the list.
895
896 before: A cursor for use in pagination. `before` is an object ID that defines your place
897 in the list. For instance, if you make a list request and receive 100 objects,
898 starting with obj_foo, your subsequent call can include before=obj_foo in order
899 to fetch the previous page of the list.
900
901 limit: A limit on the number of objects to be returned. Limit can range between 1 and
902 100, and the default is 20.
903
904 order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
905 order and `desc` for descending order.
906
907 extra_headers: Send extra headers
908
909 extra_query: Add additional query parameters to the request
910
911 extra_body: Add additional JSON properties to the request
912
913 timeout: Override the client-level default timeout for this request, in seconds
914 """
915 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
916 return self._get_api_list(
917 "/assistants",
918 page=AsyncCursorPage[Assistant],
919 options=make_request_options(
920 extra_headers=extra_headers,
921 extra_query=extra_query,
922 extra_body=extra_body,
923 timeout=timeout,
924 query=maybe_transform(
925 {
926 "after": after,
927 "before": before,
928 "limit": limit,
929 "order": order,
930 },
931 assistant_list_params.AssistantListParams,
932 ),
933 ),
934 model=Assistant,
935 )
936
937 async def delete(
938 self,
939 assistant_id: str,
940 *,
941 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
942 # The extra values given here take precedence over values defined on the client or passed to this method.
943 extra_headers: Headers | None = None,
944 extra_query: Query | None = None,
945 extra_body: Body | None = None,
946 timeout: float | httpx.Timeout | None | NotGiven = not_given,
947 ) -> AssistantDeleted:
948 """
949 Delete an assistant.
950
951 Args:
952 extra_headers: Send extra headers
953
954 extra_query: Add additional query parameters to the request
955
956 extra_body: Add additional JSON properties to the request
957
958 timeout: Override the client-level default timeout for this request, in seconds
959 """
960 if not assistant_id:
961 raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
962 extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
963 return await self._delete(
964 f"/assistants/{assistant_id}",
965 options=make_request_options(
966 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
967 ),
968 cast_to=AssistantDeleted,
969 )
970
971
class AssistantsWithRawResponse:
    """Raw-response view over the sync :class:`Assistants` resource.

    Each attribute mirrors a method on the wrapped resource but returns the
    raw HTTP response object instead of the parsed model.
    """

    def __init__(self, assistants: Assistants) -> None:
        self._assistants = assistants

        wrap = _legacy_response.to_raw_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
992
class AsyncAssistantsWithRawResponse:
    """Raw-response view over the async :class:`AsyncAssistants` resource.

    Each attribute mirrors a method on the wrapped resource but returns the
    raw HTTP response object instead of the parsed model.
    """

    def __init__(self, assistants: AsyncAssistants) -> None:
        self._assistants = assistants

        wrap = _legacy_response.async_to_raw_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
1013
class AssistantsWithStreamingResponse:
    """Streaming-response view over the sync :class:`Assistants` resource.

    Each attribute mirrors a method on the wrapped resource but defers
    reading the response body, allowing it to be streamed.
    """

    def __init__(self, assistants: Assistants) -> None:
        self._assistants = assistants

        wrap = to_streamed_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
1034
class AsyncAssistantsWithStreamingResponse:
    """Streaming-response view over the async :class:`AsyncAssistants` resource.

    Each attribute mirrors a method on the wrapped resource but defers
    reading the response body, allowing it to be streamed.
    """

    def __init__(self, assistants: AsyncAssistants) -> None:
        self._assistants = assistants

        wrap = async_to_streamed_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)