# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from copy import copy
from typing import Any, List, Type, Union, Iterable, Optional, cast
from functools import partial
from typing_extensions import Literal, overload

import httpx

from ... import _legacy_response
from ..._types import NOT_GIVEN, Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
from ..._utils import is_given, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from .input_items import (
    InputItems,
    AsyncInputItems,
    InputItemsWithRawResponse,
    AsyncInputItemsWithRawResponse,
    InputItemsWithStreamingResponse,
    AsyncInputItemsWithStreamingResponse,
)
from ..._streaming import Stream, AsyncStream
from ...lib._tools import PydanticFunctionTool, ResponsesPydanticFunctionTool
from .input_tokens import (
    InputTokens,
    AsyncInputTokens,
    InputTokensWithRawResponse,
    AsyncInputTokensWithRawResponse,
    InputTokensWithStreamingResponse,
    AsyncInputTokensWithStreamingResponse,
)
from ..._base_client import make_request_options
from ...types.responses import (
    response_create_params,
    response_compact_params,
    response_retrieve_params,
)
from ...lib._parsing._responses import (
    TextFormatT,
    parse_response,
    type_to_text_format_param as _type_to_text_format_param,
)
from ...types.responses.response import Response
from ...types.responses.tool_param import ToolParam, ParseableToolParam
from ...types.shared_params.metadata import Metadata
from ...types.shared_params.reasoning import Reasoning
from ...types.responses.parsed_response import ParsedResponse
from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
from ...types.responses.compacted_response import CompactedResponse
from ...types.responses.response_includable import ResponseIncludable
from ...types.shared_params.responses_model import ResponsesModel
from ...types.responses.response_input_param import ResponseInputParam
from ...types.responses.response_prompt_param import ResponsePromptParam
from ...types.responses.response_stream_event import ResponseStreamEvent
from ...types.responses.response_input_item_param import ResponseInputItemParam
from ...types.responses.response_text_config_param import ResponseTextConfigParam

__all__ = ["Responses", "AsyncResponses"]


class Responses(SyncAPIResource):
    @cached_property
    def input_items(self) -> InputItems:
        return InputItems(self._client)

    @cached_property
    def input_tokens(self) -> InputTokens:
        return InputTokens(self._client)

    @cached_property
    def with_raw_response(self) -> ResponsesWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
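
        Example (an illustrative sketch; assumes `client` is a configured `OpenAI` instance):

            raw = client.responses.with_raw_response.create(
                model="gpt-4o",
                input="Say hello.",
            )
            print(raw.headers.get("x-request-id"))
            response = raw.parse()  # the parsed `Response` object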
81 """
82 return ResponsesWithRawResponse(self)
83
84 @cached_property
85 def with_streaming_response(self) -> ResponsesWithStreamingResponse:
86 """
87 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
88
89 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
90 """
91 return ResponsesWithStreamingResponse(self)
92
    @overload
    def create(
        self,
        *,
        background: Optional[bool] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        input: Union[str, ResponseInputParam] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: ResponsesModel | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        tools: Iterable[ToolParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response:
        """Creates a model response.

        Provide
        [text](https://platform.openai.com/docs/guides/text) or
        [image](https://platform.openai.com/docs/guides/images) inputs to generate
        [text](https://platform.openai.com/docs/guides/text) or
        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
        the model call your own
        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
        built-in [tools](https://platform.openai.com/docs/guides/tools) like
        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
        your own data as input for the model's response.

        Args:
          background: Whether to run the model response in the background.
              [Learn more](https://platform.openai.com/docs/guides/background).

          conversation: The conversation that this response belongs to. Items from this conversation
              are prepended to `input_items` for this response request. Input items and output
              items from this response are automatically added to this conversation after this
              response completes.

          include: Specify additional output data to include in the model response. Currently
              supported values are:

              - `web_search_call.action.sources`: Include the sources of the web search tool
                call.
              - `code_interpreter_call.outputs`: Includes the outputs of Python code execution
                in code interpreter tool call items.
              - `computer_call_output.output.image_url`: Include image URLs from the computer
                call output.
              - `file_search_call.results`: Include the search results of the file search tool
                call.
              - `message.input_image.image_url`: Include image URLs from the input message.
              - `message.output_text.logprobs`: Include logprobs with assistant messages.
              - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
                tokens in reasoning item outputs. This enables reasoning items to be used in
                multi-turn conversations when using the Responses API statelessly (like when
                the `store` parameter is set to `false`, or when an organization is enrolled
                in the zero data retention program).

          input: Text, image, or file inputs to the model, used to generate a response.

              Learn more:

              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
              - [Image inputs](https://platform.openai.com/docs/guides/images)
              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
              - [Function calling](https://platform.openai.com/docs/guides/function-calling)

          instructions: A system (or developer) message inserted into the model's context.

              When used along with `previous_response_id`, the instructions from a previous
              response will not be carried over to the next response. This makes it simple to
              swap out system (or developer) messages in new responses.

          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
              response. This maximum number applies across all built-in tool calls, not per
              individual tool. Any further attempts to call a tool by the model will be
              ignored.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.

          previous_response_id: The unique ID of the previous response to the model. Use this to create
              multi-turn conversations. Learn more about
              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
              Cannot be used in conjunction with `conversation`.

          prompt: Reference to a prompt template and its variables.
              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning: **gpt-5 and o-series models only**

              Configuration options for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning).

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The ID should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          store: Whether to store the generated model response for later retrieval via API.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
              for more information.

          stream_options: Options for streaming responses. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          text: Configuration options for a text response from the model. Can be plain text or
              structured JSON data. Learn more:

              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)

          tool_choice: How the model should select which tool (or tools) to use when generating a
              response. See the `tools` parameter to see how to specify which tools the model
              can call.

          tools: An array of tools the model may call while generating a response. You can
              specify which tool to use by setting the `tool_choice` parameter.

              We support the following categories of tools:

              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
                capabilities, like
                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
                [file search](https://platform.openai.com/docs/guides/tools-file-search).
                Learn more about
                [built-in tools](https://platform.openai.com/docs/guides/tools).
              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
                predefined connectors such as Google Drive and SharePoint. Learn more about
                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
              - **Function calls (custom tools)**: Functions that are defined by you, enabling
                the model to call your own code with strongly typed arguments and outputs.
                Learn more about
                [function calling](https://platform.openai.com/docs/guides/function-calling).
                You can also use custom tools to call your own code.

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          truncation: The truncation strategy to use for the model response.

              - `auto`: If the input to this Response exceeds the model's context window size,
                the model will truncate the response to fit the context window by dropping
                items from the beginning of the conversation.
              - `disabled` (default): If the input size will exceed the context window size
                for a model, the request will fail with a 400 error.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
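
          Example (an illustrative sketch; assumes `client` is a configured `OpenAI` instance):

              response = client.responses.create(
                  model="gpt-4o",
                  input="Write a one-sentence bedtime story about a unicorn.",
              )
              print(response.output_text)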
335 """
336 ...

    @overload
    def create(
        self,
        *,
        stream: Literal[True],
        background: Optional[bool] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        input: Union[str, ResponseInputParam] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: ResponsesModel | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        tools: Iterable[ToolParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Stream[ResponseStreamEvent]:
        """Creates a model response.

        Provide
        [text](https://platform.openai.com/docs/guides/text) or
        [image](https://platform.openai.com/docs/guides/images) inputs to generate
        [text](https://platform.openai.com/docs/guides/text) or
        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
        the model call your own
        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
        built-in [tools](https://platform.openai.com/docs/guides/tools) like
        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
        your own data as input for the model's response.

        Args:
          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
              for more information.

          background: Whether to run the model response in the background.
              [Learn more](https://platform.openai.com/docs/guides/background).

          conversation: The conversation that this response belongs to. Items from this conversation
              are prepended to `input_items` for this response request. Input items and output
              items from this response are automatically added to this conversation after this
              response completes.

          include: Specify additional output data to include in the model response. Currently
              supported values are:

              - `web_search_call.action.sources`: Include the sources of the web search tool
                call.
              - `code_interpreter_call.outputs`: Includes the outputs of Python code execution
                in code interpreter tool call items.
              - `computer_call_output.output.image_url`: Include image URLs from the computer
                call output.
              - `file_search_call.results`: Include the search results of the file search tool
                call.
              - `message.input_image.image_url`: Include image URLs from the input message.
              - `message.output_text.logprobs`: Include logprobs with assistant messages.
              - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
                tokens in reasoning item outputs. This enables reasoning items to be used in
                multi-turn conversations when using the Responses API statelessly (like when
                the `store` parameter is set to `false`, or when an organization is enrolled
                in the zero data retention program).

          input: Text, image, or file inputs to the model, used to generate a response.

              Learn more:

              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
              - [Image inputs](https://platform.openai.com/docs/guides/images)
              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
              - [Function calling](https://platform.openai.com/docs/guides/function-calling)

          instructions: A system (or developer) message inserted into the model's context.

              When used along with `previous_response_id`, the instructions from a previous
              response will not be carried over to the next response. This makes it simple to
              swap out system (or developer) messages in new responses.

          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
              response. This maximum number applies across all built-in tool calls, not per
              individual tool. Any further attempts to call a tool by the model will be
              ignored.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.

          previous_response_id: The unique ID of the previous response to the model. Use this to create
              multi-turn conversations. Learn more about
              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
              Cannot be used in conjunction with `conversation`.

          prompt: Reference to a prompt template and its variables.
              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning: **gpt-5 and o-series models only**

              Configuration options for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning).

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The ID should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          store: Whether to store the generated model response for later retrieval via API.

          stream_options: Options for streaming responses. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          text: Configuration options for a text response from the model. Can be plain text or
              structured JSON data. Learn more:

              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)

          tool_choice: How the model should select which tool (or tools) to use when generating a
              response. See the `tools` parameter to see how to specify which tools the model
              can call.

          tools: An array of tools the model may call while generating a response. You can
              specify which tool to use by setting the `tool_choice` parameter.

              We support the following categories of tools:

              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
                capabilities, like
                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
                [file search](https://platform.openai.com/docs/guides/tools-file-search).
                Learn more about
                [built-in tools](https://platform.openai.com/docs/guides/tools).
              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
                predefined connectors such as Google Drive and SharePoint. Learn more about
                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
              - **Function calls (custom tools)**: Functions that are defined by you, enabling
                the model to call your own code with strongly typed arguments and outputs.
                Learn more about
                [function calling](https://platform.openai.com/docs/guides/function-calling).
                You can also use custom tools to call your own code.

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          truncation: The truncation strategy to use for the model response.

              - `auto`: If the input to this Response exceeds the model's context window size,
                the model will truncate the response to fit the context window by dropping
                items from the beginning of the conversation.
              - `disabled` (default): If the input size will exceed the context window size
                for a model, the request will fail with a 400 error.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
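
          Example (an illustrative sketch; assumes `client` is a configured `OpenAI` instance):

              stream = client.responses.create(
                  model="gpt-4o",
                  input="Say hello.",
                  stream=True,
              )
              for event in stream:
                  print(event.type)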
580 """
581 ...
582
    @overload
    def create(
        self,
        *,
        stream: bool,
        background: Optional[bool] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        input: Union[str, ResponseInputParam] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: ResponsesModel | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        tools: Iterable[ToolParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response | Stream[ResponseStreamEvent]:
        """Creates a model response.

        Provide
        [text](https://platform.openai.com/docs/guides/text) or
        [image](https://platform.openai.com/docs/guides/images) inputs to generate
        [text](https://platform.openai.com/docs/guides/text) or
        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
        the model call your own
        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
        built-in [tools](https://platform.openai.com/docs/guides/tools) like
        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
        your own data as input for the model's response.

        Args:
          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
              for more information.

          background: Whether to run the model response in the background.
              [Learn more](https://platform.openai.com/docs/guides/background).

          conversation: The conversation that this response belongs to. Items from this conversation
              are prepended to `input_items` for this response request. Input items and output
              items from this response are automatically added to this conversation after this
              response completes.

          include: Specify additional output data to include in the model response. Currently
              supported values are:

              - `web_search_call.action.sources`: Include the sources of the web search tool
                call.
              - `code_interpreter_call.outputs`: Includes the outputs of Python code execution
                in code interpreter tool call items.
              - `computer_call_output.output.image_url`: Include image URLs from the computer
                call output.
              - `file_search_call.results`: Include the search results of the file search tool
                call.
              - `message.input_image.image_url`: Include image URLs from the input message.
              - `message.output_text.logprobs`: Include logprobs with assistant messages.
              - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
                tokens in reasoning item outputs. This enables reasoning items to be used in
                multi-turn conversations when using the Responses API statelessly (like when
                the `store` parameter is set to `false`, or when an organization is enrolled
                in the zero data retention program).

          input: Text, image, or file inputs to the model, used to generate a response.

              Learn more:

              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
              - [Image inputs](https://platform.openai.com/docs/guides/images)
              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
              - [Function calling](https://platform.openai.com/docs/guides/function-calling)

          instructions: A system (or developer) message inserted into the model's context.

              When used along with `previous_response_id`, the instructions from a previous
              response will not be carried over to the next response. This makes it simple to
              swap out system (or developer) messages in new responses.

          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
              response. This maximum number applies across all built-in tool calls, not per
              individual tool. Any further attempts to call a tool by the model will be
              ignored.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.

          previous_response_id: The unique ID of the previous response to the model. Use this to create
              multi-turn conversations. Learn more about
              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
              Cannot be used in conjunction with `conversation`.

          prompt: Reference to a prompt template and its variables.
              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning: **gpt-5 and o-series models only**

              Configuration options for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning).

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The ID should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          store: Whether to store the generated model response for later retrieval via API.

          stream_options: Options for streaming responses. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          text: Configuration options for a text response from the model. Can be plain text or
              structured JSON data. Learn more:

              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)

          tool_choice: How the model should select which tool (or tools) to use when generating a
              response. See the `tools` parameter to see how to specify which tools the model
              can call.

          tools: An array of tools the model may call while generating a response. You can
              specify which tool to use by setting the `tool_choice` parameter.

              We support the following categories of tools:

              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
                capabilities, like
                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
                [file search](https://platform.openai.com/docs/guides/tools-file-search).
                Learn more about
                [built-in tools](https://platform.openai.com/docs/guides/tools).
              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
                predefined connectors such as Google Drive and SharePoint. Learn more about
                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
              - **Function calls (custom tools)**: Functions that are defined by you, enabling
                the model to call your own code with strongly typed arguments and outputs.
                Learn more about
                [function calling](https://platform.openai.com/docs/guides/function-calling).
                You can also use custom tools to call your own code.

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          truncation: The truncation strategy to use for the model response.

              - `auto`: If the input to this Response exceeds the model's context window size,
                the model will truncate the response to fit the context window by dropping
                items from the beginning of the conversation.
              - `disabled` (default): If the input size will exceed the context window size
                for a model, the request will fail with a 400 error.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    def create(
        self,
        *,
        background: Optional[bool] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        input: Union[str, ResponseInputParam] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: ResponsesModel | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        tools: Iterable[ToolParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response | Stream[ResponseStreamEvent]:
        return self._post(
            "/responses",
            body=maybe_transform(
                {
                    "background": background,
                    "conversation": conversation,
                    "include": include,
                    "input": input,
                    "instructions": instructions,
                    "max_output_tokens": max_output_tokens,
                    "max_tool_calls": max_tool_calls,
                    "metadata": metadata,
                    "model": model,
                    "parallel_tool_calls": parallel_tool_calls,
                    "previous_response_id": previous_response_id,
                    "prompt": prompt,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning": reasoning,
                    "safety_identifier": safety_identifier,
                    "service_tier": service_tier,
                    "store": store,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "text": text,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "truncation": truncation,
                    "user": user,
                },
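                # pick the params TypedDict matching the request shape: the streaming
                # and non-streaming variants accept different `stream` values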
                response_create_params.ResponseCreateParamsStreaming
                if stream
                else response_create_params.ResponseCreateParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Response,
            stream=stream or False,
            stream_cls=Stream[ResponseStreamEvent],
        )

    @overload
    def stream(
        self,
        *,
        response_id: str,
        text_format: type[TextFormatT] | Omit = omit,
        starting_after: int | Omit = omit,
        tools: Iterable[ParseableToolParam] | Omit = omit,
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ResponseStreamManager[TextFormatT]: ...

    @overload
    def stream(
        self,
        *,
        input: Union[str, ResponseInputParam],
        model: ResponsesModel,
        background: Optional[bool] | Omit = omit,
        text_format: type[TextFormatT] | Omit = omit,
        tools: Iterable[ParseableToolParam] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ResponseStreamManager[TextFormatT]: ...

    def stream(
        self,
        *,
        response_id: str | Omit = omit,
        input: Union[str, ResponseInputParam] | Omit = omit,
        model: ResponsesModel | Omit = omit,
        background: Optional[bool] | Omit = omit,
        text_format: type[TextFormatT] | Omit = omit,
        tools: Iterable[ParseableToolParam] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        starting_after: int | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ResponseStreamManager[TextFormatT]:
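        """Create a new model response stream, or resume streaming an existing one by `response_id`.

        An illustrative sketch (assumes `client` is a configured `OpenAI` instance):

            with client.responses.stream(
                model="gpt-4o",
                input="Write a haiku about Python.",
            ) as stream:
                for event in stream:
                    if event.type == "response.output_text.delta":
                        print(event.delta, end="")
        """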
        new_response_args = {
            "input": input,
            "model": model,
            "conversation": conversation,
            "include": include,
            "instructions": instructions,
            "max_output_tokens": max_output_tokens,
            "max_tool_calls": max_tool_calls,
            "metadata": metadata,
            "parallel_tool_calls": parallel_tool_calls,
            "previous_response_id": previous_response_id,
            "prompt": prompt,
            "prompt_cache_key": prompt_cache_key,
            "prompt_cache_retention": prompt_cache_retention,
            "reasoning": reasoning,
            "safety_identifier": safety_identifier,
            "service_tier": service_tier,
            "store": store,
            "stream_options": stream_options,
            "temperature": temperature,
            "text": text,
            "tool_choice": tool_choice,
            "top_logprobs": top_logprobs,
            "top_p": top_p,
            "truncation": truncation,
            "user": user,
            "background": background,
        }
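        # names of the explicitly provided "create a new response" arguments; used to
        # detect whether the caller is creating a new response or resuming an existing
        # one by `response_id`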
        new_response_args_names = [k for k, v in new_response_args.items() if is_given(v)]

        if (is_given(response_id) or is_given(starting_after)) and len(new_response_args_names) > 0:
            raise ValueError(
                "`response_id` and `starting_after` cannot be provided together with "
                + ", ".join(new_response_args_names)
            )
        tools = _make_tools(tools)
        if len(new_response_args_names) > 0:
            if not is_given(input):
                raise ValueError("input must be provided when creating a new response")

            if not is_given(model):
                raise ValueError("model must be provided when creating a new response")

            if is_given(text_format):
                if not text:
                    text = {}

                if "format" in text:
                    raise TypeError("Cannot mix and match text.format with text_format")

                text = copy(text)
                text["format"] = _type_to_text_format_param(text_format)

            api_request: partial[Stream[ResponseStreamEvent]] = partial(
                self.create,
                input=input,
                model=model,
                tools=tools,
                conversation=conversation,
                include=include,
                instructions=instructions,
                max_output_tokens=max_output_tokens,
                max_tool_calls=max_tool_calls,
                metadata=metadata,
                parallel_tool_calls=parallel_tool_calls,
                previous_response_id=previous_response_id,
                prompt=prompt,
                prompt_cache_key=prompt_cache_key,
                prompt_cache_retention=prompt_cache_retention,
                store=store,
                stream_options=stream_options,
                stream=True,
                temperature=temperature,
                text=text,
                tool_choice=tool_choice,
                reasoning=reasoning,
                safety_identifier=safety_identifier,
                service_tier=service_tier,
                top_logprobs=top_logprobs,
                top_p=top_p,
                truncation=truncation,
                user=user,
                background=background,
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
            )

            return ResponseStreamManager(api_request, text_format=text_format, input_tools=tools, starting_after=None)
        else:
            if not is_given(response_id):
                raise ValueError("response_id must be provided when streaming an existing response")

            return ResponseStreamManager(
                lambda: self.retrieve(
                    response_id=response_id,
                    stream=True,
                    include=include or [],
                    extra_headers=extra_headers,
                    extra_query=extra_query,
                    extra_body=extra_body,
                    starting_after=omit,
                    timeout=timeout,
                ),
                text_format=text_format,
                input_tools=tools,
                starting_after=starting_after if is_given(starting_after) else None,
            )

    def parse(
        self,
        *,
        text_format: type[TextFormatT] | Omit = omit,
        background: Optional[bool] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        input: Union[str, ResponseInputParam] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: ResponsesModel | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        tools: Iterable[ParseableToolParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ParsedResponse[TextFormatT]:
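        """Create a model response and parse its output into `text_format`.

        An illustrative sketch (assumes a Pydantic model is passed as `text_format`):

            from pydantic import BaseModel

            class Haiku(BaseModel):
                title: str
                lines: list[str]

            parsed = client.responses.parse(
                model="gpt-4o",
                input="Write a haiku about recursion.",
                text_format=Haiku,
            )
            print(parsed.output_parsed)
        """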
        if is_given(text_format):
            if not text:
                text = {}

            if "format" in text:
                raise TypeError("Cannot mix and match text.format with text_format")

            text = copy(text)
            text["format"] = _type_to_text_format_param(text_format)

        tools = _make_tools(tools)

        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
            return parse_response(
                input_tools=tools,
                text_format=text_format,
                response=raw_response,
            )

        return self._post(
            "/responses",
            body=maybe_transform(
                {
                    "background": background,
                    "conversation": conversation,
                    "include": include,
                    "input": input,
                    "instructions": instructions,
                    "max_output_tokens": max_output_tokens,
                    "max_tool_calls": max_tool_calls,
                    "metadata": metadata,
                    "model": model,
                    "parallel_tool_calls": parallel_tool_calls,
                    "previous_response_id": previous_response_id,
                    "prompt": prompt,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning": reasoning,
                    "safety_identifier": safety_identifier,
                    "service_tier": service_tier,
                    "store": store,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "text": text,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "truncation": truncation,
                    "user": user,
                    "verbosity": verbosity,
                },
                response_create_params.ResponseCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                post_parser=parser,
            ),
            # we turn the `Response` instance into a `ParsedResponse`
            # in the `parser` function above
            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
        )

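    # Hedged example (model id and schema are placeholders, not generated code):
    #
    #     from pydantic import BaseModel
    #
    #     class CalendarEvent(BaseModel):
    #         name: str
    #         date: str
    #
    #     parsed = client.responses.parse(
    #         model="gpt-4o",
    #         input="Alice and Bob meet on Friday.",
    #         text_format=CalendarEvent,
    #     )
    #     event = parsed.output_parsed  # a CalendarEvent instance, if the model complied
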
    @overload
    def retrieve(
        self,
        response_id: str,
        *,
        include: List[ResponseIncludable] | Omit = omit,
        include_obfuscation: bool | Omit = omit,
        starting_after: int | Omit = omit,
        stream: Literal[False] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response:
1286 """
1287 Retrieves a model response with the given ID.
1288
1289 Args:
1290 include: Additional fields to include in the response. See the `include` parameter for
1291 Response creation above for more information.
1292
1293 include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
1294 characters to an `obfuscation` field on streaming delta events to normalize
1295 payload sizes as a mitigation to certain side-channel attacks. These obfuscation
1296 fields are included by default, but add a small amount of overhead to the data
1297 stream. You can set `include_obfuscation` to false to optimize for bandwidth if
1298 you trust the network links between your application and the OpenAI API.
1299
1300 starting_after: The sequence number of the event after which to start streaming.
1301
1302 stream: If set to true, the model response data will be streamed to the client as it is
1303 generated using
1304 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
1305 See the
1306 [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
1307 for more information.
1308
1309 extra_headers: Send extra headers
1310
1311 extra_query: Add additional query parameters to the request
1312
1313 extra_body: Add additional JSON properties to the request
1314
1315 timeout: Override the client-level default timeout for this request, in seconds
1316 """
1317 ...
1318
1319 @overload
1320 def retrieve(
1321 self,
1322 response_id: str,
1323 *,
1324 stream: Literal[True],
1325 include: List[ResponseIncludable] | Omit = omit,
1326 include_obfuscation: bool | Omit = omit,
1327 starting_after: int | Omit = omit,
1328 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1329 # The extra values given here take precedence over values defined on the client or passed to this method.
1330 extra_headers: Headers | None = None,
1331 extra_query: Query | None = None,
1332 extra_body: Body | None = None,
1333 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1334 ) -> Stream[ResponseStreamEvent]:
1335 """
1336 Retrieves a model response with the given ID.
1337
1338 Args:
1339 stream: If set to true, the model response data will be streamed to the client as it is
1340 generated using
1341 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
1342 See the
1343 [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
1344 for more information.
1345
1346 include: Additional fields to include in the response. See the `include` parameter for
1347 Response creation above for more information.
1348
1349 include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
1350 characters to an `obfuscation` field on streaming delta events to normalize
1351 payload sizes as a mitigation to certain side-channel attacks. These obfuscation
1352 fields are included by default, but add a small amount of overhead to the data
1353 stream. You can set `include_obfuscation` to false to optimize for bandwidth if
1354 you trust the network links between your application and the OpenAI API.
1355
1356 starting_after: The sequence number of the event after which to start streaming.
1357
1358 extra_headers: Send extra headers
1359
1360 extra_query: Add additional query parameters to the request
1361
1362 extra_body: Add additional JSON properties to the request
1363
1364 timeout: Override the client-level default timeout for this request, in seconds
1365 """
1366 ...
1367
1368 @overload
1369 def retrieve(
1370 self,
1371 response_id: str,
1372 *,
1373 stream: bool,
1374 include: List[ResponseIncludable] | Omit = omit,
1375 include_obfuscation: bool | Omit = omit,
1376 starting_after: int | Omit = omit,
1377 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1378 # The extra values given here take precedence over values defined on the client or passed to this method.
1379 extra_headers: Headers | None = None,
1380 extra_query: Query | None = None,
1381 extra_body: Body | None = None,
1382 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1383 ) -> Response | Stream[ResponseStreamEvent]:
1384 """
1385 Retrieves a model response with the given ID.
1386
1387 Args:
1388 stream: If set to true, the model response data will be streamed to the client as it is
1389 generated using
1390 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
1391 See the
1392 [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
1393 for more information.
1394
1395 include: Additional fields to include in the response. See the `include` parameter for
1396 Response creation above for more information.
1397
1398 include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
1399 characters to an `obfuscation` field on streaming delta events to normalize
1400 payload sizes as a mitigation to certain side-channel attacks. These obfuscation
1401 fields are included by default, but add a small amount of overhead to the data
1402 stream. You can set `include_obfuscation` to false to optimize for bandwidth if
1403 you trust the network links between your application and the OpenAI API.
1404
1405 starting_after: The sequence number of the event after which to start streaming.
1406
1407 extra_headers: Send extra headers
1408
1409 extra_query: Add additional query parameters to the request
1410
1411 extra_body: Add additional JSON properties to the request
1412
1413 timeout: Override the client-level default timeout for this request, in seconds
1414 """
1415 ...
1416
1417 def retrieve(
1418 self,
1419 response_id: str,
1420 *,
1421 include: List[ResponseIncludable] | Omit = omit,
1422 include_obfuscation: bool | Omit = omit,
1423 starting_after: int | Omit = omit,
1424 stream: Literal[False] | Literal[True] | Omit = omit,
1425 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1426 # The extra values given here take precedence over values defined on the client or passed to this method.
1427 extra_headers: Headers | None = None,
1428 extra_query: Query | None = None,
1429 extra_body: Body | None = None,
1430 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1431 ) -> Response | Stream[ResponseStreamEvent]:
1432 if not response_id:
1433 raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
1434 return self._get(
1435 f"/responses/{response_id}",
1436 options=make_request_options(
1437 extra_headers=extra_headers,
1438 extra_query=extra_query,
1439 extra_body=extra_body,
1440 timeout=timeout,
1441 query=maybe_transform(
1442 {
1443 "include": include,
1444 "include_obfuscation": include_obfuscation,
1445 "starting_after": starting_after,
1446 "stream": stream,
1447 },
1448 response_retrieve_params.ResponseRetrieveParams,
1449 ),
1450 ),
1451 cast_to=Response,
1452 stream=stream or False,
1453 stream_cls=Stream[ResponseStreamEvent],
1454 )
1455
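    # Hedged sketch: retrieving with `stream=True` replays the SSE event stream
    # of a stored or background response (the response id is a placeholder):
    #
    #     for event in client.responses.retrieve("resp_123", stream=True):
    #         print(event.type)
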
    def delete(
        self,
        response_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> None:
        """
        Deletes a model response with the given ID.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not response_id:
            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
        return self._delete(
            f"/responses/{response_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=NoneType,
        )

    def cancel(
        self,
        response_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response:
        """Cancels a model response with the given ID.

        Only responses created with the
        `background` parameter set to `true` can be cancelled.
        [Learn more](https://platform.openai.com/docs/guides/background).

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not response_id:
            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
        return self._post(
            f"/responses/{response_id}/cancel",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Response,
        )

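    # Hedged sketch: only background responses can be cancelled, so a typical
    # flow (placeholder model id and input) looks like:
    #
    #     resp = client.responses.create(model="gpt-4o", input="...", background=True)
    #     client.responses.cancel(resp.id)
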
    def compact(
        self,
        *,
        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        model: Union[
            Literal[
                "gpt-5.1",
                "gpt-5.1-2025-11-13",
                "gpt-5.1-codex",
                "gpt-5.1-mini",
                "gpt-5.1-chat-latest",
                "gpt-5",
                "gpt-5-mini",
                "gpt-5-nano",
                "gpt-5-2025-08-07",
                "gpt-5-mini-2025-08-07",
                "gpt-5-nano-2025-08-07",
                "gpt-5-chat-latest",
                "gpt-4.1",
                "gpt-4.1-mini",
                "gpt-4.1-nano",
                "gpt-4.1-2025-04-14",
                "gpt-4.1-mini-2025-04-14",
                "gpt-4.1-nano-2025-04-14",
                "o4-mini",
                "o4-mini-2025-04-16",
                "o3",
                "o3-2025-04-16",
                "o3-mini",
                "o3-mini-2025-01-31",
                "o1",
                "o1-2024-12-17",
                "o1-preview",
                "o1-preview-2024-09-12",
                "o1-mini",
                "o1-mini-2024-09-12",
                "gpt-4o",
                "gpt-4o-2024-11-20",
                "gpt-4o-2024-08-06",
                "gpt-4o-2024-05-13",
                "gpt-4o-audio-preview",
                "gpt-4o-audio-preview-2024-10-01",
                "gpt-4o-audio-preview-2024-12-17",
                "gpt-4o-audio-preview-2025-06-03",
                "gpt-4o-mini-audio-preview",
                "gpt-4o-mini-audio-preview-2024-12-17",
                "gpt-4o-search-preview",
                "gpt-4o-mini-search-preview",
                "gpt-4o-search-preview-2025-03-11",
                "gpt-4o-mini-search-preview-2025-03-11",
                "chatgpt-4o-latest",
                "codex-mini-latest",
                "gpt-4o-mini",
                "gpt-4o-mini-2024-07-18",
                "gpt-4-turbo",
                "gpt-4-turbo-2024-04-09",
                "gpt-4-0125-preview",
                "gpt-4-turbo-preview",
                "gpt-4-1106-preview",
                "gpt-4-vision-preview",
                "gpt-4",
                "gpt-4-0314",
                "gpt-4-0613",
                "gpt-4-32k",
                "gpt-4-32k-0314",
                "gpt-4-32k-0613",
                "gpt-3.5-turbo",
                "gpt-3.5-turbo-16k",
                "gpt-3.5-turbo-0301",
                "gpt-3.5-turbo-0613",
                "gpt-3.5-turbo-1106",
                "gpt-3.5-turbo-0125",
                "gpt-3.5-turbo-16k-0613",
                "o1-pro",
                "o1-pro-2025-03-19",
                "o3-pro",
                "o3-pro-2025-06-10",
                "o3-deep-research",
                "o3-deep-research-2025-06-26",
                "o4-mini-deep-research",
                "o4-mini-deep-research-2025-06-26",
                "computer-use-preview",
                "computer-use-preview-2025-03-11",
                "gpt-5-codex",
                "gpt-5-pro",
                "gpt-5-pro-2025-10-06",
                "gpt-5.1-codex-max",
            ],
            str,
            None,
        ]
        | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> CompactedResponse:
        """
        Compact conversation

        Args:
          input: Text, image, or file inputs to the model, used to generate a response

          instructions: A system (or developer) message inserted into the model's context. When used
              along with `previous_response_id`, the instructions from a previous response
              will not be carried over to the next response. This makes it simple to swap out
              system (or developer) messages in new responses.

          model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          previous_response_id: The unique ID of the previous response to the model. Use this to create
              multi-turn conversations. Learn more about
              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
              Cannot be used in conjunction with `conversation`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        return self._post(
            "/responses/compact",
            body=maybe_transform(
                {
                    "input": input,
                    "instructions": instructions,
                    "model": model,
                    "previous_response_id": previous_response_id,
                },
                response_compact_params.ResponseCompactParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=CompactedResponse,
        )

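    # Hedged sketch: `compact()` condenses prior turns server-side; chaining off
    # an earlier response is shown with a placeholder id.
    #
    #     compacted = client.responses.compact(
    #         model="gpt-5.1",
    #         previous_response_id="resp_123",
    #     )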
1674
1675class AsyncResponses(AsyncAPIResource):
1676 @cached_property
1677 def input_items(self) -> AsyncInputItems:
1678 return AsyncInputItems(self._client)
1679
1680 @cached_property
1681 def input_tokens(self) -> AsyncInputTokens:
1682 return AsyncInputTokens(self._client)
1683
1684 @cached_property
1685 def with_raw_response(self) -> AsyncResponsesWithRawResponse:
1686 """
1687 This property can be used as a prefix for any HTTP method call to return
1688 the raw response object instead of the parsed content.
1689
1690 For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
1691 """
1692 return AsyncResponsesWithRawResponse(self)
1693
1694 @cached_property
1695 def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse:
1696 """
1697 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
1698
1699 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
1700 """
1701 return AsyncResponsesWithStreamingResponse(self)
1702
1703 @overload
1704 async def create(
1705 self,
1706 *,
1707 background: Optional[bool] | Omit = omit,
1708 conversation: Optional[response_create_params.Conversation] | Omit = omit,
1709 include: Optional[List[ResponseIncludable]] | Omit = omit,
1710 input: Union[str, ResponseInputParam] | Omit = omit,
1711 instructions: Optional[str] | Omit = omit,
1712 max_output_tokens: Optional[int] | Omit = omit,
1713 max_tool_calls: Optional[int] | Omit = omit,
1714 metadata: Optional[Metadata] | Omit = omit,
1715 model: ResponsesModel | Omit = omit,
1716 parallel_tool_calls: Optional[bool] | Omit = omit,
1717 previous_response_id: Optional[str] | Omit = omit,
1718 prompt: Optional[ResponsePromptParam] | Omit = omit,
1719 prompt_cache_key: str | Omit = omit,
1720 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
1721 reasoning: Optional[Reasoning] | Omit = omit,
1722 safety_identifier: str | Omit = omit,
1723 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
1724 store: Optional[bool] | Omit = omit,
1725 stream: Optional[Literal[False]] | Omit = omit,
1726 stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
1727 temperature: Optional[float] | Omit = omit,
1728 text: ResponseTextConfigParam | Omit = omit,
1729 tool_choice: response_create_params.ToolChoice | Omit = omit,
1730 tools: Iterable[ToolParam] | Omit = omit,
1731 top_logprobs: Optional[int] | Omit = omit,
1732 top_p: Optional[float] | Omit = omit,
1733 truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
1734 user: str | Omit = omit,
1735 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1736 # The extra values given here take precedence over values defined on the client or passed to this method.
1737 extra_headers: Headers | None = None,
1738 extra_query: Query | None = None,
1739 extra_body: Body | None = None,
1740 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1741 ) -> Response:
1742 """Creates a model response.
1743
1744 Provide
1745 [text](https://platform.openai.com/docs/guides/text) or
1746 [image](https://platform.openai.com/docs/guides/images) inputs to generate
1747 [text](https://platform.openai.com/docs/guides/text) or
1748 [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
1749 the model call your own
1750 [custom code](https://platform.openai.com/docs/guides/function-calling) or use
1751 built-in [tools](https://platform.openai.com/docs/guides/tools) like
1752 [web search](https://platform.openai.com/docs/guides/tools-web-search) or
1753 [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
1754 your own data as input for the model's response.
1755
1756 Args:
1757 background: Whether to run the model response in the background.
1758 [Learn more](https://platform.openai.com/docs/guides/background).
1759
1760 conversation: The conversation that this response belongs to. Items from this conversation are
1761 prepended to `input_items` for this response request. Input items and output
1762 items from this response are automatically added to this conversation after this
1763 response completes.
1764
1765 include: Specify additional output data to include in the model response. Currently
1766 supported values are:
1767
1768 - `web_search_call.action.sources`: Include the sources of the web search tool
1769 call.
1770 - `code_interpreter_call.outputs`: Includes the outputs of python code execution
1771 in code interpreter tool call items.
1772 - `computer_call_output.output.image_url`: Include image urls from the computer
1773 call output.
1774 - `file_search_call.results`: Include the search results of the file search tool
1775 call.
1776 - `message.input_image.image_url`: Include image urls from the input message.
1777 - `message.output_text.logprobs`: Include logprobs with assistant messages.
1778 - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
1779 tokens in reasoning item outputs. This enables reasoning items to be used in
1780 multi-turn conversations when using the Responses API statelessly (like when
1781 the `store` parameter is set to `false`, or when an organization is enrolled
1782 in the zero data retention program).
1783
1784 input: Text, image, or file inputs to the model, used to generate a response.
1785
1786 Learn more:
1787
1788 - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
1789 - [Image inputs](https://platform.openai.com/docs/guides/images)
1790 - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
1791 - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
1792 - [Function calling](https://platform.openai.com/docs/guides/function-calling)
1793
1794 instructions: A system (or developer) message inserted into the model's context.
1795
1796 When using along with `previous_response_id`, the instructions from a previous
1797 response will not be carried over to the next response. This makes it simple to
1798 swap out system (or developer) messages in new responses.
1799
1800 max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
1801 including visible output tokens and
1802 [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
1803
1804 max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
1805 response. This maximum number applies across all built-in tool calls, not per
1806 individual tool. Any further attempts to call a tool by the model will be
1807 ignored.
1808
1809 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
1810 for storing additional information about the object in a structured format, and
1811 querying for objects via API or the dashboard.
1812
1813 Keys are strings with a maximum length of 64 characters. Values are strings with
1814 a maximum length of 512 characters.
1815
1816 model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
1817 wide range of models with different capabilities, performance characteristics,
1818 and price points. Refer to the
1819 [model guide](https://platform.openai.com/docs/models) to browse and compare
1820 available models.
1821
1822 parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
1823
1824 previous_response_id: The unique ID of the previous response to the model. Use this to create
1825 multi-turn conversations. Learn more about
1826 [conversation state](https://platform.openai.com/docs/guides/conversation-state).
1827 Cannot be used in conjunction with `conversation`.
1828
1829 prompt: Reference to a prompt template and its variables.
1830 [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
1831
1832 prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
1833 hit rates. Replaces the `user` field.
1834 [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
1835
1836 prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
1837 prompt caching, which keeps cached prefixes active for longer, up to a maximum
1838 of 24 hours.
1839 [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
1840
1841 reasoning: **gpt-5 and o-series models only**
1842
1843 Configuration options for
1844 [reasoning models](https://platform.openai.com/docs/guides/reasoning).
1845
1846 safety_identifier: A stable identifier used to help detect users of your application that may be
1847 violating OpenAI's usage policies. The IDs should be a string that uniquely
1848 identifies each user. We recommend hashing their username or email address, in
1849 order to avoid sending us any identifying information.
1850 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
1851
1852 service_tier: Specifies the processing type used for serving the request.
1853
1854 - If set to 'auto', then the request will be processed with the service tier
1855 configured in the Project settings. Unless otherwise configured, the Project
1856 will use 'default'.
1857 - If set to 'default', then the request will be processed with the standard
1858 pricing and performance for the selected model.
1859 - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
1860 '[priority](https://openai.com/api-priority-processing/)', then the request
1861 will be processed with the corresponding service tier.
1862 - When not set, the default behavior is 'auto'.
1863
1864 When the `service_tier` parameter is set, the response body will include the
1865 `service_tier` value based on the processing mode actually used to serve the
1866 request. This response value may be different from the value set in the
1867 parameter.
1868
1869 store: Whether to store the generated model response for later retrieval via API.
1870
1871 stream: If set to true, the model response data will be streamed to the client as it is
1872 generated using
1873 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
1874 See the
1875 [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
1876 for more information.
1877
1878 stream_options: Options for streaming responses. Only set this when you set `stream: true`.
1879
1880 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
1881 make the output more random, while lower values like 0.2 will make it more
1882 focused and deterministic. We generally recommend altering this or `top_p` but
1883 not both.
1884
1885 text: Configuration options for a text response from the model. Can be plain text or
1886 structured JSON data. Learn more:
1887
1888 - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
1889 - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
1890
1891 tool_choice: How the model should select which tool (or tools) to use when generating a
1892 response. See the `tools` parameter to see how to specify which tools the model
1893 can call.
1894
1895 tools: An array of tools the model may call while generating a response. You can
1896 specify which tool to use by setting the `tool_choice` parameter.
1897
1898 We support the following categories of tools:
1899
1900 - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
1901 capabilities, like
1902 [web search](https://platform.openai.com/docs/guides/tools-web-search) or
1903 [file search](https://platform.openai.com/docs/guides/tools-file-search).
1904 Learn more about
1905 [built-in tools](https://platform.openai.com/docs/guides/tools).
1906 - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
1907 predefined connectors such as Google Drive and SharePoint. Learn more about
1908 [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
1909 - **Function calls (custom tools)**: Functions that are defined by you, enabling
1910 the model to call your own code with strongly typed arguments and outputs.
1911 Learn more about
1912 [function calling](https://platform.openai.com/docs/guides/function-calling).
1913 You can also use custom tools to call your own code.
1914
1915 top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
1916 return at each token position, each with an associated log probability.
1917
1918 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
1919 model considers the results of the tokens with top_p probability mass. So 0.1
1920 means only the tokens comprising the top 10% probability mass are considered.
1921
1922 We generally recommend altering this or `temperature` but not both.
1923
1924 truncation: The truncation strategy to use for the model response.
1925
1926 - `auto`: If the input to this Response exceeds the model's context window size,
1927 the model will truncate the response to fit the context window by dropping
1928 items from the beginning of the conversation.
1929 - `disabled` (default): If the input size will exceed the context window size
1930 for a model, the request will fail with a 400 error.
1931
1932 user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
1933 `prompt_cache_key` instead to maintain caching optimizations. A stable
1934 identifier for your end-users. Used to boost cache hit rates by better bucketing
1935 similar requests and to help OpenAI detect and prevent abuse.
1936 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
1937
1938 extra_headers: Send extra headers
1939
1940 extra_query: Add additional query parameters to the request
1941
1942 extra_body: Add additional JSON properties to the request
1943
1944 timeout: Override the client-level default timeout for this request, in seconds
1945 """
1946 ...
1947
1948 @overload
1949 async def create(
1950 self,
1951 *,
1952 stream: Literal[True],
1953 background: Optional[bool] | Omit = omit,
1954 conversation: Optional[response_create_params.Conversation] | Omit = omit,
1955 include: Optional[List[ResponseIncludable]] | Omit = omit,
1956 input: Union[str, ResponseInputParam] | Omit = omit,
1957 instructions: Optional[str] | Omit = omit,
1958 max_output_tokens: Optional[int] | Omit = omit,
1959 max_tool_calls: Optional[int] | Omit = omit,
1960 metadata: Optional[Metadata] | Omit = omit,
1961 model: ResponsesModel | Omit = omit,
1962 parallel_tool_calls: Optional[bool] | Omit = omit,
1963 previous_response_id: Optional[str] | Omit = omit,
1964 prompt: Optional[ResponsePromptParam] | Omit = omit,
1965 prompt_cache_key: str | Omit = omit,
1966 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
1967 reasoning: Optional[Reasoning] | Omit = omit,
1968 safety_identifier: str | Omit = omit,
1969 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
1970 store: Optional[bool] | Omit = omit,
1971 stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
1972 temperature: Optional[float] | Omit = omit,
1973 text: ResponseTextConfigParam | Omit = omit,
1974 tool_choice: response_create_params.ToolChoice | Omit = omit,
1975 tools: Iterable[ToolParam] | Omit = omit,
1976 top_logprobs: Optional[int] | Omit = omit,
1977 top_p: Optional[float] | Omit = omit,
1978 truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
1979 user: str | Omit = omit,
1980 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1981 # The extra values given here take precedence over values defined on the client or passed to this method.
1982 extra_headers: Headers | None = None,
1983 extra_query: Query | None = None,
1984 extra_body: Body | None = None,
1985 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1986 ) -> AsyncStream[ResponseStreamEvent]:
1987 """Creates a model response.
1988
1989 Provide
1990 [text](https://platform.openai.com/docs/guides/text) or
1991 [image](https://platform.openai.com/docs/guides/images) inputs to generate
1992 [text](https://platform.openai.com/docs/guides/text) or
1993 [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
1994 the model call your own
1995 [custom code](https://platform.openai.com/docs/guides/function-calling) or use
1996 built-in [tools](https://platform.openai.com/docs/guides/tools) like
1997 [web search](https://platform.openai.com/docs/guides/tools-web-search) or
1998 [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
1999 your own data as input for the model's response.
2000
2001 Args:
2002 stream: If set to true, the model response data will be streamed to the client as it is
2003 generated using
2004 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
2005 See the
2006 [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
2007 for more information.
2008
2009 background: Whether to run the model response in the background.
2010 [Learn more](https://platform.openai.com/docs/guides/background).
2011
2012 conversation: The conversation that this response belongs to. Items from this conversation are
2013 prepended to `input_items` for this response request. Input items and output
2014 items from this response are automatically added to this conversation after this
2015 response completes.
2016
2017 include: Specify additional output data to include in the model response. Currently
2018 supported values are:
2019
2020 - `web_search_call.action.sources`: Include the sources of the web search tool
2021 call.
2022 - `code_interpreter_call.outputs`: Includes the outputs of python code execution
2023 in code interpreter tool call items.
2024 - `computer_call_output.output.image_url`: Include image urls from the computer
2025 call output.
2026 - `file_search_call.results`: Include the search results of the file search tool
2027 call.
2028 - `message.input_image.image_url`: Include image urls from the input message.
2029 - `message.output_text.logprobs`: Include logprobs with assistant messages.
2030 - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
2031 tokens in reasoning item outputs. This enables reasoning items to be used in
2032 multi-turn conversations when using the Responses API statelessly (like when
2033 the `store` parameter is set to `false`, or when an organization is enrolled
2034 in the zero data retention program).
2035
2036 input: Text, image, or file inputs to the model, used to generate a response.
2037
2038 Learn more:
2039
2040 - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
2041 - [Image inputs](https://platform.openai.com/docs/guides/images)
2042 - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
2043 - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
2044 - [Function calling](https://platform.openai.com/docs/guides/function-calling)
2045
2046 instructions: A system (or developer) message inserted into the model's context.
2047
2048 When using along with `previous_response_id`, the instructions from a previous
2049 response will not be carried over to the next response. This makes it simple to
2050 swap out system (or developer) messages in new responses.
2051
2052 max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
2053 including visible output tokens and
2054 [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
2055
2056 max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
2057 response. This maximum number applies across all built-in tool calls, not per
2058 individual tool. Any further attempts to call a tool by the model will be
2059 ignored.
2060
2061 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
2062 for storing additional information about the object in a structured format, and
2063 querying for objects via API or the dashboard.
2064
2065 Keys are strings with a maximum length of 64 characters. Values are strings with
2066 a maximum length of 512 characters.
2067
2068 model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
2069 wide range of models with different capabilities, performance characteristics,
2070 and price points. Refer to the
2071 [model guide](https://platform.openai.com/docs/models) to browse and compare
2072 available models.
2073
2074 parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
2075
2076 previous_response_id: The unique ID of the previous response to the model. Use this to create
2077 multi-turn conversations. Learn more about
2078 [conversation state](https://platform.openai.com/docs/guides/conversation-state).
2079 Cannot be used in conjunction with `conversation`.
2080
2081 prompt: Reference to a prompt template and its variables.
2082 [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
2083
2084 prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
2085 hit rates. Replaces the `user` field.
2086 [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
2087
2088 prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
2089 prompt caching, which keeps cached prefixes active for longer, up to a maximum
2090 of 24 hours.
2091 [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
2092
2093 reasoning: **gpt-5 and o-series models only**
2094
2095 Configuration options for
2096 [reasoning models](https://platform.openai.com/docs/guides/reasoning).
2097
2098 safety_identifier: A stable identifier used to help detect users of your application that may be
2099 violating OpenAI's usage policies. The IDs should be a string that uniquely
2100 identifies each user. We recommend hashing their username or email address, in
2101 order to avoid sending us any identifying information.
2102 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
2103
2104 service_tier: Specifies the processing type used for serving the request.
2105
2106 - If set to 'auto', then the request will be processed with the service tier
2107 configured in the Project settings. Unless otherwise configured, the Project
2108 will use 'default'.
2109 - If set to 'default', then the request will be processed with the standard
2110 pricing and performance for the selected model.
2111 - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
2112 '[priority](https://openai.com/api-priority-processing/)', then the request
2113 will be processed with the corresponding service tier.
2114 - When not set, the default behavior is 'auto'.
2115
2116 When the `service_tier` parameter is set, the response body will include the
2117 `service_tier` value based on the processing mode actually used to serve the
2118 request. This response value may be different from the value set in the
2119 parameter.
2120
2121 store: Whether to store the generated model response for later retrieval via API.
2122
2123 stream_options: Options for streaming responses. Only set this when you set `stream: true`.
2124
2125 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
2126 make the output more random, while lower values like 0.2 will make it more
2127 focused and deterministic. We generally recommend altering this or `top_p` but
2128 not both.
2129
2130 text: Configuration options for a text response from the model. Can be plain text or
2131 structured JSON data. Learn more:
2132
2133 - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
2134 - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
2135
2136 tool_choice: How the model should select which tool (or tools) to use when generating a
2137 response. See the `tools` parameter to see how to specify which tools the model
2138 can call.
2139
2140 tools: An array of tools the model may call while generating a response. You can
2141 specify which tool to use by setting the `tool_choice` parameter.
2142
2143 We support the following categories of tools:
2144
2145 - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
2146 capabilities, like
2147 [web search](https://platform.openai.com/docs/guides/tools-web-search) or
2148 [file search](https://platform.openai.com/docs/guides/tools-file-search).
2149 Learn more about
2150 [built-in tools](https://platform.openai.com/docs/guides/tools).
2151 - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
2152 predefined connectors such as Google Drive and SharePoint. Learn more about
2153 [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
2154 - **Function calls (custom tools)**: Functions that are defined by you, enabling
2155 the model to call your own code with strongly typed arguments and outputs.
2156 Learn more about
2157 [function calling](https://platform.openai.com/docs/guides/function-calling).
2158 You can also use custom tools to call your own code.
2159
2160 top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
2161 return at each token position, each with an associated log probability.
2162
2163 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
2164 model considers the results of the tokens with top_p probability mass. So 0.1
2165 means only the tokens comprising the top 10% probability mass are considered.
2166
2167 We generally recommend altering this or `temperature` but not both.
2168
2169 truncation: The truncation strategy to use for the model response.
2170
2171 - `auto`: If the input to this Response exceeds the model's context window size,
2172 the model will truncate the response to fit the context window by dropping
2173 items from the beginning of the conversation.
2174 - `disabled` (default): If the input size will exceed the context window size
2175 for a model, the request will fail with a 400 error.
2176
2177 user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
2178 `prompt_cache_key` instead to maintain caching optimizations. A stable
2179 identifier for your end-users. Used to boost cache hit rates by better bucketing
2180 similar requests and to help OpenAI detect and prevent abuse.
2181 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
2182
2183 extra_headers: Send extra headers
2184
2185 extra_query: Add additional query parameters to the request
2186
2187 extra_body: Add additional JSON properties to the request
2188
2189 timeout: Override the client-level default timeout for this request, in seconds
2190 """
2191 ...
2192
2193 @overload
2194 async def create(
2195 self,
2196 *,
2197 stream: bool,
2198 background: Optional[bool] | Omit = omit,
2199 conversation: Optional[response_create_params.Conversation] | Omit = omit,
2200 include: Optional[List[ResponseIncludable]] | Omit = omit,
2201 input: Union[str, ResponseInputParam] | Omit = omit,
2202 instructions: Optional[str] | Omit = omit,
2203 max_output_tokens: Optional[int] | Omit = omit,
2204 max_tool_calls: Optional[int] | Omit = omit,
2205 metadata: Optional[Metadata] | Omit = omit,
2206 model: ResponsesModel | Omit = omit,
2207 parallel_tool_calls: Optional[bool] | Omit = omit,
2208 previous_response_id: Optional[str] | Omit = omit,
2209 prompt: Optional[ResponsePromptParam] | Omit = omit,
2210 prompt_cache_key: str | Omit = omit,
2211 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
2212 reasoning: Optional[Reasoning] | Omit = omit,
2213 safety_identifier: str | Omit = omit,
2214 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
2215 store: Optional[bool] | Omit = omit,
2216 stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
2217 temperature: Optional[float] | Omit = omit,
2218 text: ResponseTextConfigParam | Omit = omit,
2219 tool_choice: response_create_params.ToolChoice | Omit = omit,
2220 tools: Iterable[ToolParam] | Omit = omit,
2221 top_logprobs: Optional[int] | Omit = omit,
2222 top_p: Optional[float] | Omit = omit,
2223 truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
2224 user: str | Omit = omit,
2225 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2226 # The extra values given here take precedence over values defined on the client or passed to this method.
2227 extra_headers: Headers | None = None,
2228 extra_query: Query | None = None,
2229 extra_body: Body | None = None,
2230 timeout: float | httpx.Timeout | None | NotGiven = not_given,
2231 ) -> Response | AsyncStream[ResponseStreamEvent]:
2232 """Creates a model response.
2233
2234 Provide
2235 [text](https://platform.openai.com/docs/guides/text) or
2236 [image](https://platform.openai.com/docs/guides/images) inputs to generate
2237 [text](https://platform.openai.com/docs/guides/text) or
2238 [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
2239 the model call your own
2240 [custom code](https://platform.openai.com/docs/guides/function-calling) or use
2241 built-in [tools](https://platform.openai.com/docs/guides/tools) like
2242 [web search](https://platform.openai.com/docs/guides/tools-web-search) or
2243 [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
2244 your own data as input for the model's response.
2245
2246 Args:
2247 stream: If set to true, the model response data will be streamed to the client as it is
2248 generated using
2249 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
2250 See the
2251 [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
2252 for more information.
2253
2254 background: Whether to run the model response in the background.
2255 [Learn more](https://platform.openai.com/docs/guides/background).
2256
2257 conversation: The conversation that this response belongs to. Items from this conversation are
2258 prepended to `input_items` for this response request. Input items and output
2259 items from this response are automatically added to this conversation after this
2260 response completes.
2261
2262 include: Specify additional output data to include in the model response. Currently
2263 supported values are:
2264
2265 - `web_search_call.action.sources`: Include the sources of the web search tool
2266 call.
2267 - `code_interpreter_call.outputs`: Includes the outputs of python code execution
2268 in code interpreter tool call items.
2269 - `computer_call_output.output.image_url`: Include image urls from the computer
2270 call output.
2271 - `file_search_call.results`: Include the search results of the file search tool
2272 call.
2273 - `message.input_image.image_url`: Include image urls from the input message.
2274 - `message.output_text.logprobs`: Include logprobs with assistant messages.
2275 - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
2276 tokens in reasoning item outputs. This enables reasoning items to be used in
2277 multi-turn conversations when using the Responses API statelessly (like when
2278 the `store` parameter is set to `false`, or when an organization is enrolled
2279 in the zero data retention program).
2280
2281 input: Text, image, or file inputs to the model, used to generate a response.
2282
2283 Learn more:
2284
2285 - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
2286 - [Image inputs](https://platform.openai.com/docs/guides/images)
2287 - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
2288 - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
2289 - [Function calling](https://platform.openai.com/docs/guides/function-calling)
2290
2291 instructions: A system (or developer) message inserted into the model's context.
2292
2293 When using along with `previous_response_id`, the instructions from a previous
2294 response will not be carried over to the next response. This makes it simple to
2295 swap out system (or developer) messages in new responses.
2296
2297 max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
2298 including visible output tokens and
2299 [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
2300
2301 max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
2302 response. This maximum number applies across all built-in tool calls, not per
2303 individual tool. Any further attempts to call a tool by the model will be
2304 ignored.
2305
2306 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
2307 for storing additional information about the object in a structured format, and
2308 querying for objects via API or the dashboard.
2309
2310 Keys are strings with a maximum length of 64 characters. Values are strings with
2311 a maximum length of 512 characters.
2312
2313 model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
2314 wide range of models with different capabilities, performance characteristics,
2315 and price points. Refer to the
2316 [model guide](https://platform.openai.com/docs/models) to browse and compare
2317 available models.
2318
2319 parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
2320
2321 previous_response_id: The unique ID of the previous response to the model. Use this to create
2322 multi-turn conversations. Learn more about
2323 [conversation state](https://platform.openai.com/docs/guides/conversation-state).
2324 Cannot be used in conjunction with `conversation`.
2325
2326 prompt: Reference to a prompt template and its variables.
2327 [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
2328
2329 prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
2330 hit rates. Replaces the `user` field.
2331 [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
2332
2333 prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
2334 prompt caching, which keeps cached prefixes active for longer, up to a maximum
2335 of 24 hours.
2336 [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
2337
2338 reasoning: **gpt-5 and o-series models only**
2339
2340 Configuration options for
2341 [reasoning models](https://platform.openai.com/docs/guides/reasoning).
2342
2343 safety_identifier: A stable identifier used to help detect users of your application that may be
2344 violating OpenAI's usage policies. The IDs should be a string that uniquely
2345 identifies each user. We recommend hashing their username or email address, in
2346 order to avoid sending us any identifying information.
2347 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          store: Whether to store the generated model response for later retrieval via API.

          stream_options: Options for streaming responses. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          text: Configuration options for a text response from the model. Can be plain text or
              structured JSON data. Learn more:

              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)

          tool_choice: How the model should select which tool (or tools) to use when generating a
              response. See the `tools` parameter to see how to specify which tools the model
              can call.

          tools: An array of tools the model may call while generating a response. You can
              specify which tool to use by setting the `tool_choice` parameter.

              We support the following categories of tools:

              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
                capabilities, like
                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
                [file search](https://platform.openai.com/docs/guides/tools-file-search).
                Learn more about
                [built-in tools](https://platform.openai.com/docs/guides/tools).
              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
                predefined connectors such as Google Drive and SharePoint. Learn more about
                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
              - **Function calls (custom tools)**: Functions that are defined by you, enabling
                the model to call your own code with strongly typed arguments and outputs.
                Learn more about
                [function calling](https://platform.openai.com/docs/guides/function-calling).
                You can also use custom tools to call your own code.

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          truncation: The truncation strategy to use for the model response.

              - `auto`: If the input to this Response exceeds the model's context window size,
                the model will truncate the response to fit the context window by dropping
                items from the beginning of the conversation.
              - `disabled` (default): If the input size will exceed the context window size
                for a model, the request will fail with a 400 error.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
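
        Example (illustrative only; assumes an `AsyncOpenAI` client named
        `client` — any supported model and input work the same way):

            response = await client.responses.create(
                model="gpt-4o",
                input="Write a one-sentence summary of photosynthesis.",
            )
            print(response.output_text)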
        """
        ...

    async def create(
        self,
        *,
        background: Optional[bool] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        input: Union[str, ResponseInputParam] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: ResponsesModel | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        tools: Iterable[ToolParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response | AsyncStream[ResponseStreamEvent]:
        return await self._post(
            "/responses",
            body=await async_maybe_transform(
                {
                    "background": background,
                    "conversation": conversation,
                    "include": include,
                    "input": input,
                    "instructions": instructions,
                    "max_output_tokens": max_output_tokens,
                    "max_tool_calls": max_tool_calls,
                    "metadata": metadata,
                    "model": model,
                    "parallel_tool_calls": parallel_tool_calls,
                    "previous_response_id": previous_response_id,
                    "prompt": prompt,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning": reasoning,
                    "safety_identifier": safety_identifier,
                    "service_tier": service_tier,
                    "store": store,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "text": text,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "truncation": truncation,
                    "user": user,
                },
                response_create_params.ResponseCreateParamsStreaming
                if stream
                else response_create_params.ResponseCreateParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Response,
            stream=stream or False,
            stream_cls=AsyncStream[ResponseStreamEvent],
        )

    @overload
    def stream(
        self,
        *,
        response_id: str,
        text_format: type[TextFormatT] | Omit = omit,
        starting_after: int | Omit = omit,
        tools: Iterable[ParseableToolParam] | Omit = omit,
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncResponseStreamManager[TextFormatT]: ...

    @overload
    def stream(
        self,
        *,
        input: Union[str, ResponseInputParam],
        model: ResponsesModel,
        background: Optional[bool] | Omit = omit,
        text_format: type[TextFormatT] | Omit = omit,
        tools: Iterable[ParseableToolParam] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncResponseStreamManager[TextFormatT]: ...

    def stream(
        self,
        *,
        response_id: str | Omit = omit,
        input: Union[str, ResponseInputParam] | Omit = omit,
        model: ResponsesModel | Omit = omit,
        background: Optional[bool] | Omit = omit,
        text_format: type[TextFormatT] | Omit = omit,
        tools: Iterable[ParseableToolParam] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        starting_after: int | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncResponseStreamManager[TextFormatT]:
        new_response_args = {
            "input": input,
            "model": model,
            "conversation": conversation,
            "include": include,
            "instructions": instructions,
            "max_output_tokens": max_output_tokens,
            "max_tool_calls": max_tool_calls,
            "metadata": metadata,
            "parallel_tool_calls": parallel_tool_calls,
            "previous_response_id": previous_response_id,
            "prompt": prompt,
            "prompt_cache_key": prompt_cache_key,
            "prompt_cache_retention": prompt_cache_retention,
            "reasoning": reasoning,
            "safety_identifier": safety_identifier,
            "service_tier": service_tier,
            "store": store,
            "stream_options": stream_options,
            "temperature": temperature,
            "text": text,
            "tool_choice": tool_choice,
            "top_logprobs": top_logprobs,
            "top_p": top_p,
            "truncation": truncation,
            "user": user,
            "background": background,
        }
        new_response_args_names = [k for k, v in new_response_args.items() if is_given(v)]

        if (is_given(response_id) or is_given(starting_after)) and len(new_response_args_names) > 0:
            raise ValueError(
                "response_id/starting_after cannot be provided together with "
                + ", ".join(new_response_args_names)
            )

        tools = _make_tools(tools)
        if len(new_response_args_names) > 0:
            if not is_given(input):
                raise ValueError("input must be provided when creating a new response")

            if not is_given(model):
                raise ValueError("model must be provided when creating a new response")

            if is_given(text_format):
                if not text:
                    text = {}

                if "format" in text:
                    raise TypeError("Cannot mix and match text.format with text_format")
                text = copy(text)
                text["format"] = _type_to_text_format_param(text_format)

            api_request = self.create(
                input=input,
                model=model,
                stream=True,
                tools=tools,
                conversation=conversation,
                include=include,
                instructions=instructions,
                max_output_tokens=max_output_tokens,
                max_tool_calls=max_tool_calls,
                metadata=metadata,
                parallel_tool_calls=parallel_tool_calls,
                previous_response_id=previous_response_id,
                prompt=prompt,
                prompt_cache_key=prompt_cache_key,
                prompt_cache_retention=prompt_cache_retention,
                store=store,
                stream_options=stream_options,
                temperature=temperature,
                text=text,
                tool_choice=tool_choice,
                reasoning=reasoning,
                safety_identifier=safety_identifier,
                service_tier=service_tier,
                top_logprobs=top_logprobs,
                top_p=top_p,
                truncation=truncation,
                user=user,
                background=background,
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
            )

            return AsyncResponseStreamManager(
                api_request,
                text_format=text_format,
                input_tools=tools,
                starting_after=None,
            )
        else:
            if not is_given(response_id):
                raise ValueError("response_id must be provided when streaming an existing response")

            api_request = self.retrieve(
                response_id,
                stream=True,
                include=include or [],
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
            )
            return AsyncResponseStreamManager(
                api_request,
                text_format=text_format,
                input_tools=tools,
                starting_after=starting_after if is_given(starting_after) else None,
            )
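
    # Illustrative usage of `stream()` (an assumption for documentation, not part
    # of the generated surface; assumes an `AsyncOpenAI` client named `client`):
    #
    #     async with client.responses.stream(
    #         model="gpt-4o",
    #         input="Say hello",
    #     ) as stream:
    #         async for event in stream:
    #             if event.type == "response.output_text.delta":
    #                 print(event.delta, end="")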

    async def parse(
        self,
        *,
        text_format: type[TextFormatT] | Omit = omit,
        background: Optional[bool] | Omit = omit,
        conversation: Optional[response_create_params.Conversation] | Omit = omit,
        include: Optional[List[ResponseIncludable]] | Omit = omit,
        input: Union[str, ResponseInputParam] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        max_output_tokens: Optional[int] | Omit = omit,
        max_tool_calls: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: ResponsesModel | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        prompt: Optional[ResponsePromptParam] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        safety_identifier: str | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        text: ResponseTextConfigParam | Omit = omit,
        tool_choice: response_create_params.ToolChoice | Omit = omit,
        tools: Iterable[ParseableToolParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ParsedResponse[TextFormatT]:
        if is_given(text_format):
            if not text:
                text = {}

            if "format" in text:
                raise TypeError("Cannot mix and match text.format with text_format")
            text = copy(text)
            text["format"] = _type_to_text_format_param(text_format)

        tools = _make_tools(tools)

        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
            return parse_response(
                input_tools=tools,
                text_format=text_format,
                response=raw_response,
            )

        return await self._post(
            "/responses",
            body=maybe_transform(
                {
                    "background": background,
                    "conversation": conversation,
                    "include": include,
                    "input": input,
                    "instructions": instructions,
                    "max_output_tokens": max_output_tokens,
                    "max_tool_calls": max_tool_calls,
                    "metadata": metadata,
                    "model": model,
                    "parallel_tool_calls": parallel_tool_calls,
                    "previous_response_id": previous_response_id,
                    "prompt": prompt,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning": reasoning,
                    "safety_identifier": safety_identifier,
                    "service_tier": service_tier,
                    "store": store,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "text": text,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "truncation": truncation,
                    "user": user,
                    "verbosity": verbosity,
                },
                response_create_params.ResponseCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                post_parser=parser,
            ),
            # we turn the `Response` instance into a `ParsedResponse`
            # in the `parser` function above
            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
        )
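
    # Illustrative structured-output usage of `parse()` (an assumption for
    # documentation; `Step` is a hypothetical pydantic model):
    #
    #     class Step(pydantic.BaseModel):
    #         explanation: str
    #         output: str
    #
    #     parsed = await client.responses.parse(
    #         model="gpt-4o",
    #         input="Solve 2x + 3 = 11 and show your work.",
    #         text_format=Step,
    #     )
    #     step = parsed.output_parsed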

    @overload
    async def retrieve(
        self,
        response_id: str,
        *,
        include: List[ResponseIncludable] | Omit = omit,
        include_obfuscation: bool | Omit = omit,
        starting_after: int | Omit = omit,
        stream: Literal[False] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response:
        """
        Retrieves a model response with the given ID.

        Args:
          include: Additional fields to include in the response. See the `include` parameter for
              Response creation above for more information.

          include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
              characters to an `obfuscation` field on streaming delta events to normalize
              payload sizes as a mitigation to certain side-channel attacks. These obfuscation
              fields are included by default, but add a small amount of overhead to the data
              stream. You can set `include_obfuscation` to false to optimize for bandwidth if
              you trust the network links between your application and the OpenAI API.

          starting_after: The sequence number of the event after which to start streaming.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
              for more information.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    async def retrieve(
        self,
        response_id: str,
        *,
        stream: Literal[True],
        include: List[ResponseIncludable] | Omit = omit,
        include_obfuscation: bool | Omit = omit,
        starting_after: int | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncStream[ResponseStreamEvent]:
        """
        Retrieves a model response with the given ID.

        Args:
          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
              for more information.

          include: Additional fields to include in the response. See the `include` parameter for
              Response creation above for more information.

          include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
              characters to an `obfuscation` field on streaming delta events to normalize
              payload sizes as a mitigation to certain side-channel attacks. These obfuscation
              fields are included by default, but add a small amount of overhead to the data
              stream. You can set `include_obfuscation` to false to optimize for bandwidth if
              you trust the network links between your application and the OpenAI API.

          starting_after: The sequence number of the event after which to start streaming.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    async def retrieve(
        self,
        response_id: str,
        *,
        stream: bool,
        include: List[ResponseIncludable] | Omit = omit,
        include_obfuscation: bool | Omit = omit,
        starting_after: int | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response | AsyncStream[ResponseStreamEvent]:
        """
        Retrieves a model response with the given ID.

        Args:
          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
              for more information.

          include: Additional fields to include in the response. See the `include` parameter for
              Response creation above for more information.

          include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
              characters to an `obfuscation` field on streaming delta events to normalize
              payload sizes as a mitigation to certain side-channel attacks. These obfuscation
              fields are included by default, but add a small amount of overhead to the data
              stream. You can set `include_obfuscation` to false to optimize for bandwidth if
              you trust the network links between your application and the OpenAI API.

          starting_after: The sequence number of the event after which to start streaming.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    async def retrieve(
        self,
        response_id: str,
        *,
        include: List[ResponseIncludable] | Omit = omit,
        include_obfuscation: bool | Omit = omit,
        starting_after: int | Omit = omit,
        stream: Literal[False] | Literal[True] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response | AsyncStream[ResponseStreamEvent]:
        if not response_id:
            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
        return await self._get(
            f"/responses/{response_id}",
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=await async_maybe_transform(
                    {
                        "include": include,
                        "include_obfuscation": include_obfuscation,
                        "starting_after": starting_after,
                        "stream": stream,
                    },
                    response_retrieve_params.ResponseRetrieveParams,
                ),
            ),
            cast_to=Response,
            stream=stream or False,
            stream_cls=AsyncStream[ResponseStreamEvent],
        )
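
    # Illustrative usage of `retrieve()` (an assumption for documentation;
    # "resp_123" is a placeholder ID): resume streaming an existing response
    # from a known event sequence number:
    #
    #     events = await client.responses.retrieve(
    #         "resp_123",
    #         stream=True,
    #         starting_after=42,
    #     )
    #     async for event in events:
    #         ...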

    async def delete(
        self,
        response_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> None:
        """
        Deletes a model response with the given ID.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not response_id:
            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
        return await self._delete(
            f"/responses/{response_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=NoneType,
        )

    async def cancel(
        self,
        response_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Response:
        """Cancels a model response with the given ID.

        Only responses created with the
        `background` parameter set to `true` can be cancelled.
        [Learn more](https://platform.openai.com/docs/guides/background).

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not response_id:
            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
        return await self._post(
            f"/responses/{response_id}/cancel",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Response,
        )
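
    # Illustrative usage of `cancel()` (an assumption for documentation;
    # "resp_123" is a placeholder ID for a response created with
    # `background=True`):
    #
    #     response = await client.responses.cancel("resp_123")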

    async def compact(
        self,
        *,
        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        model: Union[
            Literal[
                "gpt-5.1",
                "gpt-5.1-2025-11-13",
                "gpt-5.1-codex",
                "gpt-5.1-mini",
                "gpt-5.1-chat-latest",
                "gpt-5",
                "gpt-5-mini",
                "gpt-5-nano",
                "gpt-5-2025-08-07",
                "gpt-5-mini-2025-08-07",
                "gpt-5-nano-2025-08-07",
                "gpt-5-chat-latest",
                "gpt-4.1",
                "gpt-4.1-mini",
                "gpt-4.1-nano",
                "gpt-4.1-2025-04-14",
                "gpt-4.1-mini-2025-04-14",
                "gpt-4.1-nano-2025-04-14",
                "o4-mini",
                "o4-mini-2025-04-16",
                "o3",
                "o3-2025-04-16",
                "o3-mini",
                "o3-mini-2025-01-31",
                "o1",
                "o1-2024-12-17",
                "o1-preview",
                "o1-preview-2024-09-12",
                "o1-mini",
                "o1-mini-2024-09-12",
                "gpt-4o",
                "gpt-4o-2024-11-20",
                "gpt-4o-2024-08-06",
                "gpt-4o-2024-05-13",
                "gpt-4o-audio-preview",
                "gpt-4o-audio-preview-2024-10-01",
                "gpt-4o-audio-preview-2024-12-17",
                "gpt-4o-audio-preview-2025-06-03",
                "gpt-4o-mini-audio-preview",
                "gpt-4o-mini-audio-preview-2024-12-17",
                "gpt-4o-search-preview",
                "gpt-4o-mini-search-preview",
                "gpt-4o-search-preview-2025-03-11",
                "gpt-4o-mini-search-preview-2025-03-11",
                "chatgpt-4o-latest",
                "codex-mini-latest",
                "gpt-4o-mini",
                "gpt-4o-mini-2024-07-18",
                "gpt-4-turbo",
                "gpt-4-turbo-2024-04-09",
                "gpt-4-0125-preview",
                "gpt-4-turbo-preview",
                "gpt-4-1106-preview",
                "gpt-4-vision-preview",
                "gpt-4",
                "gpt-4-0314",
                "gpt-4-0613",
                "gpt-4-32k",
                "gpt-4-32k-0314",
                "gpt-4-32k-0613",
                "gpt-3.5-turbo",
                "gpt-3.5-turbo-16k",
                "gpt-3.5-turbo-0301",
                "gpt-3.5-turbo-0613",
                "gpt-3.5-turbo-1106",
                "gpt-3.5-turbo-0125",
                "gpt-3.5-turbo-16k-0613",
                "o1-pro",
                "o1-pro-2025-03-19",
                "o3-pro",
                "o3-pro-2025-06-10",
                "o3-deep-research",
                "o3-deep-research-2025-06-26",
                "o4-mini-deep-research",
                "o4-mini-deep-research-2025-06-26",
                "computer-use-preview",
                "computer-use-preview-2025-03-11",
                "gpt-5-codex",
                "gpt-5-pro",
                "gpt-5-pro-2025-10-06",
                "gpt-5.1-codex-max",
            ],
            str,
            None,
        ]
        | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> CompactedResponse:
        """
        Compact conversation

        Args:
          input: Text, image, or file inputs to the model, used to generate a response

          instructions: A system (or developer) message inserted into the model's context. When used
              along with `previous_response_id`, the instructions from a previous response
              will not be carried over to the next response. This makes it simple to swap out
              system (or developer) messages in new responses.

          model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          previous_response_id: The unique ID of the previous response to the model. Use this to create
              multi-turn conversations. Learn more about
              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
              Cannot be used in conjunction with `conversation`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        return await self._post(
            "/responses/compact",
            body=await async_maybe_transform(
                {
                    "input": input,
                    "instructions": instructions,
                    "model": model,
                    "previous_response_id": previous_response_id,
                },
                response_compact_params.ResponseCompactParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=CompactedResponse,
        )
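
    # Illustrative usage of `compact()` (an assumption for documentation;
    # "resp_123" is a placeholder ID): compact a long multi-turn conversation so
    # later turns start from a smaller context:
    #
    #     compacted = await client.responses.compact(
    #         model="gpt-5.1",
    #         previous_response_id="resp_123",
    #     )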


class ResponsesWithRawResponse:
    def __init__(self, responses: Responses) -> None:
        self._responses = responses

        self.create = _legacy_response.to_raw_response_wrapper(
            responses.create,
        )
        self.retrieve = _legacy_response.to_raw_response_wrapper(
            responses.retrieve,
        )
        self.delete = _legacy_response.to_raw_response_wrapper(
            responses.delete,
        )
        self.cancel = _legacy_response.to_raw_response_wrapper(
            responses.cancel,
        )
        self.compact = _legacy_response.to_raw_response_wrapper(
            responses.compact,
        )
        self.parse = _legacy_response.to_raw_response_wrapper(
            responses.parse,
        )

    @cached_property
    def input_items(self) -> InputItemsWithRawResponse:
        return InputItemsWithRawResponse(self._responses.input_items)

    @cached_property
    def input_tokens(self) -> InputTokensWithRawResponse:
        return InputTokensWithRawResponse(self._responses.input_tokens)


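# Illustrative raw-response usage (an assumption for documentation; assumes an
# `AsyncOpenAI` client named `client`):
#
#     raw = await client.responses.with_raw_response.create(
#         model="gpt-4o",
#         input="Hello",
#     )
#     print(raw.headers.get("x-request-id"))
#     response = raw.parse()  # the typed `Response`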
class AsyncResponsesWithRawResponse:
    def __init__(self, responses: AsyncResponses) -> None:
        self._responses = responses

        self.create = _legacy_response.async_to_raw_response_wrapper(
            responses.create,
        )
        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
            responses.retrieve,
        )
        self.delete = _legacy_response.async_to_raw_response_wrapper(
            responses.delete,
        )
        self.cancel = _legacy_response.async_to_raw_response_wrapper(
            responses.cancel,
        )
        self.compact = _legacy_response.async_to_raw_response_wrapper(
            responses.compact,
        )
        self.parse = _legacy_response.async_to_raw_response_wrapper(
            responses.parse,
        )

    @cached_property
    def input_items(self) -> AsyncInputItemsWithRawResponse:
        return AsyncInputItemsWithRawResponse(self._responses.input_items)

    @cached_property
    def input_tokens(self) -> AsyncInputTokensWithRawResponse:
        return AsyncInputTokensWithRawResponse(self._responses.input_tokens)


class ResponsesWithStreamingResponse:
    def __init__(self, responses: Responses) -> None:
        self._responses = responses

        self.create = to_streamed_response_wrapper(
            responses.create,
        )
        self.retrieve = to_streamed_response_wrapper(
            responses.retrieve,
        )
        self.delete = to_streamed_response_wrapper(
            responses.delete,
        )
        self.cancel = to_streamed_response_wrapper(
            responses.cancel,
        )
        self.compact = to_streamed_response_wrapper(
            responses.compact,
        )

    @cached_property
    def input_items(self) -> InputItemsWithStreamingResponse:
        return InputItemsWithStreamingResponse(self._responses.input_items)

    @cached_property
    def input_tokens(self) -> InputTokensWithStreamingResponse:
        return InputTokensWithStreamingResponse(self._responses.input_tokens)


class AsyncResponsesWithStreamingResponse:
    def __init__(self, responses: AsyncResponses) -> None:
        self._responses = responses

        self.create = async_to_streamed_response_wrapper(
            responses.create,
        )
        self.retrieve = async_to_streamed_response_wrapper(
            responses.retrieve,
        )
        self.delete = async_to_streamed_response_wrapper(
            responses.delete,
        )
        self.cancel = async_to_streamed_response_wrapper(
            responses.cancel,
        )
        self.compact = async_to_streamed_response_wrapper(
            responses.compact,
        )

    @cached_property
    def input_items(self) -> AsyncInputItemsWithStreamingResponse:
        return AsyncInputItemsWithStreamingResponse(self._responses.input_items)

    @cached_property
    def input_tokens(self) -> AsyncInputTokensWithStreamingResponse:
        return AsyncInputTokensWithStreamingResponse(self._responses.input_tokens)


def _make_tools(tools: Iterable[ParseableToolParam] | Omit) -> List[ToolParam] | Omit:
    if not is_given(tools):
        return omit

    converted_tools: List[ToolParam] = []
    for tool in tools:
        if tool["type"] != "function":
            converted_tools.append(tool)
            continue

        if "function" not in tool:
            # standard Responses API case
            converted_tools.append(tool)
            continue

        function = cast(Any, tool)["function"]  # pyright: ignore[reportUnnecessaryCast]
        if not isinstance(function, PydanticFunctionTool):
            raise Exception(
                "Expected Chat Completions function tool shape to be created using `openai.pydantic_function_tool()`"
            )

        assert "parameters" in function
        new_tool = ResponsesPydanticFunctionTool(
            {
                "type": "function",
                "name": function["name"],
                "description": function.get("description"),
                "parameters": function["parameters"],
                "strict": function.get("strict") or False,
            },
            function.model,
        )

        converted_tools.append(new_tool.cast())

    return converted_tools
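
# Illustrative sketch of the conversion performed by `_make_tools` (an
# assumption for documentation; `GetWeather` is a hypothetical model, and
# `openai.pydantic_function_tool()` is the helper referenced in the error
# message above):
#
#     import openai
#     import pydantic
#
#     class GetWeather(pydantic.BaseModel):
#         city: str
#
#     tools = _make_tools([openai.pydantic_function_tool(GetWeather)])
#     # the Chat Completions shape {"type": "function", "function": {...}} is
#     # rewritten into the flat Responses shape with "name", "description",
#     # "parameters", and "strict" at the top level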